// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
\r
10 #include "fpdfview.h"
\r
12 // Exported Functions
\r
17 // Function: FPDFText_LoadPage
\r
18 // Prepare information about all characters in a page.
\r
20 // page - Handle to the page. Returned by FPDF_LoadPage function (in FPDFVIEW module).
\r
22 // A handle to the text page information structure.
\r
23 // NULL if something goes wrong.
\r
25 // Application must call FPDFText_ClosePage to release the text page information.
\r
26 // If you don't purchase Text Module , this function will return NULL.
\r
28 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page);
\r
30 // Function: FPDFText_ClosePage
\r
31 // Release all resources allocated for a text page information structure.
\r
33 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function.
\r
37 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page);
\r
39 // Function: FPDFText_CountChars
\r
40 // Get number of characters in a page.
\r
42 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function.
\r
44 // Number of characters in the page. Return -1 for error.
\r
45 // Generated characters, like additional space characters, new line characters, are also counted.
\r
47 // Characters in a page form a "stream", inside the stream, each character has an index.
\r
48 // We will use the index parameters in many of FPDFTEXT functions. The first character in the page
\r
49 // has an index value of zero.
\r
51 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page);
\r
53 // Function: FPDFText_GetUnicode
\r
54 // Get Unicode of a character in a page.
\r
56 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function.
\r
57 // index - Zero-based index of the character.
\r
59 // The Unicode of the particular character.
\r
60 // If a character is not encoded in Unicode and Foxit engine can't convert to Unicode,
\r
61 // the return value will be zero.
\r
63 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index);
\r
65 // Function: FPDFText_GetFontSize
\r
66 // Get the font size of a particular character.
\r
68 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function.
\r
69 // index - Zero-based index of the character.
\r
71 // The font size of the particular character, measured in points (about 1/72 inch).
\r
72 // This is the typographic size of the font (so called "em size").
\r
74 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index);
\r
76 // Function: FPDFText_GetCharBox
\r
77 // Get bounding box of a particular character.
\r
79 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function.
\r
80 // index - Zero-based index of the character.
\r
81 // left - Pointer to a double number receiving left position of the character box.
\r
82 // right - Pointer to a double number receiving right position of the character box.
\r
83 // bottom - Pointer to a double number receiving bottom position of the character box.
\r
84 // top - Pointer to a double number receiving top position of the character box.
\r
88 // All positions are measured in PDF "user space".
\r
90 DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, int index, double* left,
\r
91 double* right, double* bottom, double* top);
\r
93 // Function: FPDFText_GetCharIndexAtPos
\r
94 // Get the index of a character at or nearby a certain position on the page.
\r
96 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function.
\r
97 // x - X position in PDF "user space".
\r
98 // y - Y position in PDF "user space".
\r
99 // xTolerance - An x-axis tolerance value for character hit detection, in point unit.
\r
100 // yTolerance - A y-axis tolerance value for character hit detection, in point unit.
\r
102 // The zero-based index of the character at, or nearby the point (x,y).
\r
103 // If there is no character at or nearby the point, return value will be -1.
\r
104 // If an error occurs, -3 will be returned.
\r
106 DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
\r
107 double x, double y, double xTorelance, double yTolerance);
\r
109 // Function: FPDFText_GetText
\r
110 // Extract unicode text string from the page.
\r
112 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function.
\r
113 // start_index - Index for the start characters.
\r
114 // count - Number of characters to be extracted.
\r
115 // result - A buffer (allocated by application) receiving the extracted unicodes.
\r
116 // The size of the buffer must be able to hold the number of characters plus a terminator.
\r
118 // Number of characters written into the result buffer, including the trailing terminator.
\r
120 // This function ignores characters without unicode information.
\r
122 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, int start_index, int count, unsigned short* result);
\r
124 // Function: FPDFText_CountRects
\r
125 // Count number of rectangular areas occupied by a segment of texts.
\r
127 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function.
\r
128 // start_index - Index for the start characters.
\r
129 // count - Number of characters.
\r
131 // Number of rectangles. Zero for error.
\r
133 // This function, along with FPDFText_GetRect can be used by applications to detect the position
\r
134 // on the page for a text segment, so proper areas can be highlighted or something.
\r
135 // FPDFTEXT will automatically merge small character boxes into bigger one if those characters
\r
136 // are on the same line and use same font settings.
\r
138 DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, int start_index, int count);
\r
140 // Function: FPDFText_GetRect
\r
141 // Get a rectangular area from the result generated by FPDFText_CountRects.
\r
143 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function.
\r
144 // rect_index - Zero-based index for the rectangle.
\r
145 // left - Pointer to a double value receiving the rectangle left boundary.
\r
146 // top - Pointer to a double value receiving the rectangle top boundary.
\r
147 // right - Pointer to a double value receiving the rectangle right boundary.
\r
148 // bottom - Pointer to a double value receiving the rectangle bottom boundary.
\r
152 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, int rect_index, double* left, double* top,
\r
153 double* right, double* bottom);
\r
155 // Function: FPDFText_GetBoundedText
\r
156 // Extract unicode text within a rectangular boundary on the page.
\r
158 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function.
\r
159 // left - Left boundary.
\r
160 // top - Top boundary.
\r
161 // right - Right boundary.
\r
162 // bottom - Bottom boundary.
\r
163 // buffer - A unicode buffer.
\r
164 // buflen - Number of characters (not bytes) for the buffer, excluding an additional terminator.
\r
166 // If buffer is NULL or buflen is zero, return number of characters (not bytes) needed,
\r
167 // otherwise, return number of characters copied into the buffer.
\r
169 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double left, double top,
\r
170 double right, double bottom,unsigned short* buffer,int buflen);
\r
// Flags used by FPDFText_FindStart function.
// Each flag occupies its own bit, so they can be combined with bitwise OR.
#define FPDF_MATCHCASE      0x00000001  // If not set, it will not match case by default.
#define FPDF_MATCHWHOLEWORD 0x00000002  // If not set, it will not match the whole word by default.
\r
177 // Function: FPDFText_FindStart
\r
180 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function.
\r
181 // findwhat - A unicode match pattern.
\r
182 // flags - Option flags.
\r
183 // start_index - Start from this character. -1 for end of the page.
\r
185 // A handle for the search context. FPDFText_FindClose must be called to release this handle.
\r
187 DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, FPDF_WIDESTRING findwhat,
\r
188 unsigned long flags, int start_index);
\r
190 // Function: FPDFText_FindNext
\r
191 // Search in the direction from page start to end.
\r
193 // handle - A search context handle returned by FPDFText_FindStart.
\r
195 // Whether a match is found.
\r
197 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle);
\r
199 // Function: FPDFText_FindPrev
\r
200 // Search in the direction from page end to start.
\r
202 // handle - A search context handle returned by FPDFText_FindStart.
\r
204 // Whether a match is found.
\r
206 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle);
\r
208 // Function: FPDFText_GetSchResultIndex
\r
209 // Get the starting character index of the search result.
\r
211 // handle - A search context handle returned by FPDFText_FindStart.
\r
213 // Index for the starting character.
\r
215 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle);
\r
217 // Function: FPDFText_GetSchCount
\r
218 // Get the number of matched characters in the search result.
\r
220 // handle - A search context handle returned by FPDFText_FindStart.
\r
222 // Number of matched characters.
\r
224 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle);
\r
226 // Function: FPDFText_FindClose
\r
227 // Release a search context.
\r
229 // handle - A search context handle returned by FPDFText_FindStart.
\r
233 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle);
\r
235 // Function: FPDFLink_LoadWebLinks
\r
236 // Prepare information about weblinks in a page.
\r
238 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function.
\r
240 // A handle to the page's links information structure.
\r
241 // NULL if something goes wrong.
\r
243 // Weblinks are those links implicitly embedded in PDF pages. PDF also has a type of
\r
244 // annotation called "link", FPDFTEXT doesn't deal with that kind of link.
\r
245 // FPDFTEXT weblink feature is useful for automatically detecting links in the page
\r
246 // contents. For example, things like "http://www.foxitsoftware.com" will be detected,
\r
247 // so applications can allow user to click on those characters to activate the link,
\r
248 // even the PDF doesn't come with link annotations.
\r
250 // FPDFLink_CloseWebLinks must be called to release resources.
\r
252 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page);
\r
254 // Function: FPDFLink_CountWebLinks
\r
255 // Count number of detected web links.
\r
257 // link_page - Handle returned by FPDFLink_LoadWebLinks.
\r
259 // Number of detected web links.
\r
261 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page);
\r
263 // Function: FPDFLink_GetURL
\r
264 // Fetch the URL information for a detected web link.
\r
266 // link_page - Handle returned by FPDFLink_LoadWebLinks.
\r
267 // link_index - Zero-based index for the link.
\r
268 // buffer - A unicode buffer.
\r
269 // buflen - Number of characters (not bytes) for the buffer, including an additional terminator.
\r
271 // If buffer is NULL or buflen is zero, return number of characters (not bytes and an additional terminator is also counted) needed,
\r
272 // otherwise, return number of characters copied into the buffer.
\r
274 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, int link_index, unsigned short* buffer,int buflen);
\r
276 // Function: FPDFLink_CountRects
\r
277 // Count number of rectangular areas for the link.
\r
279 // link_page - Handle returned by FPDFLink_LoadWebLinks.
\r
280 // link_index - Zero-based index for the link.
\r
282 // Number of rectangular areas for the link.
\r
284 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, int link_index);
\r
286 // Function: FPDFLink_GetRect
\r
287 // Fetch the boundaries of a rectangle for a link.
\r
289 // link_page - Handle returned by FPDFLink_LoadWebLinks.
\r
290 // link_index - Zero-based index for the link.
\r
291 // rect_index - Zero-based index for a rectangle.
\r
292 // left - Pointer to a double value receiving the rectangle left boundary.
\r
293 // top - Pointer to a double value receiving the rectangle top boundary.
\r
294 // right - Pointer to a double value receiving the rectangle right boundary.
\r
295 // bottom - Pointer to a double value receiving the rectangle bottom boundary.
\r
299 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index, int rect_index,
\r
300 double* left, double* top,double* right, double* bottom);
\r
302 // Function: FPDFLink_CloseWebLinks
\r
303 // Release resources used by weblink feature.
\r
305 // link_page - Handle returned by FPDFLink_LoadWebLinks.
\r
309 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page);
\r
316 #endif//_FPDFTEXT_H_
\r