1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
17 // Function: FPDFText_LoadPage
18 // Prepare information about all characters in a page.
20 // Param page - Handle to the page, as returned by the FPDF_LoadPage function (in the FPDFVIEW module).
22 // Returns: A handle to the text page information structure.
23 //          NULL if something goes wrong.
25 // Note: The application must call FPDFText_ClosePage to release the text page information.
27 // Note: This function cannot load an FPDF_TEXTPAGE for a document that consists of dynamic fields.
29 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page);
31 // Function: FPDFText_ClosePage
32 // Release all resources allocated for a text page information structure.
34 // Param text_page - Handle to a text page information structure, as returned by the FPDFText_LoadPage function.
38 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page);
40 // Function: FPDFText_CountChars
41 // Get the number of characters in a page.
43 // Param text_page - Handle to a text page information structure, as returned by the FPDFText_LoadPage function.
45 // Returns: Number of characters in the page, or -1 on error.
46 //          Generated characters, such as additional space characters and new line characters, are also counted.
48 // Note: Characters in a page form a "stream"; inside the stream, each character has an index.
49 //       The index parameters are used by many of the FPDFTEXT functions. The first character in the page
50 //       has an index value of zero.
52 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page);
54 // Function: FPDFText_GetUnicode
55 // Get the Unicode value of a character in a page.
57 // Param text_page - Handle to a text page information structure, as returned by the FPDFText_LoadPage function.
58 // Param index - Zero-based index of the character.
60 // Returns: The Unicode value of the particular character.
61 //          If a character is not encoded in Unicode and the Foxit engine can't convert it to Unicode,
62 //          the return value will be zero.
64 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index);
66 // Function: FPDFText_GetFontSize
67 // Get the font size of a particular character.
69 // Param text_page - Handle to a text page information structure, as returned by the FPDFText_LoadPage function.
70 // Param index - Zero-based index of the character.
72 // Returns: The font size of the particular character, measured in points (about 1/72 inch).
73 //          This is the typographic size of the font (the so-called "em size").
75 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index);
77 // Function: FPDFText_GetCharBox
78 // Get the bounding box of a particular character.
80 // Param text_page - Handle to a text page information structure, as returned by the FPDFText_LoadPage function.
81 // Param index - Zero-based index of the character.
82 // Param left - Pointer to a double receiving the left position of the character box.
83 // Param right - Pointer to a double receiving the right position of the character box.
84 // Param bottom - Pointer to a double receiving the bottom position of the character box.
85 // Param top - Pointer to a double receiving the top position of the character box.
89 // Note: All positions are measured in PDF "user space".
91 DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, int index, double* left,
92 double* right, double* bottom, double* top);
94 // Function: FPDFText_GetCharIndexAtPos
95 // Get the index of a character at or near a certain position on the page.
97 // Param text_page - Handle to a text page information structure, as returned by the FPDFText_LoadPage function.
98 // Param x - X position in PDF "user space".
99 // Param y - Y position in PDF "user space".
100 // Param xTolerance - An x-axis tolerance value for character hit detection, in points.
101 // Param yTolerance - A y-axis tolerance value for character hit detection, in points.
103 // Returns: The zero-based index of the character at, or near, the point (x,y).
104 //          If there is no character at or near the point, the return value will be -1.
105 //          If an error occurs, -3 will be returned.
107 DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
108 double x, double y, double xTolerance, double yTolerance);
110 // Function: FPDFText_GetText
111 // Extract a Unicode text string from the page.
113 // Param text_page - Handle to a text page information structure, as returned by the FPDFText_LoadPage function.
114 // Param start_index - Index of the starting character.
115 // Param count - Number of characters to be extracted.
116 // Param result - A buffer (allocated by the application) receiving the extracted Unicode characters.
117 //                The buffer must be large enough to hold that number of characters plus a terminator.
119 // Returns: Number of characters written into the result buffer, including the trailing terminator.
121 // Note: This function ignores characters without Unicode information.
123 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, int start_index, int count, unsigned short* result);
125 // Function: FPDFText_CountRects
126 // Count the number of rectangular areas occupied by a segment of text.
128 // Param text_page - Handle to a text page information structure, as returned by the FPDFText_LoadPage function.
129 // Param start_index - Index of the starting character.
130 // Param count - Number of characters.
132 // Returns: Number of rectangles. Zero on error.
134 // Note: This function, along with FPDFText_GetRect, can be used by applications to detect the position
135 //       of a text segment on the page, so that the proper areas can be highlighted, for example.
136 //       FPDFTEXT will automatically merge small character boxes into a bigger one if those characters
137 //       are on the same line and use the same font settings.
139 DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, int start_index, int count);
141 // Function: FPDFText_GetRect
142 // Get a rectangular area from the result generated by FPDFText_CountRects.
144 // Param text_page - Handle to a text page information structure, as returned by the FPDFText_LoadPage function.
145 // Param rect_index - Zero-based index of the rectangle.
146 // Param left - Pointer to a double receiving the rectangle's left boundary.
147 // Param top - Pointer to a double receiving the rectangle's top boundary.
148 // Param right - Pointer to a double receiving the rectangle's right boundary.
149 // Param bottom - Pointer to a double receiving the rectangle's bottom boundary.
153 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, int rect_index, double* left, double* top,
154 double* right, double* bottom);
156 // Function: FPDFText_GetBoundedText
157 // Extract Unicode text within a rectangular boundary on the page.
159 // Param text_page - Handle to a text page information structure, as returned by the FPDFText_LoadPage function.
160 // Param left - Left boundary.
161 // Param top - Top boundary.
162 // Param right - Right boundary.
163 // Param bottom - Bottom boundary.
164 // Param buffer - A Unicode buffer.
165 // Param buflen - Number of characters (not bytes) for the buffer, excluding an additional terminator.
167 // Returns: If buffer is NULL or buflen is zero, the number of characters (not bytes) of text present within
168 //          the rectangle, excluding a terminating NUL. Generally you should pass a buffer at least one larger
169 //          than this if you want a terminating NUL, which will be provided if space is available.
170 //          Otherwise, the number of characters copied into the buffer, including the terminating NUL
171 //          when space for it is available.
173 // Note: If the buffer is too small, as much text as will fit is copied into it.
175 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double left, double top,
176 double right, double bottom,unsigned short* buffer,int buflen);
179 // Flags used by the FPDFText_FindStart function (passed in its flags parameter).
180 #define FPDF_MATCHCASE 0x00000001 // If not set, the search does not match case (the default).
181 #define FPDF_MATCHWHOLEWORD 0x00000002 // If not set, the search does not match whole words only (the default).
183 // Function: FPDFText_FindStart
// Create a search context for a match pattern on a text page.
186 // Param text_page - Handle to a text page information structure, as returned by the FPDFText_LoadPage function.
187 // Param findwhat - A Unicode match pattern.
188 // Param flags - Option flags (see FPDF_MATCHCASE and FPDF_MATCHWHOLEWORD).
189 // Param start_index - Start from this character. -1 for the end of the page.
191 // Returns: A handle for the search context. FPDFText_FindClose must be called to release this handle.
193 DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, FPDF_WIDESTRING findwhat,
194 unsigned long flags, int start_index);
196 // Function: FPDFText_FindNext
197 // Search in the direction from the start of the page to its end.
199 // Param handle - A search context handle returned by FPDFText_FindStart.
201 // Returns: Whether a match is found.
203 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle);
205 // Function: FPDFText_FindPrev
206 // Search in the direction from the end of the page to its start.
208 // Param handle - A search context handle returned by FPDFText_FindStart.
210 // Returns: Whether a match is found.
212 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle);
214 // Function: FPDFText_GetSchResultIndex
215 // Get the starting character index of the search result.
217 // Param handle - A search context handle returned by FPDFText_FindStart.
219 // Returns: Index of the starting character.
221 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle);
223 // Function: FPDFText_GetSchCount
224 // Get the number of matched characters in the search result.
226 // Param handle - A search context handle returned by FPDFText_FindStart.
228 // Returns: Number of matched characters.
230 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle);
232 // Function: FPDFText_FindClose
233 // Release a search context.
235 // Param handle - A search context handle returned by FPDFText_FindStart.
239 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle);
241 // Function: FPDFLink_LoadWebLinks
242 // Prepare information about weblinks in a page.
244 // Param text_page - Handle to a text page information structure, as returned by the FPDFText_LoadPage function.
246 // Returns: A handle to the page's links information structure.
247 //          NULL if something goes wrong.
249 // Note: Weblinks are links implicitly embedded in the PDF page contents. PDF also has a type of
250 //       annotation called "link"; FPDFTEXT doesn't deal with that kind of link.
251 //       The FPDFTEXT weblink feature is useful for automatically detecting links in the page
252 //       contents. For example, things like "http://www.foxitsoftware.com" will be detected,
253 //       so applications can allow the user to click on those characters to activate the link,
254 //       even if the PDF doesn't come with link annotations.
256 // Note: FPDFLink_CloseWebLinks must be called to release resources.
258 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page);
260 // Function: FPDFLink_CountWebLinks
261 // Count the number of detected web links.
263 // Param link_page - Handle returned by FPDFLink_LoadWebLinks.
265 // Returns: Number of detected web links.
267 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page);
269 // Function: FPDFLink_GetURL
270 // Fetch the URL information for a detected web link.
272 // Param link_page - Handle returned by FPDFLink_LoadWebLinks.
273 // Param link_index - Zero-based index of the link.
274 // Param buffer - A Unicode buffer.
275 // Param buflen - Number of characters (not bytes) for the buffer, including an additional terminator.
277 // Returns: If buffer is NULL or buflen is zero, the number of characters needed (characters, not bytes; an additional terminator is also counted);
278 //          otherwise, the number of characters copied into the buffer.
280 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, int link_index, unsigned short* buffer,int buflen);
282 // Function: FPDFLink_CountRects
283 // Count the number of rectangular areas for the link.
285 // Param link_page - Handle returned by FPDFLink_LoadWebLinks.
286 // Param link_index - Zero-based index of the link.
288 // Returns: Number of rectangular areas for the link.
290 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, int link_index);
292 // Function: FPDFLink_GetRect
293 // Fetch the boundaries of a rectangle for a link.
295 // Param link_page - Handle returned by FPDFLink_LoadWebLinks.
296 // Param link_index - Zero-based index of the link.
297 // Param rect_index - Zero-based index of the rectangle.
298 // Param left - Pointer to a double receiving the rectangle's left boundary.
299 // Param top - Pointer to a double receiving the rectangle's top boundary.
300 // Param right - Pointer to a double receiving the rectangle's right boundary.
301 // Param bottom - Pointer to a double receiving the rectangle's bottom boundary.
305 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index, int rect_index,
306 double* left, double* top,double* right, double* bottom);
308 // Function: FPDFLink_CloseWebLinks
309 // Release the resources used by the weblink feature.
311 // Param link_page - Handle returned by FPDFLink_LoadWebLinks.
315 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page);