Fix ALL the include guards.
[pdfium.git] / core / src / fpdftext / text_int.h
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_
8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_
9
10 class CPDF_TextParseOptions 
11 {
12 public:
13     CPDF_TextParseOptions();
14     FX_BOOL                     m_bCheckObjectOrder;
15     FX_BOOL                     m_bCheckDirection;
16     int                         m_nCheckSameObject;
17 };
// Forward declarations. CPDF_TextPage, CPDF_LinkExtract and CPDF_TextPageFind
// are defined further down in this header; CPDF_DocProgressiveSearch is only
// named here.
class CPDF_TextPage;
class CPDF_LinkExtract;
class CPDF_TextPageFind;
class CPDF_DocProgressiveSearch;
// Character kind codes.
// NOTE(review): presumably the values stored in PAGECHAR_INFO::m_Flag —
// confirm against the implementation file.
// The negative value is parenthesized so it stays a single operand wherever
// the macro is expanded.
#define FPDFTEXT_CHAR_ERROR     (-1)
#define FPDFTEXT_CHAR_NORMAL    0
#define FPDFTEXT_CHAR_GENERATED 1
#define FPDFTEXT_CHAR_UNUNICODE 2
#define FPDFTEXT_CHAR_HYPHEN    3
#define FPDFTEXT_CHAR_PIECE     4

// Marked-content status codes.
// NOTE(review): presumably the FX_INT32 result of
// CPDF_TextPage::PreMarkedContent — confirm against the implementation file.
#define FPDFTEXT_MC_PASS        0
#define FPDFTEXT_MC_DONE        1
#define FPDFTEXT_MC_DELAY       2
31 typedef struct _PAGECHAR_INFO {
32     int                                 m_CharCode;
33     FX_WCHAR                    m_Unicode;
34     FX_FLOAT                    m_OriginX;
35     FX_FLOAT                    m_OriginY;
36     FX_INT32                    m_Flag;
37     CFX_FloatRect               m_CharBox;
38     CPDF_TextObject*    m_pTextObj;
39     CFX_AffineMatrix    m_Matrix;
40     int                                 m_Index;
41 } PAGECHAR_INFO;
42 typedef CFX_SegmentedArray<PAGECHAR_INFO> PAGECHAR_InfoArray;
// A (start, count) pair of ints; the element type of SEGMENT_Array.
typedef struct {
    int m_Start;
    int m_nCount;
} FPDF_SEGMENT;
47 typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array;
48 typedef struct {
49     CPDF_TextObject*    m_pTextObj;
50     CFX_AffineMatrix    m_formMatrix;
51 } PDFTEXT_Obj;
52 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ;
53 class CPDF_TextPage: public IPDF_TextPage
54 {
55 public:
56     CPDF_TextPage(const CPDF_Page* pPage, int flags = 0);
57     CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0);
58     CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions);
59     virtual FX_BOOL                                     ParseTextPage();
60     virtual void                                        NormalizeObjects(FX_BOOL bNormalize);
61     virtual     FX_BOOL                                 IsParsered() const
62     {
63         return m_IsParsered;
64     }
65     virtual ~CPDF_TextPage() {};
66 public:
67     virtual int CharIndexFromTextIndex(int TextIndex)const ;
68     virtual int TextIndexFromCharIndex(int CharIndex)const;
69     virtual int                                         CountChars() const;
70     virtual     void                                    GetCharInfo(int index, FPDF_CHAR_INFO & info) const;
71     virtual void                                        GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const;
72     virtual int                                         GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const;
73     virtual int                                         GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance,
74             FX_FLOAT yTorelance) const;
75     virtual CFX_WideString                      GetTextByRect(const CFX_FloatRect& rect) const;
76     virtual void                                        GetRectsArrayByRect(const CFX_FloatRect& rect, CFX_RectArray& resRectArray) const;
77     virtual     int                                             GetOrderByDirection(int order, int direction) const;
78     virtual     CFX_WideString                  GetPageText(int start = 0, int nCount = -1) const;
79
80     virtual int                                         CountRects(int start, int nCount);
81     virtual     void                                    GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top
82                                             , FX_FLOAT& right, FX_FLOAT &bottom) const;
83     virtual FX_BOOL                                     GetBaselineRotate(int rectIndex, int& Rotate);
84     virtual FX_BOOL                                     GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate);
85     virtual     int                                             CountBoundedSegments(FX_FLOAT left, FX_FLOAT top,
86             FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE);
87     virtual     void                                    GetBoundedSegment(int index, int& start, int& count) const;
88     virtual int                                         GetWordBreak(int index, int direction) const;
89 public:
90     const       PAGECHAR_InfoArray*             GetCharList() const
91     {
92         return &m_charList;
93     }
94     static      FX_BOOL                                 IsRectIntersect(const CFX_FloatRect& rect1, const CFX_FloatRect& rect2);
95     static      FX_BOOL                                 IsLetter(FX_WCHAR unicode);
96 private:
97     FX_BOOL                                                     IsHyphen(FX_WCHAR curChar);
98     FX_BOOL                                                     IsControlChar(PAGECHAR_INFO* pCharInfo);
99     FX_BOOL                                                     GetBaselineRotate(int start, int end, int& Rotate);
100     void                                                        ProcessObject();
101     void                                                        ProcessFormObject(CPDF_FormObject*      pFormObj, const CFX_AffineMatrix& formMatrix);
102     void                                                        ProcessTextObject(PDFTEXT_Obj pObj);
103     void                                                        ProcessTextObject(CPDF_TextObject*      pTextObj, const CFX_AffineMatrix& formMatrix, FX_POSITION ObjPos);
104     int                                                         ProcessInsertObject(const CPDF_TextObject* pObj, const CFX_AffineMatrix& formMatrix);
105     FX_BOOL                                                     GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info);
106     FX_BOOL                                                     IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos);
107     FX_BOOL                                                     IsSameTextObject(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2);
108     int                                                         GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const;
109     void                                                        CloseTempLine();
110     void                                                        OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str);
111     FX_INT32    PreMarkedContent(PDFTEXT_Obj pObj);
112     void                ProcessMarkedContent(PDFTEXT_Obj pObj);
113     void                CheckMarkedContentObject(FX_INT32& start, FX_INT32& nCount) const;
114     void                FindPreviousTextObject(void);
115     void                AddCharInfoByLRDirection(CFX_WideString& str, int i);
116     void                AddCharInfoByRLDirection(CFX_WideString& str, int i);
117     FX_INT32    GetTextObjectWritingMode(const CPDF_TextObject* pTextObj);
118     FX_INT32    FindTextlineFlowDirection();
119     void SwapTempTextBuf(FX_INT32 iCharListStartAppend,
120                          FX_INT32 iBufStartAppend);
121     FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj,
122                           const CPDF_Font* pFont,
123                           int nItems) const;
124 protected:
125     CPDFText_ParseOptions                       m_ParseOptions;
126     CFX_WordArray                                       m_CharIndex;
127     const CPDF_PageObjects*                     m_pPage;
128     PAGECHAR_InfoArray                          m_charList;
129     CFX_WideTextBuf                                     m_TextBuf;
130     PAGECHAR_InfoArray                          m_TempCharList;
131     CFX_WideTextBuf                                     m_TempTextBuf;
132     int                                                         m_parserflag;
133     CPDF_TextObject*                            m_pPreTextObj;
134     CFX_AffineMatrix                            m_perMatrix;
135     FX_BOOL                                                     m_IsParsered;
136     CFX_AffineMatrix                            m_DisplayMatrix;
137
138     SEGMENT_Array                                       m_Segment;
139     CFX_RectArray                                       m_SelRects;
140     LINEOBJ                                                     m_LineObj;
141     FX_BOOL                                                     m_TextlineDir;
142     CFX_FloatRect                                       m_CurlineRect;
143 };
144 class CPDF_TextPageFind: public IPDF_TextPageFind
145 {
146 public:
147     CPDF_TextPageFind(const IPDF_TextPage* pTextPage);
148     virtual                                                     ~CPDF_TextPageFind() {};
149 public:
150     virtual     FX_BOOL                                 FindFirst(const CFX_WideString& findwhat, int flags, int startPos = 0);
151     virtual     FX_BOOL                                 FindNext();
152     virtual     FX_BOOL                                 FindPrev();
153
154     virtual void                                        GetRectArray(CFX_RectArray& rects) const;
155     virtual int                                         GetCurOrder() const;
156     virtual int                                         GetMatchedCount()const;
157 protected:
158     void                                                        ExtractFindWhat(const CFX_WideString& findwhat);
159     FX_BOOL                                                     IsMatchWholeWord(const CFX_WideString& csPageText, int startPos, int endPos);
160     FX_BOOL                                                     ExtractSubString(CFX_WideString& rString, FX_LPCWSTR lpszFullString,
161             int iSubString, FX_WCHAR chSep);
162     CFX_WideString                                      MakeReverse(const CFX_WideString& str);
163     int                                                         ReverseFind(const CFX_WideString& csPageText, const CFX_WideString& csWord, int nStartPos, int& WordLength);
164     int                                                         GetCharIndex(int index) const;
165 private:
166     CFX_WordArray                                       m_CharIndex;
167     const IPDF_TextPage*                        m_pTextPage;
168     CFX_WideString                                      m_strText;
169     CFX_WideString                                      m_findWhat;
170     int                                                         m_flags;
171     CFX_WideStringArray                         m_csFindWhatArray;
172     int                                                         m_findNextStart;
173     int                                                         m_findPreStart;
174     FX_BOOL                                                     m_bMatchCase;
175     FX_BOOL                                                     m_bMatchWholeWord;
176     int                                                         m_resStart;
177     int                                                         m_resEnd;
178     CFX_RectArray                                       m_resArray;
179     FX_BOOL                                                     m_IsFind;
180 };
181 class CPDF_LinkExt
182 {
183 public:
184     CPDF_LinkExt() {};
185     int                                                         m_Start;
186     int                                                         m_Count;
187     CFX_WideString                                      m_strUrl;
188     virtual                                                     ~CPDF_LinkExt() {};
189 };
190 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray;
191 class CPDF_LinkExtract: public IPDF_LinkExtract
192 {
193 public:
194     CPDF_LinkExtract();
195     virtual                                                     ~CPDF_LinkExtract();
196     virtual FX_BOOL                                     ExtractLinks(const IPDF_TextPage* pTextPage);
197     virtual     FX_BOOL                                 IsExtract() const
198     {
199         return m_IsParserd;
200     }
201 public:
202     virtual int                                         CountLinks() const;
203     virtual     CFX_WideString                  GetURL(int index) const;
204     virtual     void                                    GetBoundedSegment(int index, int& start, int& count) const;
205     virtual     void                                    GetRects(int index, CFX_RectArray& rects)const;
206 protected:
207     void                                                        parserLink();
208     void                                                        DeleteLinkList();
209     FX_BOOL                                                     CheckWebLink(CFX_WideString& strBeCheck);
210     FX_BOOL                                                     CheckMailLink(CFX_WideString& str);
211     FX_BOOL                                                     AppendToLinkList(int start, int count, const CFX_WideString& strUrl);
212 private:
213     LINK_InfoArray                                      m_LinkList;
214     const CPDF_TextPage*                        m_pTextPage;
215     CFX_WideString                                      m_strPageText;
216     FX_BOOL                                                     m_IsParserd;
217 };
218 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_LPWSTR pDst);
219 void NormalizeString(CFX_WideString& str);
220 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest);
221
222 #endif  // CORE_SRC_FPDFTEXT_TEXT_INT_H_