Fix else-after-returns throughout pdfium.
[pdfium.git] / core / src / fpdftext / text_int.h
1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #ifndef CORE_SRC_FPDFTEXT_TEXT_INT_H_
8 #define CORE_SRC_FPDFTEXT_TEXT_INT_H_
9
// Forward declarations. The first three classes are defined later in this
// header; CPDF_DocProgressiveSearch is only named here.
class CPDF_TextPage;
class CPDF_LinkExtract;
class CPDF_TextPageFind;
class CPDF_DocProgressiveSearch;

// FPDFTEXT_CHAR_* codes.
// NOTE(review): presumably the values kept in PAGECHAR_INFO::m_Flag -- confirm
// against the implementation file.
// The negative value is parenthesized so the macro always expands as a single
// operand, whatever expression it is pasted into.
#define FPDFTEXT_CHAR_ERROR     (-1)
#define FPDFTEXT_CHAR_NORMAL    0
#define FPDFTEXT_CHAR_GENERATED 1
#define FPDFTEXT_CHAR_UNUNICODE 2
#define FPDFTEXT_CHAR_HYPHEN    3
#define FPDFTEXT_CHAR_PIECE     4

// FPDFTEXT_MC_* codes.
// NOTE(review): presumably the result codes of
// CPDF_TextPage::PreMarkedContent() -- confirm against the implementation.
#define FPDFTEXT_MC_PASS        0
#define FPDFTEXT_MC_DONE        1
#define FPDFTEXT_MC_DELAY       2
23 typedef struct _PAGECHAR_INFO {
24     int                                 m_CharCode;
25     FX_WCHAR                    m_Unicode;
26     FX_FLOAT                    m_OriginX;
27     FX_FLOAT                    m_OriginY;
28     int32_t                     m_Flag;
29     CFX_FloatRect               m_CharBox;
30     CPDF_TextObject*    m_pTextObj;
31     CFX_AffineMatrix    m_Matrix;
32     int                                 m_Index;
33 } PAGECHAR_INFO;
34 typedef CFX_SegmentedArray<PAGECHAR_INFO> PAGECHAR_InfoArray;
// A pair of ints: a start value and a count.
// NOTE(review): presumably a run of character indices (see
// CPDF_TextPage::GetBoundedSegment) -- confirm against the implementation.
struct FPDF_SEGMENT {
    int m_Start;
    int m_nCount;
};
39 typedef CFX_ArrayTemplate<FPDF_SEGMENT> SEGMENT_Array;
40 typedef struct {
41     CPDF_TextObject*    m_pTextObj;
42     CFX_AffineMatrix    m_formMatrix;
43 } PDFTEXT_Obj;
44 typedef CFX_ArrayTemplate<PDFTEXT_Obj> LINEOBJ;
45 class CPDF_TextPage: public IPDF_TextPage
46 {
47 public:
48     CPDF_TextPage(const CPDF_Page* pPage, int flags = 0);
49     CPDF_TextPage(const CPDF_PageObjects* pPage, int flags = 0);
50     CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions);
51     virtual FX_BOOL                                     ParseTextPage();
52     virtual void                                        NormalizeObjects(FX_BOOL bNormalize);
53     virtual     FX_BOOL                                 IsParsered() const
54     {
55         return m_IsParsered;
56     }
57     virtual ~CPDF_TextPage() {};
58 public:
59     virtual int CharIndexFromTextIndex(int TextIndex)const ;
60     virtual int TextIndexFromCharIndex(int CharIndex)const;
61     virtual int                                         CountChars() const;
62     virtual     void                                    GetCharInfo(int index, FPDF_CHAR_INFO & info) const;
63     virtual void                                        GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const;
64     virtual int                                         GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const;
65     virtual int                                         GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance,
66             FX_FLOAT yTorelance) const;
67     virtual CFX_WideString                      GetTextByRect(const CFX_FloatRect& rect) const;
68     virtual void                                        GetRectsArrayByRect(const CFX_FloatRect& rect, CFX_RectArray& resRectArray) const;
69     virtual     CFX_WideString                  GetPageText(int start = 0, int nCount = -1) const;
70
71     virtual int                                         CountRects(int start, int nCount);
72     virtual     void                                    GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top
73                                             , FX_FLOAT& right, FX_FLOAT &bottom) const;
74     virtual FX_BOOL                                     GetBaselineRotate(int rectIndex, int& Rotate);
75     virtual FX_BOOL                                     GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate);
76     virtual     int                                             CountBoundedSegments(FX_FLOAT left, FX_FLOAT top,
77             FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE);
78     virtual     void                                    GetBoundedSegment(int index, int& start, int& count) const;
79     virtual int                                         GetWordBreak(int index, int direction) const;
80 public:
81     const       PAGECHAR_InfoArray*             GetCharList() const
82     {
83         return &m_charList;
84     }
85     static      FX_BOOL                                 IsRectIntersect(const CFX_FloatRect& rect1, const CFX_FloatRect& rect2);
86     static      FX_BOOL                                 IsLetter(FX_WCHAR unicode);
87 private:
88     FX_BOOL                                                     IsHyphen(FX_WCHAR curChar);
89     bool                                                        IsControlChar(const PAGECHAR_INFO& charInfo);
90     FX_BOOL                                                     GetBaselineRotate(int start, int end, int& Rotate);
91     void                                                        ProcessObject();
92     void                                                        ProcessFormObject(CPDF_FormObject*      pFormObj, const CFX_AffineMatrix& formMatrix);
93     void                                                        ProcessTextObject(PDFTEXT_Obj pObj);
94     void                                                        ProcessTextObject(CPDF_TextObject*      pTextObj, const CFX_AffineMatrix& formMatrix, FX_POSITION ObjPos);
95     int                                                         ProcessInsertObject(const CPDF_TextObject* pObj, const CFX_AffineMatrix& formMatrix);
96     FX_BOOL                                                     GenerateCharInfo(FX_WCHAR unicode, PAGECHAR_INFO& info);
97     FX_BOOL                                                     IsSameAsPreTextObject(CPDF_TextObject* pTextObj, FX_POSITION ObjPos);
98     FX_BOOL                                                     IsSameTextObject(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2);
99     int                                                         GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const;
100     void                                                        CloseTempLine();
101     void                                                        OnPiece(IFX_BidiChar* pBidi, CFX_WideString& str);
102     int32_t     PreMarkedContent(PDFTEXT_Obj pObj);
103     void                ProcessMarkedContent(PDFTEXT_Obj pObj);
104     void                CheckMarkedContentObject(int32_t& start, int32_t& nCount) const;
105     void                FindPreviousTextObject(void);
106     void                AddCharInfoByLRDirection(CFX_WideString& str, int i);
107     void                AddCharInfoByRLDirection(CFX_WideString& str, int i);
108     int32_t     GetTextObjectWritingMode(const CPDF_TextObject* pTextObj);
109     int32_t     FindTextlineFlowDirection();
110     void SwapTempTextBuf(int32_t iCharListStartAppend,
111                          int32_t iBufStartAppend);
112     FX_BOOL IsRightToLeft(const CPDF_TextObject* pTextObj,
113                           const CPDF_Font* pFont,
114                           int nItems) const;
115 protected:
116     CPDFText_ParseOptions                       m_ParseOptions;
117     CFX_WordArray                                       m_CharIndex;
118     const CPDF_PageObjects*                     m_pPage;
119     PAGECHAR_InfoArray                          m_charList;
120     CFX_WideTextBuf                                     m_TextBuf;
121     PAGECHAR_InfoArray                          m_TempCharList;
122     CFX_WideTextBuf                                     m_TempTextBuf;
123     int                                                         m_parserflag;
124     CPDF_TextObject*                            m_pPreTextObj;
125     CFX_AffineMatrix                            m_perMatrix;
126     FX_BOOL                                                     m_IsParsered;
127     CFX_AffineMatrix                            m_DisplayMatrix;
128
129     SEGMENT_Array                                       m_Segment;
130     CFX_RectArray                                       m_SelRects;
131     LINEOBJ                                                     m_LineObj;
132     FX_BOOL                                                     m_TextlineDir;
133     CFX_FloatRect                                       m_CurlineRect;
134 };
135 class CPDF_TextPageFind: public IPDF_TextPageFind
136 {
137 public:
138     CPDF_TextPageFind(const IPDF_TextPage* pTextPage);
139     virtual                                                     ~CPDF_TextPageFind() {};
140 public:
141     virtual     FX_BOOL                                 FindFirst(const CFX_WideString& findwhat, int flags, int startPos = 0);
142     virtual     FX_BOOL                                 FindNext();
143     virtual     FX_BOOL                                 FindPrev();
144
145     virtual void                                        GetRectArray(CFX_RectArray& rects) const;
146     virtual int                                         GetCurOrder() const;
147     virtual int                                         GetMatchedCount()const;
148 protected:
149     void                                                        ExtractFindWhat(const CFX_WideString& findwhat);
150     FX_BOOL                                                     IsMatchWholeWord(const CFX_WideString& csPageText, int startPos, int endPos);
151     FX_BOOL                                                     ExtractSubString(CFX_WideString& rString, const FX_WCHAR* lpszFullString,
152             int iSubString, FX_WCHAR chSep);
153     CFX_WideString                                      MakeReverse(const CFX_WideString& str);
154     int                                                         ReverseFind(const CFX_WideString& csPageText, const CFX_WideString& csWord, int nStartPos, int& WordLength);
155     int                                                         GetCharIndex(int index) const;
156 private:
157     CFX_WordArray                                       m_CharIndex;
158     const IPDF_TextPage*                        m_pTextPage;
159     CFX_WideString                                      m_strText;
160     CFX_WideString                                      m_findWhat;
161     int                                                         m_flags;
162     CFX_WideStringArray                         m_csFindWhatArray;
163     int                                                         m_findNextStart;
164     int                                                         m_findPreStart;
165     FX_BOOL                                                     m_bMatchCase;
166     FX_BOOL                                                     m_bMatchWholeWord;
167     int                                                         m_resStart;
168     int                                                         m_resEnd;
169     CFX_RectArray                                       m_resArray;
170     FX_BOOL                                                     m_IsFind;
171 };
172 class CPDF_LinkExt
173 {
174 public:
175     CPDF_LinkExt() {};
176     int                                                         m_Start;
177     int                                                         m_Count;
178     CFX_WideString                                      m_strUrl;
179     virtual                                                     ~CPDF_LinkExt() {};
180 };
181 typedef CFX_ArrayTemplate<CPDF_LinkExt*> LINK_InfoArray;
182 class CPDF_LinkExtract: public IPDF_LinkExtract
183 {
184 public:
185     CPDF_LinkExtract();
186     virtual                                                     ~CPDF_LinkExtract();
187     virtual FX_BOOL                                     ExtractLinks(const IPDF_TextPage* pTextPage);
188     virtual     FX_BOOL                                 IsExtract() const
189     {
190         return m_IsParserd;
191     }
192 public:
193     virtual int                                         CountLinks() const;
194     virtual     CFX_WideString                  GetURL(int index) const;
195     virtual     void                                    GetBoundedSegment(int index, int& start, int& count) const;
196     virtual     void                                    GetRects(int index, CFX_RectArray& rects)const;
197 protected:
198     void                                                        parserLink();
199     void                                                        DeleteLinkList();
200     FX_BOOL                                                     CheckWebLink(CFX_WideString& strBeCheck);
201     FX_BOOL                                                     CheckMailLink(CFX_WideString& str);
202     FX_BOOL                                                     AppendToLinkList(int start, int count, const CFX_WideString& strUrl);
203 private:
204     LINK_InfoArray                                      m_LinkList;
205     const CPDF_TextPage*                        m_pTextPage;
206     CFX_WideString                                      m_strPageText;
207     FX_BOOL                                                     m_IsParserd;
208 };
209 FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst);
210 void NormalizeString(CFX_WideString& str);
211 void NormalizeCompositeChar(FX_WCHAR wChar, CFX_WideString& sDest);
212
213 #endif  // CORE_SRC_FPDFTEXT_TEXT_INT_H_