Merge to XFA: Use stdint.h types throughout PDFium.
[pdfium.git] / xfa / src / fee / src / fx_wordbreak / fx_wordbreak_impl.cpp
1 // Copyright 2014 PDFium Authors. All rights reserved.\r
2 // Use of this source code is governed by a BSD-style license that can be\r
3 // found in the LICENSE file.\r
4 \r
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com\r
6 \r
7 #include "../../../foxitlib.h"\r
8 #include "fx_wordbreak_impl.h"\r
9 #define FX_IsOdd(a) ((a) & 1)\r
10 FX_WordBreakProp FX_GetWordBreakProperty(FX_WCHAR wcCodePoint)\r
11 {\r
12     FX_DWORD    dwProperty = (FX_DWORD)gs_FX_WordBreak_CodePointProperties[wcCodePoint >> 1];\r
13     return (FX_WordBreakProp)(FX_IsOdd(wcCodePoint) ? (dwProperty & 0x0F) : (dwProperty >> 4));\r
14 }\r
15 CFX_CharIter::CFX_CharIter(const CFX_WideString & wsText)\r
16     : m_wsText(wsText)\r
17     , m_nIndex(0)\r
18 {\r
19     FXSYS_assert(!wsText.IsEmpty());\r
20 }\r
21 CFX_CharIter::~CFX_CharIter()\r
22 {\r
23 }\r
24 void CFX_CharIter::Release()\r
25 {\r
26     delete this;\r
27 }\r
28 FX_BOOL CFX_CharIter::Next(FX_BOOL bPrev )\r
29 {\r
30     if (bPrev) {\r
31         if (m_nIndex <= 0) {\r
32             return FALSE;\r
33         }\r
34         m_nIndex --;\r
35     } else {\r
36         if (m_nIndex + 1 >= m_wsText.GetLength()) {\r
37             return FALSE;\r
38         }\r
39         m_nIndex ++;\r
40     }\r
41     return TRUE;\r
42 }\r
43 FX_WCHAR CFX_CharIter::GetChar()\r
44 {\r
45     return m_wsText.GetAt(m_nIndex);\r
46 }\r
47 void CFX_CharIter::SetAt(int32_t nIndex)\r
48 {\r
49     if (nIndex < 0 || nIndex >= m_wsText.GetLength()) {\r
50         return;\r
51     }\r
52     m_nIndex = nIndex;\r
53 }\r
54 int32_t CFX_CharIter::GetAt() const\r
55 {\r
56     return m_nIndex;\r
57 }\r
58 FX_BOOL CFX_CharIter::IsEOF(FX_BOOL bTail ) const\r
59 {\r
60     return bTail ? (m_nIndex + 1 == m_wsText.GetLength()) : (m_nIndex == 0);\r
61 }\r
62 IFX_CharIter * CFX_CharIter::Clone()\r
63 {\r
64     CFX_CharIter * pIter = new CFX_CharIter(m_wsText);\r
65     pIter->m_nIndex = m_nIndex;\r
66     return pIter;\r
67 }\r
68 CFX_WordBreak::CFX_WordBreak()\r
69     : m_pPreIter(NULL)\r
70     , m_pCurIter(NULL)\r
71 {\r
72 }\r
73 CFX_WordBreak::~CFX_WordBreak()\r
74 {\r
75     if (m_pPreIter) {\r
76         m_pPreIter->Release();\r
77         m_pPreIter = NULL;\r
78     }\r
79     if (m_pCurIter) {\r
80         m_pCurIter->Release();\r
81         m_pCurIter = NULL;\r
82     }\r
83 }\r
84 void CFX_WordBreak::Release()\r
85 {\r
86     delete this;\r
87 }\r
88 void CFX_WordBreak::Attach(IFX_CharIter * pIter)\r
89 {\r
90     FXSYS_assert(pIter);\r
91     m_pCurIter = pIter;\r
92 }\r
93 void CFX_WordBreak::Attach(const CFX_WideString &wsText)\r
94 {\r
95     m_pCurIter = new CFX_CharIter(wsText);\r
96 }\r
97 FX_BOOL CFX_WordBreak::Next(FX_BOOL bPrev)\r
98 {\r
99     IFX_CharIter * pIter = bPrev ? m_pPreIter->Clone() : m_pCurIter->Clone();\r
100     if (pIter->IsEOF(!bPrev)) {\r
101         return FALSE;\r
102     }\r
103     pIter->Next(bPrev);\r
104     if (!FindNextBreakPos(pIter, bPrev, TRUE)) {\r
105         pIter->Release();\r
106         return FALSE;\r
107     }\r
108     if (bPrev) {\r
109         m_pCurIter->Release();\r
110         m_pCurIter = m_pPreIter;\r
111         m_pCurIter->Next(TRUE);\r
112         m_pPreIter = pIter;\r
113     } else {\r
114         m_pPreIter->Release();\r
115         m_pPreIter = m_pCurIter;\r
116         m_pPreIter->Next();\r
117         m_pCurIter = pIter;\r
118     }\r
119     return TRUE;\r
120 }\r
121 void CFX_WordBreak::SetAt(int32_t nIndex)\r
122 {\r
123     if (m_pPreIter) {\r
124         m_pPreIter->Release();\r
125         m_pPreIter = NULL;\r
126     }\r
127     m_pCurIter->SetAt(nIndex);\r
128     FindNextBreakPos(m_pCurIter, TRUE, FALSE);\r
129     m_pPreIter = m_pCurIter;\r
130     m_pCurIter = m_pPreIter->Clone();\r
131     FindNextBreakPos(m_pCurIter, FALSE, FALSE);\r
132 }\r
133 int32_t CFX_WordBreak::GetWordPos() const\r
134 {\r
135     return m_pPreIter->GetAt();\r
136 }\r
137 int32_t CFX_WordBreak::GetWordLength() const\r
138 {\r
139     return m_pCurIter->GetAt() - m_pPreIter->GetAt() + 1;\r
140 }\r
141 void CFX_WordBreak::GetWord(CFX_WideString &wsWord) const\r
142 {\r
143     int32_t nWordLength = GetWordLength();\r
144     if (nWordLength <= 0) {\r
145         return;\r
146     }\r
147     FX_LPWSTR lpBuf = wsWord.GetBuffer(nWordLength);\r
148     IFX_CharIter * pTempIter = m_pPreIter->Clone();\r
149     int32_t i = 0;\r
150     while (pTempIter->GetAt() <= m_pCurIter->GetAt()) {\r
151         lpBuf[i++] = pTempIter->GetChar();\r
152         FX_BOOL bEnd = pTempIter->Next();\r
153         if (!bEnd) {\r
154             break;\r
155         }\r
156     }\r
157     pTempIter->Release();\r
158     wsWord.ReleaseBuffer(nWordLength);\r
159 }\r
160 FX_BOOL CFX_WordBreak::IsEOF(FX_BOOL bTail) const\r
161 {\r
162     return m_pCurIter->IsEOF(bTail);\r
163 }\r
164 FX_BOOL CFX_WordBreak::FindNextBreakPos(IFX_CharIter * pIter, FX_BOOL bPrev,\r
165                                         FX_BOOL bFromNext )\r
166 {\r
167     FX_WordBreakProp ePreType   = FX_WordBreakProp_None;\r
168     FX_WordBreakProp eCurType   = FX_WordBreakProp_None;\r
169     FX_WordBreakProp eNextType = FX_WordBreakProp_None;\r
170     if (pIter->IsEOF(!bPrev)) {\r
171         return TRUE;\r
172     }\r
173     if (!(bFromNext || pIter->IsEOF(bPrev))) {\r
174         pIter->Next(!bPrev);\r
175         FX_WCHAR        wcTemp  = pIter->GetChar();\r
176         ePreType = FX_GetWordBreakProperty(wcTemp);\r
177         pIter->Next(bPrev);\r
178     }\r
179     FX_WCHAR    wcTemp  = pIter->GetChar();\r
180     eCurType = FX_GetWordBreakProperty(wcTemp);\r
181     FX_BOOL bFirst = TRUE;\r
182     do {\r
183         pIter->Next(bPrev);\r
184         FX_WCHAR        wcTemp  = pIter->GetChar();\r
185         eNextType = FX_GetWordBreakProperty(wcTemp);\r
186         FX_WORD wBreak = gs_FX_WordBreak_Table[eCurType] & ((FX_WORD)(1 << eNextType));\r
187         if (wBreak) {\r
188             if (pIter->IsEOF(!bPrev)) {\r
189                 pIter->Next(!bPrev);\r
190                 return TRUE;\r
191             }\r
192             if (bFirst) {\r
193                 int32_t nFlags = 0;\r
194                 if (eCurType == FX_WordBreakProp_MidLetter) {\r
195                     if (eNextType == FX_WordBreakProp_ALetter) {\r
196                         nFlags = 1;\r
197                     }\r
198                 } else if (eCurType == FX_WordBreakProp_MidNum) {\r
199                     if (eNextType == FX_WordBreakProp_Numberic) {\r
200                         nFlags = 2;\r
201                     }\r
202                 } else if (eCurType == FX_WordBreakProp_MidNumLet) {\r
203                     if (eNextType == FX_WordBreakProp_ALetter) {\r
204                         nFlags = 1;\r
205                     } else if (eNextType == FX_WordBreakProp_Numberic) {\r
206                         nFlags = 2;\r
207                     }\r
208                 }\r
209                 if (nFlags > 0) {\r
210                     FXSYS_assert(nFlags <= 2);\r
211                     if (!((nFlags == 1 && ePreType == FX_WordBreakProp_ALetter) ||\r
212                             (nFlags == 2 && ePreType == FX_WordBreakProp_Numberic))) {\r
213                         pIter->Next(!bPrev);\r
214                         return TRUE;\r
215                     }\r
216                     pIter->Next(bPrev);\r
217                     wBreak = FALSE;\r
218                 }\r
219                 bFirst = FALSE;\r
220             }\r
221             if (wBreak) {\r
222                 int32_t nFlags = 0;\r
223                 if (eNextType == FX_WordBreakProp_MidLetter) {\r
224                     if (eCurType == FX_WordBreakProp_ALetter) {\r
225                         nFlags = 1;\r
226                     }\r
227                 } else if (eNextType == FX_WordBreakProp_MidNum) {\r
228                     if (eCurType == FX_WordBreakProp_Numberic) {\r
229                         nFlags = 2;\r
230                     }\r
231                 } else if (eNextType == FX_WordBreakProp_MidNumLet) {\r
232                     if (eCurType == FX_WordBreakProp_ALetter) {\r
233                         nFlags = 1;\r
234                     } else if (eCurType == FX_WordBreakProp_Numberic) {\r
235                         nFlags = 2;\r
236                     }\r
237                 }\r
238                 if (nFlags <= 0) {\r
239                     pIter->Next(!bPrev);\r
240                     return TRUE;\r
241                 }\r
242                 FXSYS_assert(nFlags <= 2);\r
243                 pIter->Next(bPrev);\r
244                 wcTemp = pIter->GetChar();\r
245                 eNextType = (FX_WordBreakProp)FX_GetWordBreakProperty(wcTemp);\r
246                 if (!((nFlags == 1 && eNextType == FX_WordBreakProp_ALetter) ||\r
247                         (nFlags == 2 && eNextType == FX_WordBreakProp_Numberic))) {\r
248                     pIter->Next(!bPrev);\r
249                     pIter->Next(!bPrev);\r
250                     return TRUE;\r
251                 }\r
252             }\r
253         }\r
254         ePreType        = eCurType;\r
255         eCurType        = eNextType;\r
256         bFirst          = FALSE;\r
257     } while (!pIter->IsEOF(!bPrev));\r
258     return TRUE;\r
259 }\r
260 IFX_WordBreak * FX_WordBreak_Create()\r
261 {\r
262     return new CFX_WordBreak;\r
263 }\r