Merge XFA to PDFium master at 4dc95e7 on 10/28/2014
[pdfium.git] / xfa / src / fgas / include / fx_cpg.h
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

7 #ifndef _FX_CODEPAGE\r
8 #define _FX_CODEPAGE\r
9 class IFX_CodePage;\r
// Numeric code page identifiers.
// NOTE(review): the values appear to follow the Windows code page numbering
// (e.g. 437, 1252, 65001) -- confirm against the tables that consume them.

// Default / pseudo code pages.
#define FX_CODEPAGE_DefANSI 0
#define FX_CODEPAGE_DefOEM 1
#define FX_CODEPAGE_DefMAC 2
#define FX_CODEPAGE_Thread 3
#define FX_CODEPAGE_Symbol 42

// DOS / OEM code pages.
#define FX_CODEPAGE_MSDOS_US 437
#define FX_CODEPAGE_Arabic_ASMO708 708
#define FX_CODEPAGE_Arabic_ASMO449Plus 709
#define FX_CODEPAGE_Arabic_Transparent 710
#define FX_CODEPAGE_Arabic_NafithaEnhanced 711
#define FX_CODEPAGE_Arabic_TransparentASMO 720
#define FX_CODEPAGE_MSDOS_Greek1 737
#define FX_CODEPAGE_MSDOS_Baltic 775
#define FX_CODEPAGE_MSWin31_WesternEuropean 819
#define FX_CODEPAGE_MSDOS_WesternEuropean 850
#define FX_CODEPAGE_MSDOS_EasternEuropean 852
#define FX_CODEPAGE_MSDOS_Latin3 853
#define FX_CODEPAGE_MSDOS_Cyrillic 855
#define FX_CODEPAGE_MSDOS_Turkish 857
#define FX_CODEPAGE_MSDOS_Latin1Euro 858
#define FX_CODEPAGE_MSDOS_Portuguese 860
#define FX_CODEPAGE_MSDOS_Icelandic 861
#define FX_CODEPAGE_MSDOS_Hebrew 862
#define FX_CODEPAGE_MSDOS_FrenchCanadian 863
#define FX_CODEPAGE_MSDOS_Arabic 864
#define FX_CODEPAGE_MSDOS_Norwegian 865
#define FX_CODEPAGE_MSDOS_Russian 866
#define FX_CODEPAGE_MSDOS_Greek2 869
#define FX_CODEPAGE_MSDOS_Thai 874
#define FX_CODEPAGE_MSDOS_KamenickyCS 895

// East Asian multi-byte code pages.
#define FX_CODEPAGE_ShiftJIS 932
#define FX_CODEPAGE_ChineseSimplified 936
#define FX_CODEPAGE_Korean 949
#define FX_CODEPAGE_ChineseTraditional 950

// UTF-16.
#define FX_CODEPAGE_UTF16LE 1200
#define FX_CODEPAGE_UTF16BE 1201

// Windows code pages.
#define FX_CODEPAGE_MSWin_EasternEuropean 1250
#define FX_CODEPAGE_MSWin_Cyrillic 1251
#define FX_CODEPAGE_MSWin_WesternEuropean 1252
#define FX_CODEPAGE_MSWin_Greek 1253
#define FX_CODEPAGE_MSWin_Turkish 1254
#define FX_CODEPAGE_MSWin_Hebrew 1255
#define FX_CODEPAGE_MSWin_Arabic 1256
#define FX_CODEPAGE_MSWin_Baltic 1257
#define FX_CODEPAGE_MSWin_Vietnamese 1258
#define FX_CODEPAGE_Johab 1361

// Macintosh code pages.
#define FX_CODEPAGE_MAC_Roman 10000
#define FX_CODEPAGE_MAC_ShiftJIS 10001
#define FX_CODEPAGE_MAC_ChineseTraditional 10002
#define FX_CODEPAGE_MAC_Korean 10003
#define FX_CODEPAGE_MAC_Arabic 10004
#define FX_CODEPAGE_MAC_Hebrew 10005
#define FX_CODEPAGE_MAC_Greek 10006
#define FX_CODEPAGE_MAC_Cyrillic 10007
#define FX_CODEPAGE_MAC_ChineseSimplified 10008
#define FX_CODEPAGE_MAC_Thai 10021
#define FX_CODEPAGE_MAC_EasternEuropean 10029
#define FX_CODEPAGE_MAC_Turkish 10081

// UTF-32.
#define FX_CODEPAGE_UTF32LE 12000
#define FX_CODEPAGE_UTF32BE 12001

// ISO 8859 family.
#define FX_CODEPAGE_ISO8859_1 28591
#define FX_CODEPAGE_ISO8859_2 28592
#define FX_CODEPAGE_ISO8859_3 28593
#define FX_CODEPAGE_ISO8859_4 28594
#define FX_CODEPAGE_ISO8859_5 28595
#define FX_CODEPAGE_ISO8859_6 28596
#define FX_CODEPAGE_ISO8859_7 28597
#define FX_CODEPAGE_ISO8859_8 28598
#define FX_CODEPAGE_ISO8859_9 28599
#define FX_CODEPAGE_ISO8859_10 28600
#define FX_CODEPAGE_ISO8859_11 28601
#define FX_CODEPAGE_ISO8859_12 28602
#define FX_CODEPAGE_ISO8859_13 28603
#define FX_CODEPAGE_ISO8859_14 28604
#define FX_CODEPAGE_ISO8859_15 28605
#define FX_CODEPAGE_ISO8859_16 28606

// ISCII (Indic scripts).
#define FX_CODEPAGE_ISCII_Devanagari 57002
#define FX_CODEPAGE_ISCII_Bengali 57003
#define FX_CODEPAGE_ISCII_Tamil 57004
#define FX_CODEPAGE_ISCII_Telugu 57005
#define FX_CODEPAGE_ISCII_Assamese 57006
#define FX_CODEPAGE_ISCII_Oriya 57007
#define FX_CODEPAGE_ISCII_Kannada 57008
#define FX_CODEPAGE_ISCII_Malayalam 57009
#define FX_CODEPAGE_ISCII_Gujarati 57010
#define FX_CODEPAGE_ISCII_Punjabi 57011

// UTF-7 / UTF-8.
#define FX_CODEPAGE_UTF7 65000
#define FX_CODEPAGE_UTF8 65001
// Charset identifiers (all values fit in one byte).
//
// Three of the original names are misspelled ("Triditional", "Easter").
// They are kept unchanged for existing users; a correctly spelled alias is
// defined directly after each one and expands to the same value.
#define FX_CHARSET_ANSI 0
#define FX_CHARSET_Default 1
#define FX_CHARSET_Symbol 2
#define FX_CHARSET_MAC_Roman 77
#define FX_CHARSET_MAC_ShiftJIS 78
#define FX_CHARSET_MAC_Korean 79
#define FX_CHARSET_MAC_ChineseSimplified 80
#define FX_CHARSET_MAC_ChineseTriditional 81
#define FX_CHARSET_MAC_ChineseTraditional FX_CHARSET_MAC_ChineseTriditional
#define FX_CHARSET_MAC_Johab 82
#define FX_CHARSET_MAC_Hebrew 83
#define FX_CHARSET_MAC_Arabic 84
#define FX_CHARSET_MAC_Greek 85
#define FX_CHARSET_MAC_Turkish 86
#define FX_CHARSET_MAC_Thai 87
#define FX_CHARSET_MAC_EasternEuropean 88
#define FX_CHARSET_MAC_Cyrillic 89
#define FX_CHARSET_ShiftJIS 128
#define FX_CHARSET_Korean 129
#define FX_CHARSET_Johab 130
#define FX_CHARSET_ChineseSimplified 134
#define FX_CHARSET_ChineseTriditional 136
#define FX_CHARSET_ChineseTraditional FX_CHARSET_ChineseTriditional
#define FX_CHARSET_MSWin_Greek 161
#define FX_CHARSET_MSWin_Turkish 162
#define FX_CHARSET_MSWin_Vietnamese 163
#define FX_CHARSET_MSWin_Hebrew 177
#define FX_CHARSET_MSWin_Arabic 178
#define FX_CHARSET_ArabicTraditional 179
#define FX_CHARSET_ArabicUser 180
#define FX_CHARSET_HebrewUser 181
#define FX_CHARSET_MSWin_Baltic 186
#define FX_CHARSET_MSWin_Cyrillic 204
#define FX_CHARSET_Thai 222
#define FX_CHARSET_MSWin_EasterEuropean 238
#define FX_CHARSET_MSWin_EasternEuropean FX_CHARSET_MSWin_EasterEuropean
#define FX_CHARSET_US 254
#define FX_CHARSET_OEM 255
133 FX_WORD FX_GetCodePageFromCharset(FX_BYTE charset);\r
134 FX_WORD FX_GetCharsetFromCodePage(FX_WORD codepage);\r
135 FX_WORD FX_GetCodePageFromStringA(FX_LPCSTR pStr, FX_INT32 iLength);\r
136 FX_WORD FX_GetCodePageFormStringW(FX_LPCWSTR pStr, FX_INT32 iLength);\r
137 FX_WORD FX_GetDefCodePageByLanguage(FX_WORD wLanguage);\r
138 void    FX_SwapByteOrder(FX_LPWSTR pStr, FX_INT32 iLength);\r
139 void    FX_SwapByteOrderCopy(FX_LPCWSTR pSrc, FX_LPWSTR pDst, FX_INT32 iLength);\r
140 void    FX_UTF16ToWChar(FX_LPVOID pBuffer, FX_INT32 iLength);\r
141 void    FX_UTF16ToWCharCopy(const FX_WORD *pUTF16, FX_LPWSTR pWChar, FX_INT32 iLength);\r
142 void    FX_WCharToUTF16(FX_LPVOID pBuffer, FX_INT32 iLength);\r
143 void    FX_WCharToUTF16Copy(FX_LPCWSTR pWChar, FX_WORD *pUTF16, FX_INT32 iLength);\r
144 FX_INT32        FX_DecodeString(FX_WORD wCodePage, FX_LPCSTR pSrc, FX_INT32 *pSrcLen, FX_LPWSTR pDst, FX_INT32 *pDstLen, FX_BOOL bErrBreak = FALSE);\r
145 FX_INT32        FX_UTF8Decode(FX_LPCSTR pSrc, FX_INT32 *pSrcLen, FX_LPWSTR pDst, FX_INT32 *pDstLen);\r
// Kind of character encoding a code page uses.
// NOTE(review): the names suggest multi-byte / single-byte / double-byte
// character sets -- confirm against the code that sets eCPType.
enum FX_CODESYSTEM {
    FX_MBCS = 0,
    FX_SBCS = 1,
    FX_DBCS = 2
};
151 typedef struct _FX_CODEPAGE_HEADER {\r
152     FX_UINT16           uCPID;\r
153     FX_UINT8            uMinCharBytes;\r
154     FX_UINT8            uMaxCharBytes;\r
155     FX_CODESYSTEM       eCPType;\r
156     FX_BOOL                     bHasLeadByte;\r
157     FX_WCHAR            wMinChar;\r
158     FX_WCHAR            wMaxChar;\r
159     FX_WCHAR            wDefChar;\r
160     FX_WCHAR            wMinUnicode;\r
161     FX_WCHAR            wMaxUnicode;\r
162     FX_WCHAR            wDefUnicode;\r
163 } FX_CODEPAGE_HEADER;\r
// Mapping-type tags.
// NOTE(review): presumably the values stored in the uMapType fields of the
// map-table structs declared in this header -- confirm in the table code.
#define FX_CPMAPTYPE_Consecution 1
#define FX_CPMAPTYPE_Strict 2
#define FX_CPMAPTYPE_NoMapping 3
#define FX_CPMAPTYPE_Delta 4
168 typedef struct _FX_CPCU_MAPTABLE1 {\r
169     FX_UINT16  uMapType;\r
170     FX_UINT16  uUniocde;\r
171 } FX_CPCU_MAPTABLE1;\r
172 typedef struct _FX_CPCU_MAPTABLE2 {\r
173     FX_UINT8    uTrailByte;\r
174     FX_UINT8    uMapType;\r
175     FX_UINT16   uOffset;\r
176 } FX_CPCU_MAPTABLE2;\r
177 typedef struct _FX_CPCU_MAPINFO {\r
178     FX_CPCU_MAPTABLE1           *pMapTable1;\r
179     FX_CPCU_MAPTABLE2           *pMapTable2;\r
180     FX_LPCBYTE                          pMapData;\r
181 } FX_CPCU_MAPINFO;\r
182 typedef struct _FX_CPUC_MAPTABLE {\r
183     FX_UINT16   uStartUnicode;\r
184     FX_UINT16   uEndUnicode;\r
185     FX_UINT16   uMapType;\r
186     FX_UINT16   uOffset;\r
187 } FX_CPUC_MAPTABLE;\r
188 typedef struct _FX_CPUC_MAPINFO {\r
189     FX_UINT32                   uMapCount;\r
190     FX_CPUC_MAPTABLE    *pMapTable;\r
191     FX_LPCBYTE                  pMapData;\r
192 } FX_CPUC_MAPINFO;\r
193 typedef struct _FX_CODEPAGE {\r
194     FX_CODEPAGE_HEADER const    *pCPHeader;\r
195     FX_CPCU_MAPINFO const               *pCPCUMapInfo;\r
196     FX_CPUC_MAPINFO const               *pCPUCMapInfo;\r
197 } FX_CODEPAGE, * FX_LPCODEPAGE;\r
198 typedef FX_CODEPAGE const * FX_LPCCODEPAGE;\r
199 typedef struct _FX_STR2CPHASH {\r
200     FX_UINT32  uHash;\r
201     FX_UINT32  uCodePage;\r
202 } FX_STR2CPHASH;\r
203 typedef struct _FX_CHARSET_MAP {\r
204     FX_UINT16 charset;\r
205     FX_UINT16 codepage;\r
206 } FX_CHARSET_MAP;\r
207 typedef struct _FX_LANG2CPMAP {\r
208     FX_WORD     wLanguage;\r
209     FX_WORD     wCodepage;\r
210 } FX_LANG2CPMAP;\r
211 class IFX_CodePage\r
212 {\r
213 public:\r
214     static IFX_CodePage*        Create(FX_WORD wCodePage);\r
215     virtual void                        Release() = 0;\r
216     virtual FX_WORD                     GetCodePageNumber() const = 0;\r
217     virtual FX_CODESYSTEM       GetCodeSystemType() const = 0;\r
218     virtual FX_BOOL                     HasLeadByte() const = 0;\r
219     virtual FX_BOOL                     IsLeadByte(FX_BYTE byte) const = 0;\r
220     virtual FX_INT32            GetMinBytesPerChar() const = 0;\r
221     virtual FX_INT32            GetMaxBytesPerChar() const = 0;\r
222     virtual FX_WCHAR            GetMinCharcode() const = 0;\r
223     virtual FX_WCHAR            GetMaxCharcode() const = 0;\r
224     virtual FX_WCHAR            GetDefCharcode() const = 0;\r
225     virtual FX_WCHAR            GetMinUnicode() const = 0;\r
226     virtual FX_WCHAR            GetMaxUnicode() const = 0;\r
227     virtual FX_WCHAR            GetDefUnicode() const = 0;\r
228     virtual FX_BOOL                     IsValidCharcode(FX_WORD wCharcode) const = 0;\r
229     virtual FX_WCHAR            GetUnicode(FX_WORD wCharcode) const = 0;\r
230     virtual FX_BOOL                     IsValidUnicode(FX_WCHAR wUnicode) const = 0;\r
231     virtual FX_WORD                     GetCharcode(FX_WCHAR wUnicode) const = 0;\r
232 };\r
233 #endif\r