1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
7 #include "../../../include/fpdfapi/fpdf_parser.h"
// Byte classification table, indexed by byte value. Each string literal below
// covers 32 consecutive byte values (0x00-0x1F, 0x20-0x3F, ... 0xE0-0xFF).
// The class letters are the ones compared against elsewhere in this file:
//   'W' - NUL, TAB, LF, FF, CR and space (also bytes 0x80 and 0xFF)
//   'N' - the digits '0'-'9' plus '+', '-' and '.'
//   'D' - the characters % ( ) / < > [ ] { }
//   'R' - every other byte
// The letters presumably stand for Whitespace / Numeric / Delimiter / Regular.
8 extern const FX_LPCSTR _PDF_CharType =
9 "WRRRRRRRRWWRWWRRRRRRRRRRRRRRRRRR"
10 "WRRRRDRRDDRNRNNDNNNNNNNNNNRRDRDR"
11 "RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR"
12 "RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR"
13 "WRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"
14 "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"
15 "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"
16 "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRW";
// Constructs a parser from a byte pointer `pData` and a byte count `dwSize`.
// NOTE(review): the constructor body is not visible in this listing; it
// presumably records the pointer and size without copying - confirm.
20 CPDF_SimpleParser::CPDF_SimpleParser(FX_LPCBYTE pData, FX_DWORD dwSize)
// Constructs a parser from a byte-string view `str`.
// NOTE(review): only the size assignment is visible in this listing.
26 CPDF_SimpleParser::CPDF_SimpleParser(FX_BSTR str)
// The parse limit is the length of the supplied string.
29 m_dwSize = str.GetLength();
// Scans one word out of m_pData, starting at m_dwCurPos, and reports it
// through the three out-parameters:
//   pStart - set to the word's first byte inside m_pData
//   dwSize - set to the word's length in bytes
//   type   - set to a PDFWORD_* constant (PDFWORD_DELIMITER and PDFWORD_NUMBER
//            are the assignments visible here)
// Every visible read of m_pData is preceded by an `m_dwSize <= m_dwCurPos`
// end-of-data check.
// NOTE(review): many lines of this function are missing from this listing;
// the comments below describe the visible lines only.
32 void CPDF_SimpleParser::ParseWord(FX_LPCBYTE& pStart, FX_DWORD& dwSize, int& type)
// Nothing left to read.
40 if (m_dwSize <= m_dwCurPos) {
// Fetch the next byte (advancing the cursor) and classify it.
43 ch = m_pData[m_dwCurPos++];
44 chartype = _PDF_CharType[ch];
// Consume bytes for as long as they are classified 'W'.
45 while (chartype == 'W') {
46 if (m_dwSize <= m_dwCurPos) {
49 ch = m_pData[m_dwCurPos++];
50 chartype = _PDF_CharType[ch];
// Reads bytes and tests for CR / LF. Presumably this is the loop that skips a
// comment up to its end of line - the enclosing condition is not visible.
56 if (m_dwSize <= m_dwCurPos) {
59 ch = m_pData[m_dwCurPos++];
60 if (ch == '\r' || ch == '\n') {
64 chartype = _PDF_CharType[ch];
// The cursor has already moved past the word's first byte, hence the -1.
66 FX_DWORD start_pos = m_dwCurPos - 1;
67 pStart = m_pData + start_pos;
// Word begins with a byte classified 'D'.
68 if (chartype == 'D') {
71 if (m_dwSize <= m_dwCurPos) {
74 ch = m_pData[m_dwCurPos++];
75 chartype = _PDF_CharType[ch];
// Stop at the first byte that is neither 'R' nor 'N'.
76 if (chartype != 'R' && chartype != 'N') {
// Length is measured from the recorded start to the current cursor.
78 dwSize = m_dwCurPos - start_pos;
84 type = PDFWORD_DELIMITER;
87 if (m_dwSize <= m_dwCurPos) {
90 ch = m_pData[m_dwCurPos++];
// A '>' reads one further byte; presumably this detects the two-character
// ">>" token (and a matching branch handles "<<") - confirm.
96 } else if (ch == '>') {
97 if (m_dwSize <= m_dwCurPos) {
100 ch = m_pData[m_dwCurPos++];
// Non-delimiter words start out typed as numbers.
110 type = PDFWORD_NUMBER;
// A byte outside class 'N' is detected here; the action taken is not visible.
113 if (chartype != 'N') {
116 if (m_dwSize <= m_dwCurPos) {
119 ch = m_pData[m_dwCurPos++];
120 chartype = _PDF_CharType[ch];
// A 'D' or 'W' byte marks the end of the word.
121 if (chartype == 'D' || chartype == 'W') {
// Returns the next word as a view into m_pData. A word that ParseWord reports
// as a lone '<' or '(' is extended by scanning forward from the cursor, so the
// returned view runs from the opening character up to the final cursor
// position.
// NOTE(review): several lines of this function are missing from this listing.
128 CFX_ByteStringC CPDF_SimpleParser::GetWord()
133 ParseWord(pStart, dwSize, type);
// Lone '<': advance until a '>' is found or the data ends.
134 if (dwSize == 1 && pStart[0] == '<') {
135 while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') {
// Still inside the buffer, i.e. the loop stopped on a '>'.
138 if (m_dwCurPos < m_dwSize) {
// (pStart - m_pData) is the word's start offset, so this is cursor - start.
141 return CFX_ByteStringC(pStart, (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
// Lone '(': scan forward looking at ')', '\\' and nested '('.
142 } else if (dwSize == 1 && pStart[0] == '(') {
144 while (m_dwCurPos < m_dwSize) {
145 if (m_pData[m_dwCurPos] == ')') {
151 if (m_pData[m_dwCurPos] == '\\') {
// NOTE(review): this check immediately follows a read of m_pData[m_dwCurPos]
// on the previous line, so it cannot guard that read - confirm the intent.
152 if (m_dwSize <= m_dwCurPos) {
156 } else if (m_pData[m_dwCurPos] == '(') {
159 if (m_dwSize <= m_dwCurPos) {
164 if (m_dwCurPos < m_dwSize) {
167 return CFX_ByteStringC(pStart, (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
// Any other word is returned exactly as ParseWord delimited it.
169 return CFX_ByteStringC(pStart, dwSize);
// Scans forward from m_dwCurPos, comparing `token` against the data at each
// position, and finally moves the cursor past the token.
// NOTE(review): the loop body and return statements are missing from this
// listing.
171 FX_BOOL CPDF_SimpleParser::SearchToken(FX_BSTR token)
173 int token_len = token.GetLength();
// NOTE(review): if m_dwSize is unsigned (as the FX_DWORD dwSize constructor
// argument suggests) and token_len exceeds it, this subtraction wraps around -
// confirm callers never pass a token longer than the buffer.
// The comparison is strict, so the position m_dwSize - token_len itself is
// never compared against the token.
174 while (m_dwCurPos < m_dwSize - token_len) {
175 if (FXSYS_memcmp32(m_pData + m_dwCurPos, token, token_len) == 0) {
// Cursor reached the loop limit; presumably the not-found exit.
180 if (m_dwCurPos == m_dwSize - token_len) {
// Leave the cursor just after the token.
183 m_dwCurPos += token_len;
// Reads words with GetWord(); an empty word is tested for explicitly.
// NOTE(review): the comparison against `token` and the return statements are
// missing from this listing - presumably reads until `token` is found.
186 FX_BOOL CPDF_SimpleParser::SkipWord(FX_BSTR token)
189 CFX_ByteStringC word = GetWord();
// GetWord() produced nothing.
190 if (word.IsEmpty()) {
// Locates a region bracketed by `start_token` and `end_token`, reporting
// cursor positions through `start_pos` and `end_pos`.
// NOTE(review): the loop structure and return statements are missing from
// this listing.
199 FX_BOOL CPDF_SimpleParser::FindTagPair(FX_BSTR start_token, FX_BSTR end_token,
200 FX_DWORD& start_pos, FX_DWORD& end_pos)
// An empty start_token skips the search for an opening tag.
202 if (!start_token.IsEmpty()) {
203 if (!SkipWord(start_token)) {
// Cursor position at this point is reported as the region start.
206 start_pos = m_dwCurPos;
// Record the cursor before reading the next word, so end_pos refers to the
// position preceding that word.
209 end_pos = m_dwCurPos;
210 CFX_ByteStringC word = GetWord();
211 if (word.IsEmpty()) {
214 if (word == end_token) {
// Reads words while remembering recent cursor positions in a buffer of
// `nParams` entries, and can move the cursor back to one of the saved
// positions.
// NOTE(review): the loop structure, the comparison against `token`, the
// return statements and the release of pBuf are missing from this listing -
// confirm pBuf is freed on every exit path.
220 FX_BOOL CPDF_SimpleParser::FindTagParam(FX_BSTR token, int nParams)
// One saved cursor position per parameter.
223 FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams);
// Save the cursor before reading the next word.
227 pBuf[buf_index++] = m_dwCurPos;
// Index reached the end of the buffer; presumably it is reset to 0 here.
228 if (buf_index == nParams) {
232 if (buf_count > nParams) {
235 CFX_ByteStringC word = GetWord();
236 if (word.IsEmpty()) {
// Fewer positions saved than parameters requested.
241 if (buf_count < nParams) {
// Move the cursor back to a previously saved position.
244 m_dwCurPos = pBuf[buf_index];
// Converts one hexadecimal digit character to its numeric value.
// NOTE(review): the return for '0'-'9' and the fall-through return for
// non-hex characters are missing from this listing.
251 static int _hex2dec(char ch)
253 if (ch >= '0' && ch <= '9') {
// Lowercase 'a'-'f' map to 10-15.
256 if (ch >= 'a' && ch <= 'f') {
257 return ch - 'a' + 10;
// Uppercase 'A'-'F' map to 10-15.
259 if (ch >= 'A' && ch <= 'F') {
260 return ch - 'A' + 10;
// Decodes "#xx" hexadecimal escapes in `bstr` into single bytes and returns
// the result as a new string.
// NOTE(review): the early return, the plain-copy branch and the final return
// are missing from this listing.
264 CFX_ByteString PDF_NameDecode(FX_BSTR bstr)
266 int size = bstr.GetLength();
267 FX_LPCSTR pSrc = bstr.GetCStr();
// No '#' anywhere in the input, so there is nothing to decode.
268 if (FXSYS_memchr(pSrc, '#', size) == NULL) {
271 CFX_ByteString result;
// Request a buffer of `size` bytes; each escape replaces three input bytes
// with one output byte.
272 FX_LPSTR pDestStart = result.GetBuffer(size);
273 FX_LPSTR pDest = pDestStart;
274 for (int i = 0; i < size; i ++) {
// `i < size - 2` guarantees pSrc[i + 1] and pSrc[i + 2] are inside the input.
275 if (pSrc[i] == '#' && i < size - 2) {
// High digit * 16 + low digit.
276 *pDest ++ = _hex2dec(pSrc[i + 1]) * 16 + _hex2dec(pSrc[i + 2]);
// Set the final length to the number of bytes actually written.
282 result.ReleaseBuffer((FX_STRSIZE)(pDest - pDestStart));
// Overload taking an owning byte string.
// NOTE(review): the body of the no-'#' branch is missing from this listing;
// presumably it returns `orig` unchanged.
285 CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig)
// Check for '#' before doing any decoding work.
287 if (FXSYS_memchr(orig.c_str(), '#', orig.GetLength()) == NULL) {
// Otherwise decode through the FX_BSTR overload.
290 return PDF_NameDecode(CFX_ByteStringC(orig));
// Encodes `orig` by replacing selected bytes with '#' followed by two
// uppercase hexadecimal digits. A byte is escaped when it is >= 0x80, is '#',
// or is classified 'W' or 'D' in _PDF_CharType.
// NOTE(review): the length accounting, the early return and the final return
// are missing from this listing.
292 CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig)
294 FX_LPBYTE src_buf = (FX_LPBYTE)orig.c_str();
295 int src_len = orig.GetLength();
// First pass over the input using the escape test; presumably this computes
// the encoded length dest_len.
298 for (i = 0; i < src_len; i ++) {
299 FX_BYTE ch = src_buf[i];
300 if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' ||
301 _PDF_CharType[ch] == 'D') {
// Equal lengths: presumably nothing needs escaping.
307 if (dest_len == src_len) {
311 FX_LPSTR dest_buf = res.GetBuffer(dest_len);
// Second pass: write the encoded bytes.
313 for (i = 0; i < src_len; i ++) {
314 FX_BYTE ch = src_buf[i];
315 if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' ||
316 _PDF_CharType[ch] == 'D') {
// Escaped form: '#', high nibble, low nibble.
317 dest_buf[dest_len++] = '#';
318 dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16];
319 dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16];
// All other bytes are copied through unchanged.
321 dest_buf[dest_len++] = ch;
// Terminate the buffer with a zero byte.
324 dest_buf[dest_len] = 0;
// Appends a textual form of `pObj` to `buf`, selecting the output format by
// pObj->GetType().
// NOTE(review): several case labels, conditions, else-branches and the return
// statement are missing from this listing.
328 CFX_ByteTextBuf& operator << (CFX_ByteTextBuf& buf, const CPDF_Object* pObj)
// Written as " null"; presumably the branch for a NULL pObj.
331 buf << FX_BSTRC(" null");
334 switch (pObj->GetType()) {
336 buf << FX_BSTRC(" null");
// A space followed by the object's GetString() text (case label not visible).
340 buf << " " << pObj->GetString();
// String: encoded via PDF_EncodeString, with the object's IsHex() flag.
342 case PDFOBJ_STRING: {
343 CFX_ByteString str = pObj->GetString();
344 FX_BOOL bHex = ((CPDF_String*)pObj)->IsHex();
345 buf << PDF_EncodeString(str, bHex);
// "/" followed by the name-encoded string (case label not visible).
349 CFX_ByteString str = pObj->GetString();
350 buf << FX_BSTRC("/") << PDF_NameEncode(str);
// Reference: " <object number> 0 R "; the middle number is always written 0.
353 case PDFOBJ_REFERENCE: {
354 CPDF_Reference* p = (CPDF_Reference*)pObj;
355 buf << " " << p->GetRefObjNum() << FX_BSTRC(" 0 R ");
// Array: bracketed list of elements (case label not visible).
359 CPDF_Array* p = (CPDF_Array*)pObj;
360 buf << FX_BSTRC("[");
361 for (FX_DWORD i = 0; i < p->GetCount(); i ++) {
362 CPDF_Object* pElement = p->GetElement(i);
// An element with a non-zero object number is written as " <number> 0 R".
363 if (pElement->GetObjNum()) {
364 buf << " " << pElement->GetObjNum() << FX_BSTRC(" 0 R");
369 buf << FX_BSTRC("]");
// Dictionary: "<<" ... ">>", each key written as "/" plus its encoded name.
372 case PDFOBJ_DICTIONARY: {
373 CPDF_Dictionary* p = (CPDF_Dictionary*)pObj;
374 buf << FX_BSTRC("<<");
375 FX_POSITION pos = p->GetStartPos();
378 CPDF_Object* pValue = p->GetNextElement(pos, key);
379 buf << FX_BSTRC("/") << PDF_NameEncode(key);
// A value with a non-zero object number is written as " <number> 0 R ".
380 if (pValue->GetObjNum()) {
381 buf << " " << pValue->GetObjNum() << FX_BSTRC(" 0 R ");
386 buf << FX_BSTRC(">>");
// Stream: its dictionary, then the data between "stream\r\n" and
// "\r\nendstream".
389 case PDFOBJ_STREAM: {
390 CPDF_Stream* p = (CPDF_Stream*)pObj;
391 buf << p->GetDict() << FX_BSTRC("stream\r\n");
// NOTE(review): the meaning of the TRUE argument is not visible here.
393 acc.LoadAllData(p, TRUE);
394 buf.AppendBlock(acc.GetData(), acc.GetSize());
395 buf << FX_BSTRC("\r\nendstream");
// Takes a float and returns a float.
// NOTE(review): the body is not visible in this listing; the name suggests the
// value is limited to some range - confirm the bounds against the full source.
404 FX_FLOAT PDF_ClipFloat(FX_FLOAT f)
// Looks up `num` in the tree node `pNode`, using the node's "Limits", "Nums"
// and "Kids" arrays, and recursing into child nodes.
// NOTE(review): null checks, the key comparison and the return statements are
// missing from this listing.
414 static CPDF_Object* SearchNumberNode(CPDF_Dictionary* pNode, int num)
// If the node has "Limits", `num` must lie within [Limits[0], Limits[1]].
416 CPDF_Array* pLimits = pNode->GetArray("Limits");
417 if (pLimits && (num < pLimits->GetInteger(0) || num > pLimits->GetInteger(1))) {
// "Nums" is read as consecutive pairs: an integer at the even index and its
// value at the following odd index.
420 CPDF_Array* pNumbers = pNode->GetArray("Nums");
422 FX_DWORD dwCount = pNumbers->GetCount() / 2;
423 for (FX_DWORD i = 0; i < dwCount; i ++) {
424 int index = pNumbers->GetInteger(i * 2);
426 return pNumbers->GetElementValue(i * 2 + 1);
// Each dictionary in "Kids" is searched with a recursive call.
434 CPDF_Array* pKids = pNode->GetArray("Kids");
438 for (FX_DWORD i = 0; i < pKids->GetCount(); i ++) {
439 CPDF_Dictionary* pKid = pKids->GetDict(i);
443 CPDF_Object* pFound = SearchNumberNode(pKid, num);
// Looks up `num` by searching from the tree root m_pRoot.
450 CPDF_Object* CPDF_NumberTree::LookupValue(int num)
452 return SearchNumberNode(m_pRoot, num);