Merge to Master: Cover fpdftext.h API with embeddertests.
authorTom Sepez <tsepez@chromium.org>
Wed, 28 Jan 2015 23:55:04 +0000 (15:55 -0800)
committerTom Sepez <tsepez@chromium.org>
Wed, 28 Jan 2015 23:55:04 +0000 (15:55 -0800)
Original Review URL: https://codereview.chromium.org/878333003

TBR=thestig@chromium.org

Review URL: https://codereview.chromium.org/884873002

fpdfsdk/include/fpdftext.h
fpdfsdk/src/fpdftext.cpp
fpdfsdk/src/fpdftext_embeddertest.cpp
testing/resources/hello_world.in [new file with mode: 0644]
testing/resources/hello_world.pdf [new file with mode: 0644]
testing/resources/weblinks.in [new file with mode: 0644]
testing/resources/weblinks.pdf [new file with mode: 0644]

index 8b89779..d37715f 100644 (file)
@@ -16,32 +16,31 @@ extern "C" {
 
 // Function: FPDFText_LoadPage
 //                     Prepare information about all characters in a page.
-// Parameters: 
-//                     page    -       Handle to the page. Returned by FPDF_LoadPage function (in FPDFVIEW module).    
+// Parameters:
+//                     page    -       Handle to the page. Returned by FPDF_LoadPage function (in FPDFVIEW module).
 // Return value:
 //                     A handle to the text page information structure.
 //                     NULL if something goes wrong.
 // Comments:
 //                     Application must call FPDFText_ClosePage to release the text page information.
-//                     If you don't purchase Text Module , this function will return NULL.
-//     
+//
 DLLEXPORT FPDF_TEXTPAGE        STDCALL FPDFText_LoadPage(FPDF_PAGE page);
 
 // Function: FPDFText_ClosePage
 //                     Release all resources allocated for a text page information structure.
-// Parameters: 
+// Parameters:
 //                     text_page       -       Handle to a text page information structure. Returned by FPDFText_LoadPage function.
 // Return Value:
 //                     None.
 //
 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page);
-       
+
 // Function: FPDFText_CountChars
 //                     Get number of characters in a page.
-// Parameters: 
+// Parameters:
 //                     text_page       -       Handle to a text page information structure. Returned by FPDFText_LoadPage function.
 // Return value:
-//                     Number of characters in the page. Return -1 for error. 
+//                     Number of characters in the page. Return -1 for error.
 //                     Generated characters, like additional space characters, new line characters, are also counted.
 // Comments:
 //                     Characters in a page form a "stream", inside the stream, each character has an index.
@@ -52,7 +51,7 @@ DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page);
 
 // Function: FPDFText_GetUnicode
 //                     Get Unicode of a character in a page.
-// Parameters: 
+// Parameters:
 //                     text_page       -       Handle to a text page information structure. Returned by FPDFText_LoadPage function.
 //                     index           -       Zero-based index of the character.
 // Return value:
@@ -64,7 +63,7 @@ DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int
 
 // Function: FPDFText_GetFontSize
 //                     Get the font size of a particular character.
-// Parameters: 
+// Parameters:
 //                     text_page       -       Handle to a text page information structure. Returned by FPDFText_LoadPage function.
 //                     index           -       Zero-based index of the character.
 // Return value:
@@ -75,7 +74,7 @@ DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index
 
 // Function: FPDFText_GetCharBox
 //                     Get bounding box of a particular character.
-// Parameters: 
+// Parameters:
 //                     text_page       -       Handle to a text page information structure. Returned by FPDFText_LoadPage function.
 //                     index           -       Zero-based index of the character.
 //                     left            -       Pointer to a double number receiving left position of the character box.
@@ -118,7 +117,7 @@ DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
 //                     Number of characters written into the result buffer, including the trailing terminator.
 // Comments:
 //                     This function ignores characters without unicode information.
-//                     
+//
 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, int start_index, int count, unsigned short* result);
 
 // Function: FPDFText_CountRects
@@ -163,10 +162,15 @@ DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, int rect_index,
 //                     buffer          -       A unicode buffer.
 //                     buflen          -       Number of characters (not bytes) for the buffer, excluding an additional terminator.
 // Return Value:
-//                     If buffer is NULL or buflen is zero, return number of characters (not bytes) needed,
-//                     otherwise, return number of characters copied into the buffer.
+//                     If buffer is NULL or buflen is zero, return number of characters (not bytes) of text present within
+//                     the rectangle, excluding a terminating NUL.  Generally you should pass a buffer at least one larger
+//                     than this if you want a terminating NUL, which will be provided if space is available.
+//                     Otherwise, return number of characters copied into the buffer, including the terminating NUL
+//                     when space for it is available.
+// Comments:
+//                     If the buffer is too small, as much text as will fit is copied into it.
 //
-DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double left, double top, 
+DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double left, double top,
                                                                                          double right, double bottom,unsigned short* buffer,int buflen);
 
 
@@ -236,7 +240,7 @@ DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle);
 //                     Prepare information about weblinks in a page.
 // Parameters:
 //                     text_page       -       Handle to a text page information structure. Returned by FPDFText_LoadPage function.
-// Return Value:       
+// Return Value:
 //                     A handle to the page's links information structure.
 //                     NULL if something goes wrong.
 // Comments:
@@ -296,7 +300,7 @@ DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, int link_inde
 // Return Value:
 //                     None.
 //
-DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index, int rect_index, 
+DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index, int rect_index,
                                                                                double* left, double* top,double* right, double* bottom);
 
 // Function: FPDFLink_CloseWebLinks
index 264631b..63de443 100644 (file)
@@ -235,12 +235,13 @@ DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,int link_index,
        IPDF_LinkExtract* pageLink=(IPDF_LinkExtract*)link_page;
        CFX_RectArray rectArray;
        pageLink->GetRects(link_index,rectArray);
-       CFX_FloatRect rect;
-       rect=rectArray.GetAt(rect_index);
-       *left=rect.left;
-       *right=rect.right;
-       *top=rect.top;
-       *bottom=rect.bottom;
+       if (rect_index >= 0 && rect_index < rectArray.GetSize()) {
+               CFX_FloatRect rect=rectArray.GetAt(rect_index);
+               *left=rect.left;
+               *right=rect.right;
+               *top=rect.top;
+               *bottom=rect.bottom;
+       }
 }
 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page)
 {
index 3198fe0..1457832 100644 (file)
 #include "../../fpdfsdk/include/fpdftext.h"
 #include "testing/gtest/include/gtest/gtest.h"
 
+namespace {
+
+static bool check_unsigned_shorts(const char* expected,
+                                  const unsigned short* actual,
+                                  size_t length) {
+  if (length > strlen(expected) + 1) {
+    return false;
+  }
+  for (size_t i = 0; i < length; ++i) {
+    if (actual[i] != static_cast<unsigned short>(expected[i])) {
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
 class FPDFTextEmbeddertest : public EmbedderTest {
 };
 
+TEST_F(FPDFTextEmbeddertest, Text) {
+  EXPECT_TRUE(OpenDocument("testing/resources/hello_world.pdf"));
+  FPDF_FORMHANDLE form_handle = SetFormFillEnvironment();
+  FPDF_PAGE page = LoadPage(0, form_handle);
+  EXPECT_NE(nullptr, page);
+
+  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
+  EXPECT_NE(nullptr, textpage);
+
+  const char expected[] = "Hello, world!\r\nGoodbye, world!";
+  unsigned short fixed_buffer[128];
+  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
+
+  // Check includes the terminating NUL that is provided.
+  EXPECT_EQ(sizeof(expected), FPDFText_GetText(textpage, 0, 128, fixed_buffer));
+  EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected)));
+
+  // Count does not include the terminating NUL in the string literal.
+  EXPECT_EQ(sizeof(expected) - 1, FPDFText_CountChars(textpage));
+  for (size_t i = 0; i < sizeof(expected) - 1; ++i) {
+    EXPECT_EQ(expected[i], FPDFText_GetUnicode(textpage, i)) << " at " << i;
+  }
+
+  EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0));
+  EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15));
+
+  double left = 0.0;
+  double right = 0.0;
+  double bottom = 0.0;
+  double top = 0.0;
+  FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top);
+  EXPECT_NEAR(41.071, left, 0.001);
+  EXPECT_NEAR(46.243, right, 0.001);
+  EXPECT_NEAR(49.844, bottom, 0.001);
+  EXPECT_NEAR(55.520, top, 0.001);
+
+  EXPECT_EQ(4, FPDFText_GetCharIndexAtPos(
+      textpage, 42.0, 50.0, 1.0, 1.0));
+  EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(
+      textpage, 0.0, 0.0, 1.0, 1.0));
+  EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(
+      textpage, 199.0, 199.0, 1.0, 1.0));
+
+  // Test out of range indices.
+  EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(
+      textpage, 42.0, 10000000.0, 1.0, 1.0));
+  EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(
+      textpage, -1.0, 50.0, 1.0, 1.0));
+
+  // Count does not include the terminating NUL in the string literal.
+  EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, sizeof(expected) - 1));
+
+  left = 0.0;
+  right = 0.0;
+  bottom = 0.0;
+  top = 0.0;
+  FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom);
+  EXPECT_NEAR(20.847, left, 0.001);
+  EXPECT_NEAR(135.167, right, 0.001);
+  EXPECT_NEAR(96.655, bottom, 0.001);
+  EXPECT_NEAR(116.000, top, 0.001);
+
+  // Test out of range indices set outputs to (0.0, 0.0, 0.0, 0.0).
+  left = -1.0;
+  right = -1.0;
+  bottom = -1.0;
+  top = -1.0;
+  FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom);
+  EXPECT_EQ(0.0, left);
+  EXPECT_EQ(0.0, right);
+  EXPECT_EQ(0.0, bottom);
+  EXPECT_EQ(0.0, top);
+
+  left = -2.0;
+  right = -2.0;
+  bottom = -2.0;
+  top = -2.0;
+  FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom);
+  EXPECT_EQ(0.0, left);
+  EXPECT_EQ(0.0, right);
+  EXPECT_EQ(0.0, bottom);
+  EXPECT_EQ(0.0, top);
+
+  EXPECT_EQ(9, FPDFText_GetBoundedText(
+      textpage, 41.0, 56.0, 82.0, 48.0, 0, 0));
+
+  // Extract starting at character 4 as above.
+  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
+  EXPECT_EQ(1, FPDFText_GetBoundedText(
+      textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 1));
+  EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 1));
+  EXPECT_EQ(0xbdbd, fixed_buffer[1]);
+
+  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
+  EXPECT_EQ(9, FPDFText_GetBoundedText(
+      textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 9));
+  EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9));
+  EXPECT_EQ(0xbdbd, fixed_buffer[9]);
+
+  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
+  EXPECT_EQ(10, FPDFText_GetBoundedText(
+      textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 128));
+  EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9));
+  EXPECT_EQ(0u, fixed_buffer[9]);
+  EXPECT_EQ(0xbdbd, fixed_buffer[10]);
+
+  FPDFText_ClosePage(textpage);
+  ClearFormFillEnvironment(form_handle);
+}
+
+TEST_F(FPDFTextEmbeddertest, TextSearch) {
+  EXPECT_TRUE(OpenDocument("testing/resources/hello_world.pdf"));
+  FPDF_FORMHANDLE form_handle = SetFormFillEnvironment();
+  FPDF_PAGE page = LoadPage(0, form_handle);
+  EXPECT_NE(nullptr, page);
+
+  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
+  EXPECT_NE(nullptr, textpage);
+
+  // Avoid issues with system wchar_t width vs. FPDF_WideString.
+  const unsigned short nope[] = { 'n', 'o', 'p', 'e', '\0' };
+  const unsigned short world[] = { 'w', 'o', 'r', 'l', 'd', '\0' };
+  const unsigned short world_caps[] = { 'W', 'O', 'R', 'L', 'D', '\0' };
+  const unsigned short world_substr[] = { 'o', 'r', 'l', 'd', '\0' };
+
+  // No occurrences of "nope" in test page.
+  FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope, 0, 0);
+  EXPECT_NE(nullptr, search);
+  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
+  EXPECT_EQ(0, FPDFText_GetSchCount(search));
+
+  // Advancing finds nothing.
+  EXPECT_FALSE(FPDFText_FindNext(search));
+  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
+  EXPECT_EQ(0, FPDFText_GetSchCount(search));
+
+  // Retreating finds nothing.
+  EXPECT_FALSE(FPDFText_FindPrev(search));
+  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
+  EXPECT_EQ(0, FPDFText_GetSchCount(search));
+  FPDFText_FindClose(search);
+
+  // Two occurrences of "world" in test page.
+  search = FPDFText_FindStart(textpage, world, 0, 2);
+  EXPECT_NE(nullptr, search);
+
+  // Remains not found until advanced.
+  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
+  EXPECT_EQ(0, FPDFText_GetSchCount(search));
+
+  // First occurrence of "world" in this test page.
+  EXPECT_TRUE(FPDFText_FindNext(search));
+  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
+  EXPECT_EQ(5, FPDFText_GetSchCount(search));
+
+  // Last occurrence of "world" in this test page.
+  EXPECT_TRUE(FPDFText_FindNext(search));
+  EXPECT_EQ(24, FPDFText_GetSchResultIndex(search));
+  EXPECT_EQ(5, FPDFText_GetSchCount(search));
+
+  // Found position unchanged when fails to advance.
+  EXPECT_FALSE(FPDFText_FindNext(search));
+  EXPECT_EQ(24, FPDFText_GetSchResultIndex(search));
+  EXPECT_EQ(5, FPDFText_GetSchCount(search));
+
+  // Back to first occurrence.
+  EXPECT_TRUE(FPDFText_FindPrev(search));
+  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
+  EXPECT_EQ(5, FPDFText_GetSchCount(search));
+
+  // Found position unchanged when fails to retreat.
+  EXPECT_FALSE(FPDFText_FindPrev(search));
+  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
+  EXPECT_EQ(5, FPDFText_GetSchCount(search));
+  FPDFText_FindClose(search);
+
+  // Exact search unaffected by case sensitivity and whole word flags.
+  search = FPDFText_FindStart(
+      textpage, world, FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0);
+  EXPECT_NE(nullptr, search);
+  EXPECT_TRUE(FPDFText_FindNext(search));
+  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
+  EXPECT_EQ(5, FPDFText_GetSchCount(search));
+  FPDFText_FindClose(search);
+
+  // Default is case-insensitive, so matching against caps works.
+  search = FPDFText_FindStart(textpage, world_caps, 0, 0);
+  EXPECT_NE(nullptr, search);
+  EXPECT_TRUE(FPDFText_FindNext(search));
+  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
+  EXPECT_EQ(5, FPDFText_GetSchCount(search));
+  FPDFText_FindClose(search);
+
+  // But can be made case sensitive, in which case this fails.
+  search = FPDFText_FindStart(textpage, world_caps, FPDF_MATCHCASE, 0);
+  EXPECT_FALSE(FPDFText_FindNext(search));
+  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
+  EXPECT_EQ(0, FPDFText_GetSchCount(search));
+  FPDFText_FindClose(search);
+
+  // Default is match anywhere within word, so matching substring works.
+  search = FPDFText_FindStart(textpage, world_substr, 0, 0);
+  EXPECT_TRUE(FPDFText_FindNext(search));
+  EXPECT_EQ(8, FPDFText_GetSchResultIndex(search));
+  EXPECT_EQ(4, FPDFText_GetSchCount(search));
+  FPDFText_FindClose(search);
+
+  // But can be made to match word boundaries, in which case this fails.
+  search = FPDFText_FindStart(textpage, world_substr, FPDF_MATCHWHOLEWORD, 0);
+  EXPECT_FALSE(FPDFText_FindNext(search));
+  // TODO(tsepez): investigate strange index/count values in this state.
+  FPDFText_FindClose(search);
+
+  FPDFText_ClosePage(textpage);
+  ClearFormFillEnvironment(form_handle);
+}
+
 // Test that the page has characters despite a bad stream length.
 TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) {
   EXPECT_TRUE(OpenDocument("testing/resources/bug_57.pdf"));
   FPDF_FORMHANDLE form_handle = SetFormFillEnvironment();
   FPDF_PAGE page = LoadPage(0, form_handle);
   EXPECT_NE(nullptr, page);
+
   FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
   EXPECT_NE(nullptr, textpage);
   EXPECT_EQ(13, FPDFText_CountChars(textpage));
+
+  FPDFText_ClosePage(textpage);
+  ClearFormFillEnvironment(form_handle);
+}
+
+TEST_F(FPDFTextEmbeddertest, WebLinks) {
+  EXPECT_TRUE(OpenDocument("testing/resources/weblinks.pdf"));
+  FPDF_FORMHANDLE form_handle = SetFormFillEnvironment();
+  FPDF_PAGE page = LoadPage(0, form_handle);
+  EXPECT_NE(nullptr, page);
+
+  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
+  EXPECT_NE(nullptr, textpage);
+
+  FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage);
+  EXPECT_NE(nullptr, pagelink);
+
+  // Page contains two HTTP-style URLs.
+  EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink));
+
+  // Only a terminating NUL required for bogus links.
+  EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 2, nullptr, 0));
+  EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 1400, nullptr, 0));
+  EXPECT_EQ(1, FPDFLink_GetURL(pagelink, -1, nullptr, 0));
+
+  // Query the number of characters required for each link (incl NUL).
+  EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0));
+  EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0));
+
+  const char expected_url[] = "http://example.com?q=foo";
+  unsigned short fixed_buffer[128];
+
+  // Retrieve a link with too small a buffer.  Buffer will not be
+  // NUL-terminated, but must not be modified past indicated length,
+  // so pre-fill with a pattern to check write bounds.
+  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
+  EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 1));
+  EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 1));
+  EXPECT_EQ(0xbdbd, fixed_buffer[1]);
+
+  // Check buffer that doesn't have space for a terminating NUL.
+  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
+  EXPECT_EQ(sizeof(expected_url) - 1, FPDFLink_GetURL(
+      pagelink, 0, fixed_buffer, sizeof(expected_url) - 1));
+  EXPECT_TRUE(check_unsigned_shorts(
+      expected_url, fixed_buffer, sizeof(expected_url) - 1));
+  EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url) - 1]);
+
+  // Retrieve link with exactly-sized buffer.
+  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
+  EXPECT_EQ(sizeof(expected_url), FPDFLink_GetURL(
+      pagelink, 0, fixed_buffer, sizeof(expected_url)));
+  EXPECT_TRUE(check_unsigned_shorts(
+      expected_url, fixed_buffer, sizeof(expected_url)));
+  EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]);
+  EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]);
+
+  // Retrieve link with ample-sized-buffer.
+  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
+  EXPECT_EQ(sizeof(expected_url), FPDFLink_GetURL(
+      pagelink, 0, fixed_buffer, 128));
+  EXPECT_TRUE(check_unsigned_shorts(
+      expected_url, fixed_buffer, sizeof(expected_url)));
+  EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]);
+  EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]);
+
+  // Each link rendered in a single rect in this test page.
+  EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0));
+  EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1));
+
+  // Out-of-range link indices report zero rects.
+  EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1));
+  EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2));
+  EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000));
+
+  // Check boundary of valid link index with valid rect index.
+  double left = 0.0;
+  double right = 0.0;
+  double top = 0.0;
+  double bottom = 0.0;
+  FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom);
+  EXPECT_NEAR(50.791, left, 0.001);
+  EXPECT_NEAR(187.963, right, 0.001);
+  EXPECT_NEAR(97.624, bottom, 0.001);
+  EXPECT_NEAR(108.736, top, 0.001);
+
+  // Check that valid link with invalid rect index leaves parameters unchanged.
+  left = -1.0;
+  right = -1.0;
+  top = -1.0;
+  bottom = -1.0;
+  FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom);
+  EXPECT_EQ(-1.0, left);
+  EXPECT_EQ(-1.0, right);
+  EXPECT_EQ(-1.0, bottom);
+  EXPECT_EQ(-1.0, top);
+
+  // Check that invalid link index leaves parameters unchanged.
+  left = -2.0;
+  right = -2.0;
+  top = -2.0;
+  bottom = -2.0;
+  FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom);
+  EXPECT_EQ(-2.0, left);
+  EXPECT_EQ(-2.0, right);
+  EXPECT_EQ(-2.0, bottom);
+  EXPECT_EQ(-2.0, top);
+
+  FPDFLink_CloseWebLinks(pagelink);
+  FPDFText_ClosePage(textpage);
   ClearFormFillEnvironment(form_handle);
 }
diff --git a/testing/resources/hello_world.in b/testing/resources/hello_world.in
new file mode 100644 (file)
index 0000000..19fce0c
--- /dev/null
@@ -0,0 +1,56 @@
+{{header}}
+{{object 1 0}} <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+{{object 2 0}} <<
+  /Type /Pages
+  /MediaBox [ 0 0 200 200 ]
+  /Count 1
+  /Kids [ 3 0 R ]
+>>
+endobj
+{{object 3 0}} <<
+  /Type /Page
+  /Parent 2 0 R
+  /Resources <<
+    /Font <<
+      /F1 4 0 R
+      /F2 5 0 R
+    >>
+  >>
+  /Contents 6 0 R
+>>
+endobj
+{{object 4 0}} <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+{{object 5 0}} <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Helvetica
+>>
+endobj
+{{object 6 0}} <<
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream
+endobj
+{{xref}}
+trailer <<
+  /Size 6
+  /Root 1 0 R
+>>
+{{startxref}}
+%%EOF
diff --git a/testing/resources/hello_world.pdf b/testing/resources/hello_world.pdf
new file mode 100644 (file)
index 0000000..84e7705
--- /dev/null
@@ -0,0 +1,66 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+2 0 obj <<
+  /Type /Pages
+  /MediaBox [ 0 0 200 200 ]
+  /Count 1
+  /Kids [ 3 0 R ]
+>>
+endobj
+3 0 obj <<
+  /Type /Page
+  /Parent 2 0 R
+  /Resources <<
+    /Font <<
+      /F1 4 0 R
+      /F2 5 0 R
+    >>
+  >>
+  /Contents 6 0 R
+>>
+endobj
+4 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+5 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Helvetica
+>>
+endobj
+6 0 obj <<
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream
+endobj
+xref
+0 7
+0000000000 65536 f
+0000000015 00000 n
+0000000061 00000 n
+0000000154 00000 n
+0000000296 00000 n
+0000000374 00000 n
+0000000450 00000 n
+trailer <<
+  /Size 6
+  /Root 1 0 R
+>>
+startxref
+571
+%%EOF
diff --git a/testing/resources/weblinks.in b/testing/resources/weblinks.in
new file mode 100644 (file)
index 0000000..ed20b2e
--- /dev/null
@@ -0,0 +1,66 @@
+{{header}}
+{{object 1 0}} <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+{{object 2 0}} <<
+  /Type /Pages
+  /MediaBox [ 0 0 600 600 ]
+  /Count 1
+  /Kids [ 3 0 R ]
+>>
+endobj
+{{object 3 0}} <<
+  /Type /Page
+  /Parent 2 0 R
+  /Resources <<
+    /Font <<
+      /F1 4 0 R
+      /F2 5 0 R
+    >>
+  >>
+  /Contents 6 0 R
+>>
+endobj
+{{object 4 0}} <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+{{object 4 0}} <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Helvetica
+>>
+endobj
+{{object 6 0}} <<
+>>
+stream
+BT
+/F1 12 Tf
+50 50 Td
+(Hello, world! This is not a link.) Tj
+0 50 Td
+(http://example.com?q=foo. This might be a link.) Tj
+/F2 16 Tf
+0 50 Td
+(https://example.com?q=foo. This might be a link in another font.) Tj
+0 50 Td
+(javascript:alert(0).  This might be a JS link.) Tj
+0 50 Td
+(ftp://example.org.  This might be a FTP link.) Tj
+0 50 Td
+(file:///home/foo/example.txt. This might be a file link.) Tj
+0 50 Td
+(This is a rather long and pointless piece of non-link text.) Tj
+ET
+endstream
+endobj
+{{xref}}
+trailer <<
+  /Size 6
+  /Root 1 0 R
+>>
+{{startxref}}
+%%EOF
diff --git a/testing/resources/weblinks.pdf b/testing/resources/weblinks.pdf
new file mode 100644 (file)
index 0000000..3921a37
--- /dev/null
@@ -0,0 +1,76 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+2 0 obj <<
+  /Type /Pages
+  /MediaBox [ 0 0 600 600 ]
+  /Count 1
+  /Kids [ 3 0 R ]
+>>
+endobj
+3 0 obj <<
+  /Type /Page
+  /Parent 2 0 R
+  /Resources <<
+    /Font <<
+      /F1 4 0 R
+      /F2 5 0 R
+    >>
+  >>
+  /Contents 6 0 R
+>>
+endobj
+4 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+4 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Helvetica
+>>
+endobj
+6 0 obj <<
+>>
+stream
+BT
+/F1 12 Tf
+50 50 Td
+(Hello, world! This is not a link.) Tj
+0 50 Td
+(http://example.com?q=foo. This might be a link.) Tj
+/F2 16 Tf
+0 50 Td
+(https://example.com?q=foo. This might be a link in another font.) Tj
+0 50 Td
+(javascript:alert(0).  This might be a JS link.) Tj
+0 50 Td
+(ftp://example.org.  This might be a FTP link.) Tj
+0 50 Td
+(file:///home/foo/example.txt. This might be a file link.) Tj
+0 50 Td
+(This is a rather long and pointless piece of non-link text.) Tj
+ET
+endstream
+endobj
+xref
+0 7
+0000000000 65536 f
+0000000015 00000 n
+0000000061 00000 n
+0000000154 00000 n
+0000000374 00000 n
+0000000000 65536 f
+0000000450 00000 n
+trailer <<
+  /Size 6
+  /Root 1 0 R
+>>
+startxref
+963
+%%EOF