xref: /aosp_15_r20/external/pdfium/core/fpdfapi/parser/cpdf_parser_unittest.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2015 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "core/fpdfapi/parser/cpdf_parser.h"
6 
7 #include <limits>
8 #include <memory>
9 #include <ostream>
10 #include <string>
11 #include <utility>
12 #include <vector>
13 
14 #include "core/fpdfapi/parser/cpdf_dictionary.h"
15 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
16 #include "core/fpdfapi/parser/cpdf_object.h"
17 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
18 #include "core/fxcrt/cfx_read_only_span_stream.h"
19 #include "core/fxcrt/fx_extension.h"
20 #include "core/fxcrt/fx_stream.h"
21 #include "core/fxcrt/retain_ptr.h"
22 #include "testing/gmock/include/gmock/gmock.h"
23 #include "testing/gtest/include/gtest/gtest.h"
24 #include "testing/utils/path_service.h"
25 
26 using testing::ElementsAre;
27 using testing::Pair;
28 using testing::Return;
29 
30 namespace {
31 
// Returns a copy of the cross-reference entry that `parser` holds for
// `obj_num`, or a default-constructed ObjectInfo when the table has no entry
// for that object number.
CPDF_Parser::ObjectInfo GetObjInfo(const CPDF_Parser& parser,
                                   uint32_t obj_num) {
  const auto* entry = parser.GetCrossRefTable()->GetObjectInfo(obj_num);
  if (!entry) {
    return CPDF_Parser::ObjectInfo();
  }
  return *entry;
}
37 
38 class TestObjectsHolder final : public CPDF_Parser::ParsedObjectsHolder {
39  public:
40   TestObjectsHolder() = default;
41   ~TestObjectsHolder() override = default;
42 
43   // CPDF_Parser::ParsedObjectsHolder:
TryInit()44   bool TryInit() override { return true; }
45   MOCK_METHOD1(ParseIndirectObject, RetainPtr<CPDF_Object>(uint32_t objnum));
46 };
47 
48 }  // namespace
49 
50 // Test-only helper to support Gmock. Cannot be in an anonymous namespace.
operator ==(const CPDF_Parser::ObjectInfo & lhs,const CPDF_Parser::ObjectInfo & rhs)51 bool operator==(const CPDF_Parser::ObjectInfo& lhs,
52                 const CPDF_Parser::ObjectInfo& rhs) {
53   if (lhs.type != rhs.type) {
54     return false;
55   }
56 
57   if (lhs.gennum != rhs.gennum) {
58     return false;
59   }
60 
61   switch (lhs.type) {
62     case CPDF_Parser::ObjectType::kFree:
63       return true;
64     case CPDF_Parser::ObjectType::kNormal:
65       return lhs.pos == rhs.pos;
66     case CPDF_Parser::ObjectType::kCompressed:
67       return lhs.archive.obj_num == rhs.archive.obj_num &&
68              lhs.archive.obj_index == rhs.archive.obj_index;
69     case CPDF_Parser::ObjectType::kObjStream:
70       return false;
71   }
72 }
73 
74 // Test-only helper to let Gmock pretty-print `info`. Cannot be in an anonymous
75 // namespace.
operator <<(std::ostream & os,const CPDF_Parser::ObjectInfo & info)76 std::ostream& operator<<(std::ostream& os,
77                          const CPDF_Parser::ObjectInfo& info) {
78   os << "(";
79   switch (info.type) {
80     case CPDF_Parser::ObjectType::kFree:
81       os << "Free object";
82       break;
83     case CPDF_Parser::ObjectType::kNormal:
84       os << "Normal object, pos: " << info.pos;
85       break;
86     case CPDF_Parser::ObjectType::kCompressed:
87       os << "Compressed object, archive obj_num: " << info.archive.obj_num
88          << ", archive obj_index: " << info.archive.obj_index;
89       break;
90     case CPDF_Parser::ObjectType::kObjStream:
91       os << "ObjectStream object";
92       break;
93   }
94   os << ", gennum: " << info.gennum << ")";
95   return os;
96 }
97 
98 // A wrapper class to help test member functions of CPDF_Parser.
99 class CPDF_TestParser final : public CPDF_Parser {
100  public:
CPDF_TestParser()101   CPDF_TestParser() : CPDF_Parser(&object_holder_) {}
102   ~CPDF_TestParser() = default;
103 
104   // Setup reading from a file and initial states.
InitTestFromFile(const char * path)105   bool InitTestFromFile(const char* path) {
106     RetainPtr<IFX_SeekableReadStream> pFileAccess =
107         IFX_SeekableReadStream::CreateFromFilename(path);
108     if (!pFileAccess)
109       return false;
110 
111     // For the test file, the header is set at the beginning.
112     SetSyntaxParserForTesting(
113         std::make_unique<CPDF_SyntaxParser>(std::move(pFileAccess)));
114     return true;
115   }
116 
117   // Setup reading from a buffer and initial states.
InitTestFromBufferWithOffset(pdfium::span<const uint8_t> buffer,FX_FILESIZE header_offset)118   bool InitTestFromBufferWithOffset(pdfium::span<const uint8_t> buffer,
119                                     FX_FILESIZE header_offset) {
120     SetSyntaxParserForTesting(CPDF_SyntaxParser::CreateForTesting(
121         pdfium::MakeRetain<CFX_ReadOnlySpanStream>(buffer), header_offset));
122     return true;
123   }
124 
InitTestFromBuffer(pdfium::span<const uint8_t> buffer)125   bool InitTestFromBuffer(pdfium::span<const uint8_t> buffer) {
126     return InitTestFromBufferWithOffset(buffer, 0 /*header_offset*/);
127   }
128 
129   // Expose protected CPDF_Parser methods for testing.
130   using CPDF_Parser::LoadCrossRefV4;
131   using CPDF_Parser::ParseLinearizedHeader;
132   using CPDF_Parser::ParseStartXRef;
133   using CPDF_Parser::RebuildCrossRef;
134   using CPDF_Parser::StartParseInternal;
135 
object_holder()136   TestObjectsHolder& object_holder() { return object_holder_; }
137 
138  private:
139   TestObjectsHolder object_holder_;
140 };
141 
// RebuildCrossRef() on parser_rebuildxref_correct.pdf succeeds, and objects 0
// through 6 end up with the expected offsets and generation numbers.
TEST(ParserTest, RebuildCrossRefCorrectly) {
  CPDF_TestParser parser;
  std::string pdf_path;
  ASSERT_TRUE(PathService::GetTestFilePath("parser_rebuildxref_correct.pdf",
                                           &pdf_path));
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;

  ASSERT_TRUE(parser.RebuildCrossRef());

  // Both arrays are indexed by object number.
  const FX_FILESIZE expected_offsets[] = {0, 15, 61, 154, 296, 374, 450};
  const uint16_t expected_gennums[] = {0, 0, 2, 4, 6, 8, 0};
  for (size_t obj_num = 0; obj_num < std::size(expected_offsets); ++obj_num) {
    EXPECT_EQ(expected_offsets[obj_num], GetObjInfo(parser, obj_num).pos);
  }
  for (size_t obj_num = 0; obj_num < std::size(expected_gennums); ++obj_num) {
    EXPECT_EQ(expected_gennums[obj_num], GetObjInfo(parser, obj_num).gennum);
  }

  const CPDF_CrossRefTable* table = parser.GetCrossRefTable();
  ASSERT_TRUE(table);
  EXPECT_EQ(0u, table->trailer_object_number());
}
161 
// RebuildCrossRef() reports failure for
// parser_rebuildxref_error_notrailer.pdf.
TEST(ParserTest, RebuildCrossRefFailed) {
  CPDF_TestParser parser;
  std::string pdf_path;
  ASSERT_TRUE(PathService::GetTestFilePath(
      "parser_rebuildxref_error_notrailer.pdf", &pdf_path));
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;

  const bool rebuilt = parser.RebuildCrossRef();
  ASSERT_FALSE(rebuilt);
}
171 
TEST(ParserTest,LoadCrossRefV4)172 TEST(ParserTest, LoadCrossRefV4) {
173   {
174     static const unsigned char kXrefTable[] =
175         "xref \n"
176         "0 6 \n"
177         "0000000003 65535 f \n"
178         "0000000017 00000 n \n"
179         "0000000081 00000 n \n"
180         "0000000000 00007 f \n"
181         "0000000331 00000 n \n"
182         "0000000409 00000 n \n"
183         "trail";  // Needed to end cross ref table reading.
184     CPDF_TestParser parser;
185     ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));
186 
187     ASSERT_TRUE(parser.LoadCrossRefV4(0, false));
188     static const FX_FILESIZE kOffsets[] = {0, 17, 81, 0, 331, 409};
189     static const CPDF_TestParser::ObjectType kTypes[] = {
190         CPDF_TestParser::ObjectType::kFree,
191         CPDF_TestParser::ObjectType::kNotCompressed,
192         CPDF_TestParser::ObjectType::kNotCompressed,
193         CPDF_TestParser::ObjectType::kFree,
194         CPDF_TestParser::ObjectType::kNotCompressed,
195         CPDF_TestParser::ObjectType::kNotCompressed};
196     static_assert(std::size(kOffsets) == std::size(kTypes),
197                   "kOffsets / kTypes size mismatch");
198     for (size_t i = 0; i < std::size(kOffsets); ++i) {
199       EXPECT_EQ(kOffsets[i], GetObjInfo(parser, i).pos);
200       EXPECT_EQ(kTypes[i], GetObjInfo(parser, i).type);
201     }
202   }
203   {
204     static const unsigned char kXrefTable[] =
205         "xref \n"
206         "0 1 \n"
207         "0000000000 65535 f \n"
208         "3 1 \n"
209         "0000025325 00000 n \n"
210         "8 2 \n"
211         "0000025518 00002 n \n"
212         "0000025635 00000 n \n"
213         "12 1 \n"
214         "0000025777 00000 n \n"
215         "trail";  // Needed to end cross ref table reading.
216     CPDF_TestParser parser;
217     ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));
218 
219     ASSERT_TRUE(parser.LoadCrossRefV4(0, false));
220     static const FX_FILESIZE kOffsets[] = {0, 0,     0,     25325, 0, 0,    0,
221                                            0, 25518, 25635, 0,     0, 25777};
222     static const CPDF_TestParser::ObjectType kTypes[] = {
223         CPDF_TestParser::ObjectType::kFree,
224         CPDF_TestParser::ObjectType::kFree,
225         CPDF_TestParser::ObjectType::kFree,
226         CPDF_TestParser::ObjectType::kNotCompressed,
227         CPDF_TestParser::ObjectType::kFree,
228         CPDF_TestParser::ObjectType::kFree,
229         CPDF_TestParser::ObjectType::kFree,
230         CPDF_TestParser::ObjectType::kFree,
231         CPDF_TestParser::ObjectType::kNotCompressed,
232         CPDF_TestParser::ObjectType::kNotCompressed,
233         CPDF_TestParser::ObjectType::kFree,
234         CPDF_TestParser::ObjectType::kFree,
235         CPDF_TestParser::ObjectType::kNotCompressed};
236     static_assert(std::size(kOffsets) == std::size(kTypes),
237                   "kOffsets / kTypes size mismatch");
238     for (size_t i = 0; i < std::size(kOffsets); ++i) {
239       EXPECT_EQ(kOffsets[i], GetObjInfo(parser, i).pos);
240       EXPECT_EQ(kTypes[i], GetObjInfo(parser, i).type);
241     }
242   }
243   {
244     static const unsigned char kXrefTable[] =
245         "xref \n"
246         "0 1 \n"
247         "0000000000 65535 f \n"
248         "3 1 \n"
249         "0000025325 00000 n \n"
250         "8 2 \n"
251         "0000000000 65535 f \n"
252         "0000025635 00000 n \n"
253         "12 1 \n"
254         "0000025777 00000 n \n"
255         "trail";  // Needed to end cross ref table reading.
256     CPDF_TestParser parser;
257     ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));
258 
259     ASSERT_TRUE(parser.LoadCrossRefV4(0, false));
260     static const FX_FILESIZE kOffsets[] = {0, 0, 0,     25325, 0, 0,    0,
261                                            0, 0, 25635, 0,     0, 25777};
262     static const CPDF_TestParser::ObjectType kTypes[] = {
263         CPDF_TestParser::ObjectType::kFree,
264         CPDF_TestParser::ObjectType::kFree,
265         CPDF_TestParser::ObjectType::kFree,
266         CPDF_TestParser::ObjectType::kNotCompressed,
267         CPDF_TestParser::ObjectType::kFree,
268         CPDF_TestParser::ObjectType::kFree,
269         CPDF_TestParser::ObjectType::kFree,
270         CPDF_TestParser::ObjectType::kFree,
271         CPDF_TestParser::ObjectType::kFree,
272         CPDF_TestParser::ObjectType::kNotCompressed,
273         CPDF_TestParser::ObjectType::kFree,
274         CPDF_TestParser::ObjectType::kFree,
275         CPDF_TestParser::ObjectType::kNotCompressed};
276     static_assert(std::size(kOffsets) == std::size(kTypes),
277                   "kOffsets / kTypes size mismatch");
278     for (size_t i = 0; i < std::size(kOffsets); ++i) {
279       EXPECT_EQ(kOffsets[i], GetObjInfo(parser, i).pos);
280       EXPECT_EQ(kTypes[i], GetObjInfo(parser, i).type);
281     }
282   }
283   {
284     static const unsigned char kXrefTable[] =
285         "xref \n"
286         "0 7 \n"
287         "0000000002 65535 f \n"
288         "0000000023 00000 n \n"
289         "0000000003 65535 f \n"
290         "0000000004 65535 f \n"
291         "0000000000 65535 f \n"
292         "0000000045 00000 n \n"
293         "0000000179 00000 n \n"
294         "trail";  // Needed to end cross ref table reading.
295     CPDF_TestParser parser;
296     ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));
297 
298     ASSERT_TRUE(parser.LoadCrossRefV4(0, false));
299     static const FX_FILESIZE kOffsets[] = {0, 23, 0, 0, 0, 45, 179};
300     static const CPDF_TestParser::ObjectType kTypes[] = {
301         CPDF_TestParser::ObjectType::kFree,
302         CPDF_TestParser::ObjectType::kNotCompressed,
303         CPDF_TestParser::ObjectType::kFree,
304         CPDF_TestParser::ObjectType::kFree,
305         CPDF_TestParser::ObjectType::kFree,
306         CPDF_TestParser::ObjectType::kNotCompressed,
307         CPDF_TestParser::ObjectType::kNotCompressed};
308     static_assert(std::size(kOffsets) == std::size(kTypes),
309                   "kOffsets / kTypes size mismatch");
310     for (size_t i = 0; i < std::size(kOffsets); ++i) {
311       EXPECT_EQ(kOffsets[i], GetObjInfo(parser, i).pos);
312       EXPECT_EQ(kTypes[i], GetObjInfo(parser, i).type);
313     }
314   }
315   {
316     // Regression test for https://crbug.com/945624 - Make sure the parser
317     // can correctly handle table sizes that are multiples of the read size,
318     // which is 1024.
319     std::string xref_table = "xref \n 0 2048 \n";
320     xref_table.reserve(41000);
321     for (int i = 0; i < 2048; ++i) {
322       char buffer[21];
323       snprintf(buffer, sizeof(buffer), "%010d 00000 n \n", i + 1);
324       xref_table += buffer;
325     }
326     xref_table += "trail";  // Needed to end cross ref table reading.
327     CPDF_TestParser parser;
328     ASSERT_TRUE(parser.InitTestFromBuffer(
329         pdfium::make_span(reinterpret_cast<const uint8_t*>(xref_table.c_str()),
330                           xref_table.size())));
331 
332     ASSERT_TRUE(parser.LoadCrossRefV4(0, false));
333     for (size_t i = 0; i < 2048; ++i) {
334       EXPECT_EQ(static_cast<int>(i) + 1, GetObjInfo(parser, i).pos);
335       EXPECT_EQ(CPDF_TestParser::ObjectType::kNotCompressed,
336                 GetObjInfo(parser, i).type);
337     }
338   }
339 }
340 
// ParseStartXRef() on annotation_stamp_with_ap.pdf yields offset 100940, and
// the indirect object parsed at that offset is object number 75.
TEST(ParserTest, ParseStartXRef) {
  constexpr FX_FILESIZE kExpectedStartXRef = 100940;

  CPDF_TestParser parser;
  std::string pdf_path;
  ASSERT_TRUE(
      PathService::GetTestFilePath("annotation_stamp_with_ap.pdf", &pdf_path));
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;

  EXPECT_EQ(kExpectedStartXRef, parser.ParseStartXRef());

  RetainPtr<CPDF_Object> object_at_startxref =
      parser.ParseIndirectObjectAt(kExpectedStartXRef, 0);
  ASSERT_TRUE(object_at_startxref);
  EXPECT_EQ(75u, object_at_startxref->GetObjNum());
}
354 
// Same checks as ParseStartXRef, but the file contents are placed
// `kTestHeaderOffset` bytes into the buffer handed to the parser and the
// parser is told about that offset. The expected startxref value and object
// number are unchanged.
TEST(ParserTest, ParseStartXRefWithHeaderOffset) {
  static constexpr FX_FILESIZE kTestHeaderOffset = 765;
  std::string test_file;
  ASSERT_TRUE(
      PathService::GetTestFilePath("annotation_stamp_with_ap.pdf", &test_file));
  RetainPtr<IFX_SeekableReadStream> pFileAccess =
      IFX_SeekableReadStream::CreateFromFilename(test_file.c_str());
  ASSERT_TRUE(pFileAccess) << test_file;

  // The vector is value-initialized, so the file contents are preceded by
  // `kTestHeaderOffset` zero bytes.
  std::vector<unsigned char> data(pFileAccess->GetSize() + kTestHeaderOffset);
  ASSERT_TRUE(pFileAccess->ReadBlockAtOffset(
      pdfium::make_span(data).subspan(kTestHeaderOffset), 0));
  CPDF_TestParser parser;
  // Check the setup result like the other tests in this file do, rather than
  // silently discarding it.
  ASSERT_TRUE(parser.InitTestFromBufferWithOffset(data, kTestHeaderOffset));

  EXPECT_EQ(100940, parser.ParseStartXRef());
  RetainPtr<CPDF_Object> cross_ref_v5_obj =
      parser.ParseIndirectObjectAt(100940, 0);
  ASSERT_TRUE(cross_ref_v5_obj);
  EXPECT_EQ(75u, cross_ref_v5_obj->GetObjNum());
}
376 
// ParseLinearizedHeader() succeeds on linearized.pdf when the file contents
// are placed `kTestHeaderOffset` bytes into the buffer handed to the parser,
// and the cross-reference table then reports trailer object number 0.
TEST(ParserTest, ParseLinearizedWithHeaderOffset) {
  static constexpr FX_FILESIZE kTestHeaderOffset = 765;
  std::string test_file;
  ASSERT_TRUE(PathService::GetTestFilePath("linearized.pdf", &test_file));
  RetainPtr<IFX_SeekableReadStream> pFileAccess =
      IFX_SeekableReadStream::CreateFromFilename(test_file.c_str());
  ASSERT_TRUE(pFileAccess) << test_file;

  // The vector is value-initialized, so the file contents are preceded by
  // `kTestHeaderOffset` zero bytes.
  std::vector<unsigned char> data(pFileAccess->GetSize() + kTestHeaderOffset);
  ASSERT_TRUE(pFileAccess->ReadBlockAtOffset(
      pdfium::make_span(data).subspan(kTestHeaderOffset), 0));

  CPDF_TestParser parser;
  // Check the setup result like the other tests in this file do, rather than
  // silently discarding it.
  ASSERT_TRUE(parser.InitTestFromBufferWithOffset(data, kTestHeaderOffset));
  EXPECT_TRUE(parser.ParseLinearizedHeader());

  const CPDF_CrossRefTable* cross_ref_table = parser.GetCrossRefTable();
  ASSERT_TRUE(cross_ref_table);
  EXPECT_EQ(0u, cross_ref_table->trailer_object_number());
}
397 
// Parsing the data below is expected to fail with FORMAT_ERROR and to leave
// the cross-reference table present but without any object entries.
TEST(ParserTest, BadStartXrefShouldNotBuildCrossRefTable) {
  const unsigned char kData[] =
      "%PDF1-7 0 obj <</Size 2 /W [0 0 0]\n>>\n"
      "stream\n"
      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "6\n"
      "%%EOF\n";

  CPDF_TestParser parser;
  ASSERT_TRUE(parser.InitTestFromBuffer(kData));

  const auto parse_result = parser.StartParseInternal();
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, parse_result);

  const CPDF_CrossRefTable* table = parser.GetCrossRefTable();
  ASSERT_TRUE(table);
  EXPECT_EQ(0u, table->objects_info().size());
}
414 
415 class ParserXRefTest : public testing::Test {
416  public:
417   ParserXRefTest() = default;
418   ~ParserXRefTest() override = default;
419 
420   // testing::Test:
SetUp()421   void SetUp() override {
422     // Satisfy CPDF_Parser's checks, so the test data below can concentrate on
423     // the /XRef stream and avoid also providing other valid dictionaries.
424     dummy_root_ = pdfium::MakeRetain<CPDF_Dictionary>();
425     EXPECT_CALL(parser().object_holder(), ParseIndirectObject)
426         .WillRepeatedly(Return(dummy_root_));
427   }
428 
parser()429   CPDF_TestParser& parser() { return parser_; }
430 
431  private:
432   RetainPtr<CPDF_Dictionary> dummy_root_;
433   CPDF_TestParser parser_;
434 };
435 
TEST_F(ParserXRefTest, XrefObjectIndicesTooBig) {
  // Since /Index starts at 4194303, the object number will go past
  // `kMaxObjectNumber`.
  static_assert(CPDF_Parser::kMaxObjectNumber == 4194304,
                "Unexpected kMaxObjectNumber");
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Index [4194303 3]\n"
      "  /Root 1 0 R\n"
      "  /Size 4194306\n"
      "  /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTable();
  ASSERT_TRUE(table);
  const auto& objects_info = table->objects_info();

  // Builds an expected entry of the given type at offset 0; the generation
  // number keeps its default value.
  auto entry_at_zero = [](CPDF_Parser::ObjectType type) {
    CPDF_Parser::ObjectInfo info;
    info.type = type;
    info.pos = 0;
    return info;
  };

  // This should be the only object from table. Subsequent objects have object
  // numbers that are too big.
  const CPDF_Parser::ObjectInfo only_valid_object =
      entry_at_zero(CPDF_Parser::ObjectType::kNormal);

  // TODO(thestig): Should the xref table contain object 4194305?
  // Consider reworking CPDF_Parser's object representation to avoid having to
  // store this placeholder object.
  const CPDF_Parser::ObjectInfo placeholder_object =
      entry_at_zero(CPDF_Parser::ObjectType::kFree);

  EXPECT_THAT(objects_info, ElementsAre(Pair(4194303, only_valid_object),
                                        Pair(4194305, placeholder_object)));
}
481 
TEST_F(ParserXRefTest, XrefHasInvalidArchiveObjectNumber) {
  // 0xFF in the first object in the xref object stream is invalid.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 3\n"
      "  /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTable();
  ASSERT_TRUE(table);
  EXPECT_EQ(7u, table->trailer_object_number());
  const auto& objects_info = table->objects_info();

  // Builds an expected normal-object entry located at `pos`.
  auto normal_at = [](FX_FILESIZE pos) {
    CPDF_Parser::ObjectInfo info;
    info.type = CPDF_Parser::ObjectType::kNormal;
    info.pos = pos;
    return info;
  };

  // The expectation is for the parser to skip over the first object, and
  // continue parsing the remaining objects. So these are the second and third
  // objects.
  EXPECT_THAT(objects_info,
              ElementsAre(Pair(1, normal_at(15)), Pair(2, normal_at(18))));
}
522 
TEST_F(ParserXRefTest, XrefHasInvalidObjectType) {
  // The XRef object is a dictionary and not a stream.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 3\n"
      "  /W [1 1 1]\n"
      ">>\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));

  // Parsing is expected to be rejected as a format error.
  const auto parse_result = parser().StartParseInternal();
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, parse_result);
}
541 
TEST_F(ParserXRefTest, XrefHasInvalidPrevValue) {
  // The /Prev value is an absolute offset, so it should never be negative.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 3\n"
      "  /W [1 1 1]\n"
      "  /Prev -1\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));

  // Parsing is expected to be rejected as a format error.
  const auto parse_result = parser().StartParseInternal();
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, parse_result);
}
566 
TEST_F(ParserXRefTest, XrefHasInvalidSizeValue) {
  // The /Size value should never be negative.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 3\n"
      "  /W [1 1 1]\n"
      "  /Size -1\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));

  // Parsing is expected to be rejected as a format error.
  const auto parse_result = parser().StartParseInternal();
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, parse_result);
}
591 
TEST_F(ParserXRefTest, XrefHasInvalidWidth) {
  // The /W array needs to have at least 3 values.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 3\n"
      "  /W [1 1]\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));

  // StartParseInternal() succeeded not because XRef parsing succeeded, but
  // because RebuildCrossRef() got lucky with the data stream. Therefore, don't
  // bother checking the garbage output.
  const auto parse_result = parser().StartParseInternal();
  EXPECT_EQ(CPDF_Parser::SUCCESS, parse_result);
  EXPECT_TRUE(parser().xref_table_rebuilt());
}
620 
TEST_F(ParserXRefTest, XrefFirstWidthEntryIsZero) {
  // When the first /W array entry is 0, it implies the objects are all of the
  // normal type.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 2\n"
      "  /W [0 1 1]\n"
      ">>\n"
      "stream\n"
      "0F 00\n"
      "12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTable();
  ASSERT_TRUE(table);
  const auto& objects_info = table->objects_info();

  // Builds an expected normal-object entry located at `pos`.
  auto normal_at = [](FX_FILESIZE pos) {
    CPDF_Parser::ObjectInfo info;
    info.type = CPDF_Parser::ObjectType::kNormal;
    info.pos = pos;
    return info;
  };

  // 0x0F and 0x12 in the stream data are offsets 15 and 18.
  EXPECT_THAT(objects_info,
              ElementsAre(Pair(0, normal_at(15)), Pair(1, normal_at(18))));
}
655 
TEST_F(ParserXRefTest, XrefWithValidIndex) {
  // The /Index specifies objects (2), (4, 5), (80, 81, 82).
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 83\n"
      "  /Index [2 1 4 2 80 3]\n"
      "  /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "01 20 00\n"
      "01 22 00\n"
      "01 25 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTable();
  ASSERT_TRUE(table);
  const auto& objects_info = table->objects_info();

  // Builds an expected normal-object entry located at `pos`.
  auto normal_at = [](FX_FILESIZE pos) {
    CPDF_Parser::ObjectInfo info;
    info.type = CPDF_Parser::ObjectType::kNormal;
    info.pos = pos;
    return info;
  };

  // The stream rows map, in order, onto the object numbers listed by /Index.
  EXPECT_THAT(objects_info,
              ElementsAre(Pair(2, normal_at(0)), Pair(4, normal_at(15)),
                          Pair(5, normal_at(18)), Pair(80, normal_at(32)),
                          Pair(81, normal_at(34)), Pair(82, normal_at(37))));
}
705 
TEST_F(ParserXRefTest, XrefIndexWithRepeatedObject) {
  // The /Index specifies objects (2, 3), (3). AKA the sub-sections overlap.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 4\n"
      "  /Index [2 2 3 1]\n"
      "  /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTable();
  ASSERT_TRUE(table);
  const auto& objects_info = table->objects_info();

  // Builds an expected normal-object entry located at `pos`.
  auto normal_at = [](FX_FILESIZE pos) {
    CPDF_Parser::ObjectInfo info;
    info.type = CPDF_Parser::ObjectType::kNormal;
    info.pos = pos;
    return info;
  };

  // Since the /Index does not follow the spec, the position expected for
  // object 3 (15) is one of the 2 possible values that a parser can come up
  // with.
  EXPECT_THAT(objects_info,
              ElementsAre(Pair(2, normal_at(0)), Pair(3, normal_at(15))));
}
743 
TEST_F(ParserXRefTest, XrefIndexWithOutOfOrderObjects) {
  // The /Index specifies objects (3, 4), (2), which is not in ascending order.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 5\n"
      "  /Index [3 2 2 1]\n"
      "  /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTable();
  ASSERT_TRUE(table);
  const auto& objects_info = table->objects_info();

  // Builds an expected normal-object entry located at `pos`.
  auto normal_at = [](FX_FILESIZE pos) {
    CPDF_Parser::ObjectInfo info;
    info.type = CPDF_Parser::ObjectType::kNormal;
    info.pos = pos;
    return info;
  };

  // Although the /Index does not follow the spec, the parser tolerates it.
  // The stream rows are assigned to objects 3, 4 and 2 in that order, while
  // the table is listed here by ascending object number.
  EXPECT_THAT(objects_info,
              ElementsAre(Pair(2, normal_at(18)), Pair(3, normal_at(0)),
                          Pair(4, normal_at(15))));
}
783 
TEST_F(ParserXRefTest, XrefWithIndexAndWrongSize) {
  // The /Index specifies objects (2), (80, 81), so the /Size should be 82,
  // but is actually 81.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 81\n"
      "  /Index [2 1 80 2]\n"
      "  /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTable();
  ASSERT_TRUE(table);
  const auto& objects_info = table->objects_info();

  // Builds an expected normal-object entry located at `pos`.
  auto normal_at = [](FX_FILESIZE pos) {
    CPDF_Parser::ObjectInfo info;
    info.type = CPDF_Parser::ObjectType::kNormal;
    info.pos = pos;
    return info;
  };

  // All three objects named by /Index are expected despite the short /Size.
  EXPECT_THAT(objects_info,
              ElementsAre(Pair(2, normal_at(0)), Pair(80, normal_at(15)),
                          Pair(81, normal_at(18))));
}
823