1 // Copyright 2015 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "core/fpdfapi/parser/cpdf_parser.h"
6
7 #include <limits>
8 #include <memory>
9 #include <ostream>
10 #include <string>
11 #include <utility>
12 #include <vector>
13
14 #include "core/fpdfapi/parser/cpdf_dictionary.h"
15 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
16 #include "core/fpdfapi/parser/cpdf_object.h"
17 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
18 #include "core/fxcrt/cfx_read_only_span_stream.h"
19 #include "core/fxcrt/fx_extension.h"
20 #include "core/fxcrt/fx_stream.h"
21 #include "core/fxcrt/retain_ptr.h"
22 #include "testing/gmock/include/gmock/gmock.h"
23 #include "testing/gtest/include/gtest/gtest.h"
24 #include "testing/utils/path_service.h"
25
26 using testing::ElementsAre;
27 using testing::Pair;
28 using testing::Return;
29
30 namespace {
31
// Returns a copy of the cross-reference entry `parser` holds for `obj_num`.
// When the table has no entry for that object number, a default-constructed
// ObjectInfo is returned instead.
CPDF_Parser::ObjectInfo GetObjInfo(const CPDF_Parser& parser,
                                   uint32_t obj_num) {
  if (const auto* entry = parser.GetCrossRefTable()->GetObjectInfo(obj_num)) {
    return *entry;
  }
  return CPDF_Parser::ObjectInfo();
}
37
38 class TestObjectsHolder final : public CPDF_Parser::ParsedObjectsHolder {
39 public:
40 TestObjectsHolder() = default;
41 ~TestObjectsHolder() override = default;
42
43 // CPDF_Parser::ParsedObjectsHolder:
TryInit()44 bool TryInit() override { return true; }
45 MOCK_METHOD1(ParseIndirectObject, RetainPtr<CPDF_Object>(uint32_t objnum));
46 };
47
48 } // namespace
49
50 // Test-only helper to support Gmock. Cannot be in an anonymous namespace.
operator ==(const CPDF_Parser::ObjectInfo & lhs,const CPDF_Parser::ObjectInfo & rhs)51 bool operator==(const CPDF_Parser::ObjectInfo& lhs,
52 const CPDF_Parser::ObjectInfo& rhs) {
53 if (lhs.type != rhs.type) {
54 return false;
55 }
56
57 if (lhs.gennum != rhs.gennum) {
58 return false;
59 }
60
61 switch (lhs.type) {
62 case CPDF_Parser::ObjectType::kFree:
63 return true;
64 case CPDF_Parser::ObjectType::kNormal:
65 return lhs.pos == rhs.pos;
66 case CPDF_Parser::ObjectType::kCompressed:
67 return lhs.archive.obj_num == rhs.archive.obj_num &&
68 lhs.archive.obj_index == rhs.archive.obj_index;
69 case CPDF_Parser::ObjectType::kObjStream:
70 return false;
71 }
72 }
73
74 // Test-only helper to let Gmock pretty-print `info`. Cannot be in an anonymous
75 // namespace.
operator <<(std::ostream & os,const CPDF_Parser::ObjectInfo & info)76 std::ostream& operator<<(std::ostream& os,
77 const CPDF_Parser::ObjectInfo& info) {
78 os << "(";
79 switch (info.type) {
80 case CPDF_Parser::ObjectType::kFree:
81 os << "Free object";
82 break;
83 case CPDF_Parser::ObjectType::kNormal:
84 os << "Normal object, pos: " << info.pos;
85 break;
86 case CPDF_Parser::ObjectType::kCompressed:
87 os << "Compressed object, archive obj_num: " << info.archive.obj_num
88 << ", archive obj_index: " << info.archive.obj_index;
89 break;
90 case CPDF_Parser::ObjectType::kObjStream:
91 os << "ObjectStream object";
92 break;
93 }
94 os << ", gennum: " << info.gennum << ")";
95 return os;
96 }
97
98 // A wrapper class to help test member functions of CPDF_Parser.
99 class CPDF_TestParser final : public CPDF_Parser {
100 public:
CPDF_TestParser()101 CPDF_TestParser() : CPDF_Parser(&object_holder_) {}
102 ~CPDF_TestParser() = default;
103
104 // Setup reading from a file and initial states.
InitTestFromFile(const char * path)105 bool InitTestFromFile(const char* path) {
106 RetainPtr<IFX_SeekableReadStream> pFileAccess =
107 IFX_SeekableReadStream::CreateFromFilename(path);
108 if (!pFileAccess)
109 return false;
110
111 // For the test file, the header is set at the beginning.
112 SetSyntaxParserForTesting(
113 std::make_unique<CPDF_SyntaxParser>(std::move(pFileAccess)));
114 return true;
115 }
116
117 // Setup reading from a buffer and initial states.
InitTestFromBufferWithOffset(pdfium::span<const uint8_t> buffer,FX_FILESIZE header_offset)118 bool InitTestFromBufferWithOffset(pdfium::span<const uint8_t> buffer,
119 FX_FILESIZE header_offset) {
120 SetSyntaxParserForTesting(CPDF_SyntaxParser::CreateForTesting(
121 pdfium::MakeRetain<CFX_ReadOnlySpanStream>(buffer), header_offset));
122 return true;
123 }
124
InitTestFromBuffer(pdfium::span<const uint8_t> buffer)125 bool InitTestFromBuffer(pdfium::span<const uint8_t> buffer) {
126 return InitTestFromBufferWithOffset(buffer, 0 /*header_offset*/);
127 }
128
129 // Expose protected CPDF_Parser methods for testing.
130 using CPDF_Parser::LoadCrossRefV4;
131 using CPDF_Parser::ParseLinearizedHeader;
132 using CPDF_Parser::ParseStartXRef;
133 using CPDF_Parser::RebuildCrossRef;
134 using CPDF_Parser::StartParseInternal;
135
object_holder()136 TestObjectsHolder& object_holder() { return object_holder_; }
137
138 private:
139 TestObjectsHolder object_holder_;
140 };
141
// Rebuilds the cross-reference table for parser_rebuildxref_correct.pdf and
// checks the offset and generation number recorded for objects 0-6, plus the
// trailer object number.
TEST(ParserTest, RebuildCrossRefCorrectly) {
  CPDF_TestParser parser;
  std::string test_file;
  ASSERT_TRUE(PathService::GetTestFilePath("parser_rebuildxref_correct.pdf",
                                           &test_file));
  ASSERT_TRUE(parser.InitTestFromFile(test_file.c_str())) << test_file;

  ASSERT_TRUE(parser.RebuildCrossRef());

  // Verify the table exists before GetObjInfo() dereferences it below.
  const CPDF_CrossRefTable* cross_ref_table = parser.GetCrossRefTable();
  ASSERT_TRUE(cross_ref_table);

  static constexpr FX_FILESIZE kOffsets[] = {0, 15, 61, 154, 296, 374, 450};
  static constexpr uint16_t kVersions[] = {0, 0, 2, 4, 6, 8, 0};
  static_assert(std::size(kOffsets) == std::size(kVersions),
                "kOffsets / kVersions size mismatch");
  for (size_t i = 0; i < std::size(kOffsets); ++i) {
    const CPDF_Parser::ObjectInfo info = GetObjInfo(parser, i);
    // Name the object number so a failure identifies the offending entry.
    EXPECT_EQ(kOffsets[i], info.pos) << "object " << i;
    EXPECT_EQ(kVersions[i], info.gennum) << "object " << i;
  }

  EXPECT_EQ(0u, cross_ref_table->trailer_object_number());
}
161
// RebuildCrossRef() must report failure for
// parser_rebuildxref_error_notrailer.pdf (per its name, a file lacking a
// trailer).
TEST(ParserTest, RebuildCrossRefFailed) {
  std::string pdf_path;
  ASSERT_TRUE(PathService::GetTestFilePath(
      "parser_rebuildxref_error_notrailer.pdf", &pdf_path));

  CPDF_TestParser parser;
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;

  ASSERT_FALSE(parser.RebuildCrossRef());
}
171
// Feeds several table-style cross-reference sections to LoadCrossRefV4() and
// checks the offset and type read back for every object number. In all cases
// free entries, and object numbers that no subsection lists, are expected to
// read back as free with offset 0.
TEST(ParserTest, LoadCrossRefV4) {
  // NOTE(review): these expectations originally spelled in-use entries as
  // `kNotCompressed`; they now use `kNormal` like the rest of this file.
  // Assumes the two names denote the same enumerator value - confirm.
  using ObjectType = CPDF_Parser::ObjectType;

  {
    // A single subsection covering objects 0-5, mixing in-use and free
    // entries.
    static const unsigned char kXrefTable[] =
        "xref \n"
        "0 6 \n"
        "0000000003 65535 f \n"
        "0000000017 00000 n \n"
        "0000000081 00000 n \n"
        "0000000000 00007 f \n"
        "0000000331 00000 n \n"
        "0000000409 00000 n \n"
        "trail";  // Needed to end cross ref table reading.
    CPDF_TestParser parser;
    ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));

    ASSERT_TRUE(parser.LoadCrossRefV4(0, false));
    static const FX_FILESIZE kOffsets[] = {0, 17, 81, 0, 331, 409};
    static const ObjectType kTypes[] = {
        ObjectType::kFree,   ObjectType::kNormal, ObjectType::kNormal,
        ObjectType::kFree,   ObjectType::kNormal, ObjectType::kNormal};
    static_assert(std::size(kOffsets) == std::size(kTypes),
                  "kOffsets / kTypes size mismatch");
    for (size_t i = 0; i < std::size(kOffsets); ++i) {
      const CPDF_Parser::ObjectInfo info = GetObjInfo(parser, i);
      EXPECT_EQ(kOffsets[i], info.pos) << "object " << i;
      EXPECT_EQ(kTypes[i], info.type) << "object " << i;
    }
  }
  {
    // Several subsections (objects 0, 3, 8-9 and 12) with gaps in between.
    static const unsigned char kXrefTable[] =
        "xref \n"
        "0 1 \n"
        "0000000000 65535 f \n"
        "3 1 \n"
        "0000025325 00000 n \n"
        "8 2 \n"
        "0000025518 00002 n \n"
        "0000025635 00000 n \n"
        "12 1 \n"
        "0000025777 00000 n \n"
        "trail";  // Needed to end cross ref table reading.
    CPDF_TestParser parser;
    ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));

    ASSERT_TRUE(parser.LoadCrossRefV4(0, false));
    static const FX_FILESIZE kOffsets[] = {0, 0,     0,     25325, 0, 0,    0,
                                           0, 25518, 25635, 0,     0, 25777};
    static const ObjectType kTypes[] = {
        ObjectType::kFree,   ObjectType::kFree,   ObjectType::kFree,
        ObjectType::kNormal, ObjectType::kFree,   ObjectType::kFree,
        ObjectType::kFree,   ObjectType::kFree,   ObjectType::kNormal,
        ObjectType::kNormal, ObjectType::kFree,   ObjectType::kFree,
        ObjectType::kNormal};
    static_assert(std::size(kOffsets) == std::size(kTypes),
                  "kOffsets / kTypes size mismatch");
    for (size_t i = 0; i < std::size(kOffsets); ++i) {
      const CPDF_Parser::ObjectInfo info = GetObjInfo(parser, i);
      EXPECT_EQ(kOffsets[i], info.pos) << "object " << i;
      EXPECT_EQ(kTypes[i], info.type) << "object " << i;
    }
  }
  {
    // Same layout as above, except that object 8 is a free entry inside its
    // subsection.
    static const unsigned char kXrefTable[] =
        "xref \n"
        "0 1 \n"
        "0000000000 65535 f \n"
        "3 1 \n"
        "0000025325 00000 n \n"
        "8 2 \n"
        "0000000000 65535 f \n"
        "0000025635 00000 n \n"
        "12 1 \n"
        "0000025777 00000 n \n"
        "trail";  // Needed to end cross ref table reading.
    CPDF_TestParser parser;
    ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));

    ASSERT_TRUE(parser.LoadCrossRefV4(0, false));
    static const FX_FILESIZE kOffsets[] = {0, 0, 0,     25325, 0, 0,    0,
                                           0, 0, 25635, 0,     0, 25777};
    static const ObjectType kTypes[] = {
        ObjectType::kFree,   ObjectType::kFree,   ObjectType::kFree,
        ObjectType::kNormal, ObjectType::kFree,   ObjectType::kFree,
        ObjectType::kFree,   ObjectType::kFree,   ObjectType::kFree,
        ObjectType::kNormal, ObjectType::kFree,   ObjectType::kFree,
        ObjectType::kNormal};
    static_assert(std::size(kOffsets) == std::size(kTypes),
                  "kOffsets / kTypes size mismatch");
    for (size_t i = 0; i < std::size(kOffsets); ++i) {
      const CPDF_Parser::ObjectInfo info = GetObjInfo(parser, i);
      EXPECT_EQ(kOffsets[i], info.pos) << "object " << i;
      EXPECT_EQ(kTypes[i], info.type) << "object " << i;
    }
  }
  {
    // A single subsection whose free entries carry non-zero values in their
    // first field (2, 3, 4). Those values are not expected to show up as
    // offsets.
    static const unsigned char kXrefTable[] =
        "xref \n"
        "0 7 \n"
        "0000000002 65535 f \n"
        "0000000023 00000 n \n"
        "0000000003 65535 f \n"
        "0000000004 65535 f \n"
        "0000000000 65535 f \n"
        "0000000045 00000 n \n"
        "0000000179 00000 n \n"
        "trail";  // Needed to end cross ref table reading.
    CPDF_TestParser parser;
    ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));

    ASSERT_TRUE(parser.LoadCrossRefV4(0, false));
    static const FX_FILESIZE kOffsets[] = {0, 23, 0, 0, 0, 45, 179};
    static const ObjectType kTypes[] = {
        ObjectType::kFree,   ObjectType::kNormal, ObjectType::kFree,
        ObjectType::kFree,   ObjectType::kFree,   ObjectType::kNormal,
        ObjectType::kNormal};
    static_assert(std::size(kOffsets) == std::size(kTypes),
                  "kOffsets / kTypes size mismatch");
    for (size_t i = 0; i < std::size(kOffsets); ++i) {
      const CPDF_Parser::ObjectInfo info = GetObjInfo(parser, i);
      EXPECT_EQ(kOffsets[i], info.pos) << "object " << i;
      EXPECT_EQ(kTypes[i], info.type) << "object " << i;
    }
  }
  {
    // Regression test for https://crbug.com/945624 - Make sure the parser
    // can correctly handle table sizes that are multiples of the read size,
    // which is 1024.
    std::string xref_table = "xref \n 0 2048 \n";
    xref_table.reserve(41000);
    for (int i = 0; i < 2048; ++i) {
      // Each entry is 20 characters plus the terminating NUL; entry `i` gets
      // offset `i + 1`.
      char buffer[21];
      snprintf(buffer, sizeof(buffer), "%010d 00000 n \n", i + 1);
      xref_table += buffer;
    }
    xref_table += "trail";  // Needed to end cross ref table reading.
    CPDF_TestParser parser;
    ASSERT_TRUE(parser.InitTestFromBuffer(
        pdfium::make_span(reinterpret_cast<const uint8_t*>(xref_table.c_str()),
                          xref_table.size())));

    ASSERT_TRUE(parser.LoadCrossRefV4(0, false));
    for (size_t i = 0; i < 2048; ++i) {
      const CPDF_Parser::ObjectInfo info = GetObjInfo(parser, i);
      EXPECT_EQ(static_cast<int>(i) + 1, info.pos) << "object " << i;
      EXPECT_EQ(ObjectType::kNormal, info.type) << "object " << i;
    }
  }
}
340
// ParseStartXRef() must return the offset of the cross-reference object in
// annotation_stamp_with_ap.pdf, and the object parsed at that offset must be
// object 75.
TEST(ParserTest, ParseStartXRef) {
  constexpr int kExpectedXRefOffset = 100940;

  std::string pdf_path;
  ASSERT_TRUE(
      PathService::GetTestFilePath("annotation_stamp_with_ap.pdf", &pdf_path));

  CPDF_TestParser parser;
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;

  EXPECT_EQ(kExpectedXRefOffset, parser.ParseStartXRef());

  RetainPtr<CPDF_Object> cross_ref_v5_obj =
      parser.ParseIndirectObjectAt(kExpectedXRefOffset, 0);
  ASSERT_TRUE(cross_ref_v5_obj);
  EXPECT_EQ(75u, cross_ref_v5_obj->GetObjNum());
}
354
// Same check as ParseStartXRef, but the PDF is preceded by
// `kTestHeaderOffset` bytes of zero padding in memory. The expected offsets
// are unchanged from the unpadded case.
TEST(ParserTest, ParseStartXRefWithHeaderOffset) {
  static constexpr FX_FILESIZE kTestHeaderOffset = 765;
  std::string test_file;
  ASSERT_TRUE(
      PathService::GetTestFilePath("annotation_stamp_with_ap.pdf", &test_file));
  RetainPtr<IFX_SeekableReadStream> pFileAccess =
      IFX_SeekableReadStream::CreateFromFilename(test_file.c_str());
  ASSERT_TRUE(pFileAccess);

  // The vector is value-initialized, so the leading padding is all zeros; the
  // file contents are then read in right after it.
  std::vector<unsigned char> data(pFileAccess->GetSize() + kTestHeaderOffset);
  ASSERT_TRUE(pFileAccess->ReadBlockAtOffset(
      pdfium::make_span(data).subspan(kTestHeaderOffset), 0));

  CPDF_TestParser parser;
  // Check the init result, as every other test in this file does.
  ASSERT_TRUE(parser.InitTestFromBufferWithOffset(data, kTestHeaderOffset));

  EXPECT_EQ(100940, parser.ParseStartXRef());
  RetainPtr<CPDF_Object> cross_ref_v5_obj =
      parser.ParseIndirectObjectAt(100940, 0);
  ASSERT_TRUE(cross_ref_v5_obj);
  EXPECT_EQ(75u, cross_ref_v5_obj->GetObjNum());
}
376
// Loads linearized.pdf into memory behind `kTestHeaderOffset` bytes of zero
// padding and checks that the linearized header still parses and that the
// trailer object number is 0.
TEST(ParserTest, ParseLinearizedWithHeaderOffset) {
  static constexpr FX_FILESIZE kTestHeaderOffset = 765;
  std::string test_file;
  ASSERT_TRUE(PathService::GetTestFilePath("linearized.pdf", &test_file));
  RetainPtr<IFX_SeekableReadStream> pFileAccess =
      IFX_SeekableReadStream::CreateFromFilename(test_file.c_str());
  ASSERT_TRUE(pFileAccess);

  // The vector is value-initialized, so the leading padding is all zeros; the
  // file contents are then read in right after it.
  std::vector<unsigned char> data(pFileAccess->GetSize() + kTestHeaderOffset);
  ASSERT_TRUE(pFileAccess->ReadBlockAtOffset(
      pdfium::make_span(data).subspan(kTestHeaderOffset), 0));

  CPDF_TestParser parser;
  // Check the init result, as every other test in this file does.
  ASSERT_TRUE(parser.InitTestFromBufferWithOffset(data, kTestHeaderOffset));
  EXPECT_TRUE(parser.ParseLinearizedHeader());

  const CPDF_CrossRefTable* cross_ref_table = parser.GetCrossRefTable();
  ASSERT_TRUE(cross_ref_table);
  EXPECT_EQ(0u, cross_ref_table->trailer_object_number());
}
397
// A startxref value of 6 points into the middle of the "%PDF1-7" header, at
// the "7" that is followed by " 0 obj". Parsing must fail with FORMAT_ERROR
// and leave the cross-reference table without any objects.
TEST(ParserTest, BadStartXrefShouldNotBuildCrossRefTable) {
  const unsigned char kData[] =
      "%PDF1-7 0 obj <</Size 2 /W [0 0 0]\n>>\n"
      "stream\n"
      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "6\n"
      "%%EOF\n";

  CPDF_TestParser parser;
  ASSERT_TRUE(parser.InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, parser.StartParseInternal());

  const CPDF_CrossRefTable* table = parser.GetCrossRefTable();
  ASSERT_TRUE(table);
  EXPECT_EQ(0u, table->objects_info().size());
}
414
415 class ParserXRefTest : public testing::Test {
416 public:
417 ParserXRefTest() = default;
418 ~ParserXRefTest() override = default;
419
420 // testing::Test:
SetUp()421 void SetUp() override {
422 // Satisfy CPDF_Parser's checks, so the test data below can concentrate on
423 // the /XRef stream and avoid also providing other valid dictionaries.
424 dummy_root_ = pdfium::MakeRetain<CPDF_Dictionary>();
425 EXPECT_CALL(parser().object_holder(), ParseIndirectObject)
426 .WillRepeatedly(Return(dummy_root_));
427 }
428
parser()429 CPDF_TestParser& parser() { return parser_; }
430
431 private:
432 RetainPtr<CPDF_Dictionary> dummy_root_;
433 CPDF_TestParser parser_;
434 };
435
TEST_F(ParserXRefTest, XrefObjectIndicesTooBig) {
  // /Index begins at object 4194303 and covers 3 entries, so the object
  // numbers run past `kMaxObjectNumber`.
  static_assert(CPDF_Parser::kMaxObjectNumber == 4194304,
                "Unexpected kMaxObjectNumber");
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Index [4194303 3]\n"
      " /Root 1 0 R\n"
      " /Size 4194306\n"
      " /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTable();
  ASSERT_TRUE(table);
  const auto& objects_info = table->objects_info();

  // Builds an expected entry of the given type at offset `pos`.
  const auto make_info = [](CPDF_Parser::ObjectType type, FX_FILESIZE pos) {
    CPDF_Parser::ObjectInfo info;
    info.type = type;
    info.pos = pos;
    return info;
  };

  // The only real object taken from the table. The objects after it have
  // object numbers that are too big.
  const CPDF_Parser::ObjectInfo only_valid_object =
      make_info(CPDF_Parser::ObjectType::kNormal, 0);

  // TODO(thestig): Should the xref table contain object 4194305?
  // Consider reworking CPDF_Parser's object representation to avoid having to
  // store this placeholder object.
  const CPDF_Parser::ObjectInfo placeholder_object =
      make_info(CPDF_Parser::ObjectType::kFree, 0);

  EXPECT_THAT(objects_info, ElementsAre(Pair(4194303, only_valid_object),
                                        Pair(4194305, placeholder_object)));
}
481
TEST_F(ParserXRefTest, XrefHasInvalidArchiveObjectNumber) {
  // The first row of the xref stream carries 0xFF, which is invalid. The
  // parser is expected to drop that row and keep going.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 3\n"
      " /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTable();
  ASSERT_TRUE(table);
  EXPECT_EQ(7u, table->trailer_object_number());

  // Builds an expected normal entry located at `pos`.
  const auto normal_at = [](FX_FILESIZE pos) {
    CPDF_Parser::ObjectInfo info;
    info.type = CPDF_Parser::ObjectType::kNormal;
    info.pos = pos;
    return info;
  };

  // Only the second and third rows survive, as objects 1 and 2 at offsets
  // 0x0F and 0x12.
  EXPECT_THAT(table->objects_info(),
              ElementsAre(Pair(1, normal_at(15)), Pair(2, normal_at(18))));
}
522
TEST_F(ParserXRefTest, XrefHasInvalidObjectType) {
  // Object 7 is a bare dictionary with no stream section, so it cannot serve
  // as an XRef stream and parsing must fail.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 3\n"
      " /W [1 1 1]\n"
      ">>\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));

  const auto result = parser().StartParseInternal();
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, result);
}
541
TEST_F(ParserXRefTest, XrefHasInvalidPrevValue) {
  // /Prev is an absolute file offset, so a negative value such as -1 must be
  // rejected.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 3\n"
      " /W [1 1 1]\n"
      " /Prev -1\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));

  const auto result = parser().StartParseInternal();
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, result);
}
566
TEST_F(ParserXRefTest, XrefHasInvalidSizeValue) {
  // A negative /Size must be rejected.
  // NOTE(review): the dictionary lists /Size twice (3, then -1); presumably
  // the later value is the one that takes effect - confirm.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 3\n"
      " /W [1 1 1]\n"
      " /Size -1\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));

  const auto result = parser().StartParseInternal();
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, result);
}
591
TEST_F(ParserXRefTest, XrefHasInvalidWidth) {
  // /W must hold at least 3 values; this one only has 2.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 3\n"
      " /W [1 1]\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));

  // The overall parse reports success only because RebuildCrossRef() happened
  // to cope with this data, not because the XRef stream was accepted. The
  // rebuilt table is garbage, so its contents are deliberately not checked.
  const auto result = parser().StartParseInternal();
  EXPECT_EQ(CPDF_Parser::SUCCESS, result);
  EXPECT_TRUE(parser().xref_table_rebuilt());
}
620
TEST_F(ParserXRefTest, XrefFirstWidthEntryIsZero) {
  // A first /W entry of 0 means the rows have no type field; every object is
  // then treated as the normal type.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 2\n"
      " /W [0 1 1]\n"
      ">>\n"
      "stream\n"
      "0F 00\n"
      "12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTable();
  ASSERT_TRUE(table);

  // Builds an expected normal entry located at `pos`.
  const auto normal_at = [](FX_FILESIZE pos) {
    CPDF_Parser::ObjectInfo info;
    info.type = CPDF_Parser::ObjectType::kNormal;
    info.pos = pos;
    return info;
  };

  // Objects 0 and 1 at offsets 0x0F and 0x12.
  EXPECT_THAT(table->objects_info(),
              ElementsAre(Pair(0, normal_at(15)), Pair(1, normal_at(18))));
}
655
TEST_F(ParserXRefTest, XrefWithValidIndex) {
  // /Index describes three sub-sections: object (2), objects (4, 5) and
  // objects (80, 81, 82). The six stream rows are assigned to them in order.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 83\n"
      " /Index [2 1 4 2 80 3]\n"
      " /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "01 20 00\n"
      "01 22 00\n"
      "01 25 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTable();
  ASSERT_TRUE(table);

  // Builds an expected normal entry located at `pos`.
  const auto normal_at = [](FX_FILESIZE pos) {
    CPDF_Parser::ObjectInfo info;
    info.type = CPDF_Parser::ObjectType::kNormal;
    info.pos = pos;
    return info;
  };

  // Offsets are the decimal values of the hex bytes 00, 0F, 12, 20, 22, 25.
  EXPECT_THAT(
      table->objects_info(),
      ElementsAre(Pair(2, normal_at(0)), Pair(4, normal_at(15)),
                  Pair(5, normal_at(18)), Pair(80, normal_at(32)),
                  Pair(81, normal_at(34)), Pair(82, normal_at(37))));
}
705
TEST_F(ParserXRefTest, XrefIndexWithRepeatedObject) {
  // /Index describes objects (2, 3) followed by (3), i.e. the sub-sections
  // overlap on object 3.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 4\n"
      " /Index [2 2 3 1]\n"
      " /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTable();
  ASSERT_TRUE(table);

  // Builds an expected normal entry located at `pos`.
  const auto normal_at = [](FX_FILESIZE pos) {
    CPDF_Parser::ObjectInfo info;
    info.type = CPDF_Parser::ObjectType::kNormal;
    info.pos = pos;
    return info;
  };

  // The /Index breaks the spec, so a parser could settle on either of 2
  // offsets for object 3. The expected one is 15 (the second row), not 18
  // (the third row).
  EXPECT_THAT(table->objects_info(),
              ElementsAre(Pair(2, normal_at(0)), Pair(3, normal_at(15))));
}
743
TEST_F(ParserXRefTest, XrefIndexWithOutOfOrderObjects) {
  // /Index describes objects (3, 4) followed by (2), so the sub-sections are
  // not in ascending order.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 5\n"
      " /Index [3 2 2 1]\n"
      " /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTable();
  ASSERT_TRUE(table);

  // Builds an expected normal entry located at `pos`.
  const auto normal_at = [](FX_FILESIZE pos) {
    CPDF_Parser::ObjectInfo info;
    info.type = CPDF_Parser::ObjectType::kNormal;
    info.pos = pos;
    return info;
  };

  // The parser tolerates the out-of-spec /Index. The rows map to objects 3, 4
  // and 2 in that order, and the resulting table lists them by object number.
  EXPECT_THAT(table->objects_info(),
              ElementsAre(Pair(2, normal_at(18)), Pair(3, normal_at(0)),
                          Pair(4, normal_at(15))));
}
783
TEST_F(ParserXRefTest, XrefWithIndexAndWrongSize) {
  // /Index describes objects (2) and (80, 81), which calls for a /Size of 82.
  // The dictionary says 81 instead; all three objects are still expected.
  const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 81\n"
      " /Index [2 1 80 2]\n"
      " /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTable();
  ASSERT_TRUE(table);

  // Builds an expected normal entry located at `pos`.
  const auto normal_at = [](FX_FILESIZE pos) {
    CPDF_Parser::ObjectInfo info;
    info.type = CPDF_Parser::ObjectType::kNormal;
    info.pos = pos;
    return info;
  };

  EXPECT_THAT(table->objects_info(),
              ElementsAre(Pair(2, normal_at(0)), Pair(80, normal_at(15)),
                          Pair(81, normal_at(18))));
}
823