1 // Copyright 2017 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
6
7 #include <memory>
8 #include <string>
9 #include <utility>
10
11 #include "core/fpdfapi/page/test_with_page_module.h"
12 #include "core/fpdfapi/parser/cpdf_data_avail.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
15 #include "core/fpdfapi/parser/cpdf_object.h"
16 #include "core/fpdfapi/parser/cpdf_read_validator.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
19 #include "core/fxcrt/bytestring.h"
20 #include "core/fxcrt/cfx_read_only_string_stream.h"
21 #include "core/fxcrt/fx_stream.h"
22 #include "testing/gmock/include/gmock/gmock.h"
23 #include "testing/gtest/include/gtest/gtest.h"
24 #include "testing/utils/path_service.h"
25 #include "third_party/base/check.h"
26
27 namespace {
28
MakeValidatorFromFile(const std::string & file_name)29 RetainPtr<CPDF_ReadValidator> MakeValidatorFromFile(
30 const std::string& file_name) {
31 std::string file_path;
32 PathService::GetTestFilePath(file_name, &file_path);
33 DCHECK(!file_path.empty());
34 return pdfium::MakeRetain<CPDF_ReadValidator>(
35 IFX_SeekableReadStream::CreateFromFilename(file_path.c_str()), nullptr);
36 }
37
MakeDataAvailFromFile(const std::string & file_name)38 std::unique_ptr<CPDF_DataAvail> MakeDataAvailFromFile(
39 const std::string& file_name) {
40 return std::make_unique<CPDF_DataAvail>(nullptr,
41 MakeValidatorFromFile(file_name));
42 }
43
44 class TestLinearizedHeader final : public CPDF_LinearizedHeader {
45 public:
TestLinearizedHeader(const CPDF_Dictionary * pDict,FX_FILESIZE szLastXRefOffset)46 TestLinearizedHeader(const CPDF_Dictionary* pDict,
47 FX_FILESIZE szLastXRefOffset)
48 : CPDF_LinearizedHeader(pDict, szLastXRefOffset) {}
49
MakeHeader(ByteString inline_data)50 static std::unique_ptr<CPDF_LinearizedHeader> MakeHeader(
51 ByteString inline_data) {
52 CPDF_SyntaxParser parser(
53 pdfium::MakeRetain<CFX_ReadOnlyStringStream>(std::move(inline_data)));
54 RetainPtr<CPDF_Dictionary> dict =
55 ToDictionary(parser.GetObjectBody(nullptr));
56 DCHECK(dict);
57 return std::make_unique<TestLinearizedHeader>(dict.Get(), 0);
58 }
59 };
60
61 } // namespace
62
// Fixture for the tests below. It is an alias of TestWithPageModule because
// the page module is needed for encoding the hint table stream.
using HintTablesTest = TestWithPageModule;
65
// Makes "feature_linearized_loading.pdf" fully available, then checks the
// start offset, length and object number that GetPagePos() reports for page
// indices 0 and 1, and that index 2 is rejected.
TEST_F(HintTablesTest, Load) {
  const std::unique_ptr<CPDF_DataAvail> avail =
      MakeDataAvailFromFile("feature_linearized_loading.pdf");
  ASSERT_EQ(CPDF_DataAvail::kDataAvailable, avail->IsDocAvail(nullptr));

  const CPDF_HintTables* tables = avail->GetHintTablesForTest();
  ASSERT_TRUE(tables);

  // Out-parameters, reused for every GetPagePos() query below.
  FX_FILESIZE offset = 0;
  FX_FILESIZE length = 0;
  uint32_t obj_num = 0;

  // Page index 0.
  ASSERT_TRUE(tables->GetPagePos(0, &offset, &length, &obj_num));
  EXPECT_EQ(777, offset);
  EXPECT_EQ(4328, length);
  EXPECT_EQ(39u, obj_num);

  // Page index 1.
  ASSERT_TRUE(tables->GetPagePos(1, &offset, &length, &obj_num));
  EXPECT_EQ(5105, offset);
  EXPECT_EQ(767, length);
  EXPECT_EQ(1u, obj_num);

  // Page index 2 must not resolve.
  ASSERT_FALSE(tables->GetPagePos(2, &offset, &length, &obj_num));
}
92
// Makes "feature_linearized_loading.pdf" fully available, then checks every
// per-page record and every shared-group record exposed by the hint tables
// against the values expected for that file.
TEST_F(HintTablesTest, PageAndGroupInfos) {
  const std::unique_ptr<CPDF_DataAvail> avail =
      MakeDataAvailFromFile("feature_linearized_loading.pdf");
  ASSERT_EQ(CPDF_DataAvail::kDataAvailable, avail->IsDocAvail(nullptr));

  const CPDF_HintTables* tables = avail->GetHintTablesForTest();
  ASSERT_TRUE(tables);

  // PageInfo
  const auto& pages = tables->PageInfos();
  ASSERT_EQ(2u, pages.size());

  const auto& first_page = pages[0];
  EXPECT_EQ(5u, first_page.objects_count());
  EXPECT_EQ(777, first_page.page_offset());
  EXPECT_EQ(4328u, first_page.page_length());
  EXPECT_EQ(39u, first_page.start_obj_num());

  const auto& first_page_ids = first_page.Identifiers();
  ASSERT_EQ(2u, first_page_ids.size());
  EXPECT_EQ(0u, first_page_ids[0]);
  EXPECT_EQ(0u, first_page_ids[1]);

  const auto& second_page = pages[1];
  EXPECT_EQ(3u, second_page.objects_count());
  EXPECT_EQ(5105, second_page.page_offset());
  EXPECT_EQ(767u, second_page.page_length());
  EXPECT_EQ(1u, second_page.start_obj_num());

  const auto& second_page_ids = second_page.Identifiers();
  ASSERT_EQ(3u, second_page_ids.size());
  EXPECT_EQ(2u, second_page_ids[0]);
  EXPECT_EQ(5u, second_page_ids[1]);
  EXPECT_EQ(3u, second_page_ids[2]);

  // SharedGroupInfo
  const auto& groups = tables->SharedGroupInfos();
  ASSERT_EQ(6u, groups.size());

  const auto& group0 = groups[0];
  EXPECT_EQ(777, group0.m_szOffset);
  EXPECT_EQ(254u, group0.m_dwLength);
  EXPECT_EQ(39u, group0.m_dwStartObjNum);
  EXPECT_EQ(1u, group0.m_dwObjectsCount);

  const auto& group1 = groups[1];
  EXPECT_EQ(1031, group1.m_szOffset);
  EXPECT_EQ(389u, group1.m_dwLength);
  EXPECT_EQ(40u, group1.m_dwStartObjNum);
  EXPECT_EQ(1u, group1.m_dwObjectsCount);

  const auto& group2 = groups[2];
  EXPECT_EQ(1420, group2.m_szOffset);
  EXPECT_EQ(726u, group2.m_dwLength);
  EXPECT_EQ(41u, group2.m_dwStartObjNum);
  EXPECT_EQ(1u, group2.m_dwObjectsCount);

  const auto& group3 = groups[3];
  EXPECT_EQ(2146, group3.m_szOffset);
  EXPECT_EQ(290u, group3.m_dwLength);
  EXPECT_EQ(42u, group3.m_dwStartObjNum);
  EXPECT_EQ(1u, group3.m_dwObjectsCount);

  const auto& group4 = groups[4];
  EXPECT_EQ(2436, group4.m_szOffset);
  EXPECT_EQ(2669u, group4.m_dwLength);
  EXPECT_EQ(43u, group4.m_dwStartObjNum);
  EXPECT_EQ(1u, group4.m_dwObjectsCount);

  const auto& group5 = groups[5];
  EXPECT_EQ(10939, group5.m_szOffset);
  EXPECT_EQ(544u, group5.m_dwLength);
  EXPECT_EQ(4u, group5.m_dwStartObjNum);
  EXPECT_EQ(1u, group5.m_dwObjectsCount);
}
153
// Verifies that a valid hint table loads and reports the correct offset of
// the first page object.
TEST_F(HintTablesTest, FirstPageOffset) {
  // Linearization dictionary the hint table is loaded against.
  const auto header = TestLinearizedHeader::MakeHeader(
      "<< /Linearized 1 /L 19326762 /H [ 123730 3816 ] /O 5932 /E 639518 /N "
      "102 /T 19220281 >>");
  ASSERT_TRUE(header);

  // The hint table in this file was extracted from a linearized file that
  // was generated by the qpdf tool.
  RetainPtr<CPDF_ReadValidator> hint_file =
      MakeValidatorFromFile("hint_table_102p.bin");
  CPDF_SyntaxParser hint_parser(hint_file, 0);
  RetainPtr<CPDF_Stream> hint_stream =
      ToStream(hint_parser.GetObjectBody(nullptr));
  ASSERT_TRUE(hint_stream);

  CPDF_HintTables tables(hint_file.Get(), header.get());

  // The hint stream must load successfully.
  ASSERT_TRUE(tables.LoadHintStream(hint_stream.Get()));

  // 127546 is the real first-page offset, taken from the original file.
  EXPECT_EQ(127546, tables.GetFirstPageObjOffset());
}
175