1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
8
9 #include <limits>
10
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_data_avail.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
16 #include "core/fpdfapi/parser/cpdf_parser.h"
17 #include "core/fpdfapi/parser/cpdf_read_validator.h"
18 #include "core/fpdfapi/parser/cpdf_stream.h"
19 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
20 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
21 #include "core/fxcrt/cfx_bitstream.h"
22 #include "core/fxcrt/fx_safe_types.h"
23 #include "third_party/base/check.h"
24 #include "third_party/base/containers/span.h"
25 #include "third_party/base/numerics/safe_conversions.h"
26
27 namespace {
28
CanReadFromBitStream(const CFX_BitStream * hStream,const FX_SAFE_UINT32 & bits)29 bool CanReadFromBitStream(const CFX_BitStream* hStream,
30 const FX_SAFE_UINT32& bits) {
31 return bits.IsValid() && hStream->BitsRemaining() >= bits.ValueOrDie();
32 }
33
// Sanity check for bit-width values read from the page table header. Per the
// note attached to Table F.3 in the PDF 1.7 reference, only 0 through 32 is
// valid; 0 is rejected here too because a zero-width field is not useful.
bool IsValidPageOffsetHintTableBitCount(uint32_t bits) {
  const uint32_t kMinBits = 1;
  const uint32_t kMaxBits = 32;
  if (bits < kMinBits)
    return false;
  return bits <= kMaxBits;
}
40
41 } // namespace
42
43 CPDF_HintTables::PageInfo::PageInfo() = default;
44 CPDF_HintTables::PageInfo::~PageInfo() = default;
45
46 // static
Parse(CPDF_SyntaxParser * parser,const CPDF_LinearizedHeader * pLinearized)47 std::unique_ptr<CPDF_HintTables> CPDF_HintTables::Parse(
48 CPDF_SyntaxParser* parser,
49 const CPDF_LinearizedHeader* pLinearized) {
50 DCHECK(parser);
51 if (!pLinearized || pLinearized->GetPageCount() <= 1 ||
52 !pLinearized->HasHintTable()) {
53 return nullptr;
54 }
55
56 const FX_FILESIZE szHintStart = pLinearized->GetHintStart();
57 const uint32_t szHintLength = pLinearized->GetHintLength();
58
59 if (!parser->GetValidator()->CheckDataRangeAndRequestIfUnavailable(
60 szHintStart, szHintLength)) {
61 return nullptr;
62 }
63
64 parser->SetPos(szHintStart);
65 RetainPtr<CPDF_Stream> hints_stream = ToStream(
66 parser->GetIndirectObject(nullptr, CPDF_SyntaxParser::ParseType::kLoose));
67
68 if (!hints_stream)
69 return nullptr;
70
71 auto pHintTables = std::make_unique<CPDF_HintTables>(
72 parser->GetValidator().Get(), pLinearized);
73 if (!pHintTables->LoadHintStream(hints_stream.Get()))
74 return nullptr;
75
76 return pHintTables;
77 }
78
CPDF_HintTables(CPDF_ReadValidator * pValidator,const CPDF_LinearizedHeader * pLinearized)79 CPDF_HintTables::CPDF_HintTables(CPDF_ReadValidator* pValidator,
80 const CPDF_LinearizedHeader* pLinearized)
81 : m_pValidator(pValidator), m_pLinearized(pLinearized) {
82 DCHECK(m_pLinearized);
83 }
84
85 CPDF_HintTables::~CPDF_HintTables() = default;
86
ReadPageHintTable(CFX_BitStream * hStream)87 bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) {
88 const uint32_t nPages = m_pLinearized->GetPageCount();
89 if (nPages < 1 || nPages >= CPDF_Document::kPageMaxNum)
90 return false;
91
92 const uint32_t nFirstPageNum = m_pLinearized->GetFirstPageNo();
93 if (nFirstPageNum >= nPages)
94 return false;
95
96 if (!hStream || hStream->IsEOF())
97 return false;
98
99 const uint32_t kHeaderSize = 288;
100 if (hStream->BitsRemaining() < kHeaderSize)
101 return false;
102
103 // Item 1: The least number of objects in a page.
104 const uint32_t dwObjLeastNum = hStream->GetBits(32);
105 if (!dwObjLeastNum || dwObjLeastNum >= CPDF_Parser::kMaxObjectNumber)
106 return false;
107
108 // Item 2: The location of the first page's page object.
109 const FX_FILESIZE szFirstObjLoc =
110 HintsOffsetToFileOffset(hStream->GetBits(32));
111 if (!szFirstObjLoc)
112 return false;
113
114 m_szFirstPageObjOffset = szFirstObjLoc;
115
116 // Item 3: The number of bits needed to represent the difference
117 // between the greatest and least number of objects in a page.
118 const uint32_t dwDeltaObjectsBits = hStream->GetBits(16);
119 if (!IsValidPageOffsetHintTableBitCount(dwDeltaObjectsBits))
120 return false;
121
122 // Item 4: The least length of a page in bytes.
123 const uint32_t dwPageLeastLen = hStream->GetBits(32);
124 if (!dwPageLeastLen)
125 return false;
126
127 // Item 5: The number of bits needed to represent the difference
128 // between the greatest and least length of a page, in bytes.
129 const uint32_t dwDeltaPageLenBits = hStream->GetBits(16);
130 if (!IsValidPageOffsetHintTableBitCount(dwDeltaPageLenBits))
131 return false;
132
133 // Skip Item 6, 7, 8, 9 total 96 bits.
134 hStream->SkipBits(96);
135
136 // Item 10: The number of bits needed to represent the greatest
137 // number of shared object references.
138 const uint32_t dwSharedObjBits = hStream->GetBits(16);
139 if (!IsValidPageOffsetHintTableBitCount(dwSharedObjBits))
140 return false;
141
142 // Item 11: The number of bits needed to represent the numerically
143 // greatest shared object identifier used by the pages.
144 const uint32_t dwSharedIdBits = hStream->GetBits(16);
145 if (!IsValidPageOffsetHintTableBitCount(dwSharedIdBits))
146 return false;
147
148 // Item 12: The number of bits needed to represent the numerator of
149 // the fractional position for each shared object reference. For each
150 // shared object referenced from a page, there is an indication of
151 // where in the page's content stream the object is first referenced.
152 const uint32_t dwSharedNumeratorBits = hStream->GetBits(16);
153 if (dwSharedNumeratorBits > 32)
154 return false;
155
156 // Item 13: Skip Item 13 which has 16 bits.
157 hStream->SkipBits(16);
158
159 FX_SAFE_UINT32 required_bits = dwDeltaObjectsBits;
160 required_bits *= nPages;
161 if (!CanReadFromBitStream(hStream, required_bits))
162 return false;
163
164 m_PageInfos = std::vector<PageInfo>(nPages);
165 m_PageInfos[nFirstPageNum].set_start_obj_num(
166 m_pLinearized->GetFirstPageObjNum());
167 // The object number of remaining pages starts from 1.
168 FX_SAFE_UINT32 dwStartObjNum = 1;
169 for (uint32_t i = 0; i < nPages; ++i) {
170 FX_SAFE_UINT32 safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits);
171 safeDeltaObj += dwObjLeastNum;
172 if (!safeDeltaObj.IsValid())
173 return false;
174 m_PageInfos[i].set_objects_count(safeDeltaObj.ValueOrDie());
175 if (i == nFirstPageNum)
176 continue;
177 m_PageInfos[i].set_start_obj_num(dwStartObjNum.ValueOrDie());
178 dwStartObjNum += m_PageInfos[i].objects_count();
179 if (!dwStartObjNum.IsValid() ||
180 dwStartObjNum.ValueOrDie() >= CPDF_Parser::kMaxObjectNumber) {
181 return false;
182 }
183 }
184 hStream->ByteAlign();
185
186 required_bits = dwDeltaPageLenBits;
187 required_bits *= nPages;
188 if (!CanReadFromBitStream(hStream, required_bits))
189 return false;
190
191 for (uint32_t i = 0; i < nPages; ++i) {
192 FX_SAFE_UINT32 safePageLen = hStream->GetBits(dwDeltaPageLenBits);
193 safePageLen += dwPageLeastLen;
194 if (!safePageLen.IsValid())
195 return false;
196 m_PageInfos[i].set_page_length(safePageLen.ValueOrDie());
197 }
198
199 DCHECK(m_szFirstPageObjOffset);
200 m_PageInfos[nFirstPageNum].set_page_offset(m_szFirstPageObjOffset);
201 FX_FILESIZE prev_page_end = m_pLinearized->GetFirstPageEndOffset();
202 for (uint32_t i = 0; i < nPages; ++i) {
203 if (i == nFirstPageNum)
204 continue;
205 m_PageInfos[i].set_page_offset(prev_page_end);
206 prev_page_end += m_PageInfos[i].page_length();
207 }
208 hStream->ByteAlign();
209
210 // Number of shared objects.
211 required_bits = dwSharedObjBits;
212 required_bits *= nPages;
213 if (!CanReadFromBitStream(hStream, required_bits))
214 return false;
215
216 std::vector<uint32_t> dwNSharedObjsArray(nPages);
217 for (uint32_t i = 0; i < nPages; i++)
218 dwNSharedObjsArray[i] = hStream->GetBits(dwSharedObjBits);
219 hStream->ByteAlign();
220
221 // Array of identifiers, size = nshared_objects.
222 for (uint32_t i = 0; i < nPages; i++) {
223 required_bits = dwSharedIdBits;
224 required_bits *= dwNSharedObjsArray[i];
225 if (!CanReadFromBitStream(hStream, required_bits))
226 return false;
227
228 for (uint32_t j = 0; j < dwNSharedObjsArray[i]; j++)
229 m_PageInfos[i].AddIdentifier(hStream->GetBits(dwSharedIdBits));
230 }
231 hStream->ByteAlign();
232
233 if (dwSharedNumeratorBits) {
234 for (uint32_t i = 0; i < nPages; i++) {
235 FX_SAFE_UINT32 safeSize = dwNSharedObjsArray[i];
236 safeSize *= dwSharedNumeratorBits;
237 if (!CanReadFromBitStream(hStream, safeSize))
238 return false;
239
240 hStream->SkipBits(safeSize.ValueOrDie());
241 }
242 hStream->ByteAlign();
243 }
244
245 FX_SAFE_UINT32 safeTotalPageLen = nPages;
246 safeTotalPageLen *= dwDeltaPageLenBits;
247 if (!CanReadFromBitStream(hStream, safeTotalPageLen))
248 return false;
249
250 hStream->SkipBits(safeTotalPageLen.ValueOrDie());
251 hStream->ByteAlign();
252 return true;
253 }
254
ReadSharedObjHintTable(CFX_BitStream * hStream,uint32_t offset)255 bool CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream,
256 uint32_t offset) {
257 if (!hStream || hStream->IsEOF())
258 return false;
259
260 FX_SAFE_UINT32 bit_offset = offset;
261 bit_offset *= 8;
262 if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie())
263 return false;
264 hStream->SkipBits((bit_offset - hStream->GetPos()).ValueOrDie());
265
266 const uint32_t kHeaderSize = 192;
267 if (hStream->BitsRemaining() < kHeaderSize)
268 return false;
269
270 // Item 1: The object number of the first object in the shared objects
271 // section.
272 uint32_t dwFirstSharedObjNum = hStream->GetBits(32);
273 if (!dwFirstSharedObjNum)
274 return false;
275
276 // Item 2: The location of the first object in the shared objects section.
277 const FX_FILESIZE szFirstSharedObjLoc =
278 HintsOffsetToFileOffset(hStream->GetBits(32));
279 if (!szFirstSharedObjLoc)
280 return false;
281
282 // Item 3: The number of shared object entries for the first page.
283 m_nFirstPageSharedObjs = hStream->GetBits(32);
284
285 // Item 4: The number of shared object entries for the shared objects
286 // section, including the number of shared object entries for the first page.
287 uint32_t dwSharedObjTotal = hStream->GetBits(32);
288
289 // Item 5: The number of bits needed to represent the greatest number of
290 // objects in a shared object group.
291 uint32_t dwSharedObjNumBits = hStream->GetBits(16);
292 if (dwSharedObjNumBits > 32)
293 return false;
294
295 // Item 6: The least length of a shared object group in bytes.
296 uint32_t dwGroupLeastLen = hStream->GetBits(32);
297
298 // Item 7: The number of bits needed to represent the difference between the
299 // greatest and least length of a shared object group, in bytes.
300 uint32_t dwDeltaGroupLen = hStream->GetBits(16);
301
302 // Trying to decode more than 32 bits isn't going to work when we write into
303 // a uint32_t. Decoding 0 bits also makes no sense.
304 if (!IsValidPageOffsetHintTableBitCount(dwDeltaGroupLen))
305 return false;
306
307 if (dwFirstSharedObjNum >= CPDF_Parser::kMaxObjectNumber ||
308 m_nFirstPageSharedObjs >= CPDF_Parser::kMaxObjectNumber ||
309 dwSharedObjTotal >= CPDF_Parser::kMaxObjectNumber) {
310 return false;
311 }
312
313 FX_SAFE_UINT32 required_bits = dwSharedObjTotal;
314 required_bits *= dwDeltaGroupLen;
315 if (!CanReadFromBitStream(hStream, required_bits))
316 return false;
317
318 if (dwSharedObjTotal > 0) {
319 uint32_t dwLastSharedObj = dwSharedObjTotal - 1;
320 if (dwLastSharedObj > m_nFirstPageSharedObjs) {
321 FX_SAFE_UINT32 safeObjNum = dwFirstSharedObjNum;
322 safeObjNum += dwLastSharedObj - m_nFirstPageSharedObjs;
323 if (!safeObjNum.IsValid())
324 return false;
325 }
326 }
327
328 m_SharedObjGroupInfos.resize(dwSharedObjTotal);
329 // Table F.6 – Shared object hint table, shared object group entries:
330 // Item 1: A number that, when added to the least shared object
331 // group length.
332 FX_SAFE_FILESIZE prev_shared_group_end_offset = m_szFirstPageObjOffset;
333 for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
334 if (i == m_nFirstPageSharedObjs)
335 prev_shared_group_end_offset = szFirstSharedObjLoc;
336
337 FX_SAFE_UINT32 safeObjLen = hStream->GetBits(dwDeltaGroupLen);
338 safeObjLen += dwGroupLeastLen;
339 if (!safeObjLen.IsValid())
340 return false;
341
342 m_SharedObjGroupInfos[i].m_dwLength = safeObjLen.ValueOrDie();
343 m_SharedObjGroupInfos[i].m_szOffset =
344 prev_shared_group_end_offset.ValueOrDie();
345 prev_shared_group_end_offset += m_SharedObjGroupInfos[i].m_dwLength;
346 if (!prev_shared_group_end_offset.IsValid())
347 return false;
348 }
349
350 hStream->ByteAlign();
351 {
352 // Item 2: A flag indicating whether the shared object signature (item 3) is
353 // present.
354 uint32_t signature_count = 0;
355 for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
356 signature_count += hStream->GetBits(1);
357 }
358 hStream->ByteAlign();
359 // Item 3: (Only if item 2 is 1) The shared object signature, a 16-byte MD5
360 // hash that uniquely identifies the resource that the group of objects
361 // represents.
362 if (signature_count) {
363 required_bits = signature_count;
364 required_bits *= 128;
365 if (!CanReadFromBitStream(hStream, required_bits))
366 return false;
367
368 hStream->SkipBits(required_bits.ValueOrDie());
369 hStream->ByteAlign();
370 }
371 }
372 // Item 4: A number equal to 1 less than the number of objects in the group.
373 FX_SAFE_UINT32 cur_obj_num = m_pLinearized->GetFirstPageObjNum();
374 for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
375 if (i == m_nFirstPageSharedObjs)
376 cur_obj_num = dwFirstSharedObjNum;
377
378 FX_SAFE_UINT32 obj_count =
379 dwSharedObjNumBits ? hStream->GetBits(dwSharedObjNumBits) : 0;
380 obj_count += 1;
381 if (!obj_count.IsValid())
382 return false;
383
384 uint32_t obj_num = cur_obj_num.ValueOrDie();
385 cur_obj_num += obj_count.ValueOrDie();
386 if (!cur_obj_num.IsValid())
387 return false;
388
389 m_SharedObjGroupInfos[i].m_dwStartObjNum = obj_num;
390 m_SharedObjGroupInfos[i].m_dwObjectsCount = obj_count.ValueOrDie();
391 }
392
393 hStream->ByteAlign();
394 return true;
395 }
396
GetPagePos(uint32_t index,FX_FILESIZE * szPageStartPos,FX_FILESIZE * szPageLength,uint32_t * dwObjNum) const397 bool CPDF_HintTables::GetPagePos(uint32_t index,
398 FX_FILESIZE* szPageStartPos,
399 FX_FILESIZE* szPageLength,
400 uint32_t* dwObjNum) const {
401 if (index >= m_pLinearized->GetPageCount())
402 return false;
403
404 *szPageStartPos = m_PageInfos[index].page_offset();
405 *szPageLength = m_PageInfos[index].page_length();
406 *dwObjNum = m_PageInfos[index].start_obj_num();
407 return true;
408 }
409
CheckPage(uint32_t index)410 CPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage(uint32_t index) {
411 if (index == m_pLinearized->GetFirstPageNo())
412 return CPDF_DataAvail::kDataAvailable;
413
414 if (index >= m_pLinearized->GetPageCount())
415 return CPDF_DataAvail::kDataError;
416
417 const uint32_t dwLength = m_PageInfos[index].page_length();
418 if (!dwLength)
419 return CPDF_DataAvail::kDataError;
420
421 if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
422 m_PageInfos[index].page_offset(), dwLength)) {
423 return CPDF_DataAvail::kDataNotAvailable;
424 }
425
426 // Download data of shared objects in the page.
427 for (const uint32_t dwIndex : m_PageInfos[index].Identifiers()) {
428 if (dwIndex >= m_SharedObjGroupInfos.size())
429 continue;
430 const SharedObjGroupInfo& shared_group_info =
431 m_SharedObjGroupInfos[dwIndex];
432
433 if (!shared_group_info.m_szOffset || !shared_group_info.m_dwLength)
434 return CPDF_DataAvail::kDataError;
435
436 if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
437 shared_group_info.m_szOffset, shared_group_info.m_dwLength)) {
438 return CPDF_DataAvail::kDataNotAvailable;
439 }
440 }
441 return CPDF_DataAvail::kDataAvailable;
442 }
443
LoadHintStream(CPDF_Stream * pHintStream)444 bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) {
445 if (!pHintStream || !m_pLinearized->HasHintTable())
446 return false;
447
448 RetainPtr<const CPDF_Dictionary> pDict = pHintStream->GetDict();
449 if (!pDict)
450 return false;
451
452 RetainPtr<const CPDF_Object> pOffset = pDict->GetObjectFor("S");
453 if (!pOffset || !pOffset->IsNumber())
454 return false;
455
456 int shared_hint_table_offset = pOffset->GetInteger();
457 if (shared_hint_table_offset <= 0)
458 return false;
459
460 auto pAcc =
461 pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(pHintStream));
462 pAcc->LoadAllDataFiltered();
463
464 uint32_t size = pAcc->GetSize();
465 // The header section of page offset hint table is 36 bytes.
466 // The header section of shared object hint table is 24 bytes.
467 // Hint table has at least 60 bytes.
468 const uint32_t kMinStreamLength = 60;
469 if (size < kMinStreamLength)
470 return false;
471
472 FX_SAFE_UINT32 safe_shared_hint_table_offset = shared_hint_table_offset;
473 if (!safe_shared_hint_table_offset.IsValid() ||
474 size < safe_shared_hint_table_offset.ValueOrDie()) {
475 return false;
476 }
477
478 CFX_BitStream bs(pAcc->GetSpan().subspan(0, size));
479 return ReadPageHintTable(&bs) &&
480 ReadSharedObjHintTable(&bs, shared_hint_table_offset);
481 }
482
HintsOffsetToFileOffset(uint32_t hints_offset) const483 FX_FILESIZE CPDF_HintTables::HintsOffsetToFileOffset(
484 uint32_t hints_offset) const {
485 FX_SAFE_FILESIZE file_offset = hints_offset;
486 if (!file_offset.IsValid())
487 return 0;
488
489 // The resulting positions shall be interpreted as if the primary hint stream
490 // itself were not present. That is, a position greater than the hint stream
491 // offset shall have the hint stream length added to it to determine the
492 // actual offset relative to the beginning of the file.
493 // See ISO 32000-1:2008 spec, annex F.4 (Hint tables).
494 // Note: The PDF spec does not mention this, but positions equal to the hint
495 // stream offset also need to have the hint stream length added to it. e.g.
496 // There exists linearized PDFs generated by Adobe software that have this
497 // property.
498 if (file_offset.ValueOrDie() >= m_pLinearized->GetHintStart())
499 file_offset += m_pLinearized->GetHintLength();
500
501 return file_offset.ValueOrDefault(0);
502 }
503