xref: /aosp_15_r20/external/pdfium/core/fpdfapi/parser/cpdf_hint_tables.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
8 
9 #include <limits>
10 
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_data_avail.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
16 #include "core/fpdfapi/parser/cpdf_parser.h"
17 #include "core/fpdfapi/parser/cpdf_read_validator.h"
18 #include "core/fpdfapi/parser/cpdf_stream.h"
19 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
20 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
21 #include "core/fxcrt/cfx_bitstream.h"
22 #include "core/fxcrt/fx_safe_types.h"
23 #include "third_party/base/check.h"
24 #include "third_party/base/containers/span.h"
25 #include "third_party/base/numerics/safe_conversions.h"
26 
27 namespace {
28 
CanReadFromBitStream(const CFX_BitStream * hStream,const FX_SAFE_UINT32 & bits)29 bool CanReadFromBitStream(const CFX_BitStream* hStream,
30                           const FX_SAFE_UINT32& bits) {
31   return bits.IsValid() && hStream->BitsRemaining() >= bits.ValueOrDie();
32 }
33 
// Validates a bit-width field read from the page table header. According to
// the note for Table F.3 in the PDF 1.7 reference, only 0 through 32 is
// permitted; a width of 0 carries no information, so it is rejected as well.
bool IsValidPageOffsetHintTableBitCount(uint32_t bits) {
  if (bits == 0)
    return false;

  return bits <= 32;
}
40 
41 }  // namespace
42 
// Compiler-generated constructor and destructor for the per-page record,
// defined out of line in this translation unit.
43 CPDF_HintTables::PageInfo::PageInfo() = default;
44 CPDF_HintTables::PageInfo::~PageInfo() = default;
45 
46 //  static
Parse(CPDF_SyntaxParser * parser,const CPDF_LinearizedHeader * pLinearized)47 std::unique_ptr<CPDF_HintTables> CPDF_HintTables::Parse(
48     CPDF_SyntaxParser* parser,
49     const CPDF_LinearizedHeader* pLinearized) {
50   DCHECK(parser);
51   if (!pLinearized || pLinearized->GetPageCount() <= 1 ||
52       !pLinearized->HasHintTable()) {
53     return nullptr;
54   }
55 
56   const FX_FILESIZE szHintStart = pLinearized->GetHintStart();
57   const uint32_t szHintLength = pLinearized->GetHintLength();
58 
59   if (!parser->GetValidator()->CheckDataRangeAndRequestIfUnavailable(
60           szHintStart, szHintLength)) {
61     return nullptr;
62   }
63 
64   parser->SetPos(szHintStart);
65   RetainPtr<CPDF_Stream> hints_stream = ToStream(
66       parser->GetIndirectObject(nullptr, CPDF_SyntaxParser::ParseType::kLoose));
67 
68   if (!hints_stream)
69     return nullptr;
70 
71   auto pHintTables = std::make_unique<CPDF_HintTables>(
72       parser->GetValidator().Get(), pLinearized);
73   if (!pHintTables->LoadHintStream(hints_stream.Get()))
74     return nullptr;
75 
76   return pHintTables;
77 }
78 
CPDF_HintTables(CPDF_ReadValidator * pValidator,const CPDF_LinearizedHeader * pLinearized)79 CPDF_HintTables::CPDF_HintTables(CPDF_ReadValidator* pValidator,
80                                  const CPDF_LinearizedHeader* pLinearized)
81     : m_pValidator(pValidator), m_pLinearized(pLinearized) {
82   DCHECK(m_pLinearized);
83 }
84 
85 CPDF_HintTables::~CPDF_HintTables() = default;
86 
ReadPageHintTable(CFX_BitStream * hStream)87 bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) {
88   const uint32_t nPages = m_pLinearized->GetPageCount();
89   if (nPages < 1 || nPages >= CPDF_Document::kPageMaxNum)
90     return false;
91 
92   const uint32_t nFirstPageNum = m_pLinearized->GetFirstPageNo();
93   if (nFirstPageNum >= nPages)
94     return false;
95 
96   if (!hStream || hStream->IsEOF())
97     return false;
98 
99   const uint32_t kHeaderSize = 288;
100   if (hStream->BitsRemaining() < kHeaderSize)
101     return false;
102 
103   // Item 1: The least number of objects in a page.
104   const uint32_t dwObjLeastNum = hStream->GetBits(32);
105   if (!dwObjLeastNum || dwObjLeastNum >= CPDF_Parser::kMaxObjectNumber)
106     return false;
107 
108   // Item 2: The location of the first page's page object.
109   const FX_FILESIZE szFirstObjLoc =
110       HintsOffsetToFileOffset(hStream->GetBits(32));
111   if (!szFirstObjLoc)
112     return false;
113 
114   m_szFirstPageObjOffset = szFirstObjLoc;
115 
116   // Item 3: The number of bits needed to represent the difference
117   // between the greatest and least number of objects in a page.
118   const uint32_t dwDeltaObjectsBits = hStream->GetBits(16);
119   if (!IsValidPageOffsetHintTableBitCount(dwDeltaObjectsBits))
120     return false;
121 
122   // Item 4: The least length of a page in bytes.
123   const uint32_t dwPageLeastLen = hStream->GetBits(32);
124   if (!dwPageLeastLen)
125     return false;
126 
127   // Item 5: The number of bits needed to represent the difference
128   // between the greatest and least length of a page, in bytes.
129   const uint32_t dwDeltaPageLenBits = hStream->GetBits(16);
130   if (!IsValidPageOffsetHintTableBitCount(dwDeltaPageLenBits))
131     return false;
132 
133   // Skip Item 6, 7, 8, 9 total 96 bits.
134   hStream->SkipBits(96);
135 
136   // Item 10: The number of bits needed to represent the greatest
137   // number of shared object references.
138   const uint32_t dwSharedObjBits = hStream->GetBits(16);
139   if (!IsValidPageOffsetHintTableBitCount(dwSharedObjBits))
140     return false;
141 
142   // Item 11: The number of bits needed to represent the numerically
143   // greatest shared object identifier used by the pages.
144   const uint32_t dwSharedIdBits = hStream->GetBits(16);
145   if (!IsValidPageOffsetHintTableBitCount(dwSharedIdBits))
146     return false;
147 
148   // Item 12: The number of bits needed to represent the numerator of
149   // the fractional position for each shared object reference. For each
150   // shared object referenced from a page, there is an indication of
151   // where in the page's content stream the object is first referenced.
152   const uint32_t dwSharedNumeratorBits = hStream->GetBits(16);
153   if (dwSharedNumeratorBits > 32)
154     return false;
155 
156   // Item 13: Skip Item 13 which has 16 bits.
157   hStream->SkipBits(16);
158 
159   FX_SAFE_UINT32 required_bits = dwDeltaObjectsBits;
160   required_bits *= nPages;
161   if (!CanReadFromBitStream(hStream, required_bits))
162     return false;
163 
164   m_PageInfos = std::vector<PageInfo>(nPages);
165   m_PageInfos[nFirstPageNum].set_start_obj_num(
166       m_pLinearized->GetFirstPageObjNum());
167   // The object number of remaining pages starts from 1.
168   FX_SAFE_UINT32 dwStartObjNum = 1;
169   for (uint32_t i = 0; i < nPages; ++i) {
170     FX_SAFE_UINT32 safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits);
171     safeDeltaObj += dwObjLeastNum;
172     if (!safeDeltaObj.IsValid())
173       return false;
174     m_PageInfos[i].set_objects_count(safeDeltaObj.ValueOrDie());
175     if (i == nFirstPageNum)
176       continue;
177     m_PageInfos[i].set_start_obj_num(dwStartObjNum.ValueOrDie());
178     dwStartObjNum += m_PageInfos[i].objects_count();
179     if (!dwStartObjNum.IsValid() ||
180         dwStartObjNum.ValueOrDie() >= CPDF_Parser::kMaxObjectNumber) {
181       return false;
182     }
183   }
184   hStream->ByteAlign();
185 
186   required_bits = dwDeltaPageLenBits;
187   required_bits *= nPages;
188   if (!CanReadFromBitStream(hStream, required_bits))
189     return false;
190 
191   for (uint32_t i = 0; i < nPages; ++i) {
192     FX_SAFE_UINT32 safePageLen = hStream->GetBits(dwDeltaPageLenBits);
193     safePageLen += dwPageLeastLen;
194     if (!safePageLen.IsValid())
195       return false;
196     m_PageInfos[i].set_page_length(safePageLen.ValueOrDie());
197   }
198 
199   DCHECK(m_szFirstPageObjOffset);
200   m_PageInfos[nFirstPageNum].set_page_offset(m_szFirstPageObjOffset);
201   FX_FILESIZE prev_page_end = m_pLinearized->GetFirstPageEndOffset();
202   for (uint32_t i = 0; i < nPages; ++i) {
203     if (i == nFirstPageNum)
204       continue;
205     m_PageInfos[i].set_page_offset(prev_page_end);
206     prev_page_end += m_PageInfos[i].page_length();
207   }
208   hStream->ByteAlign();
209 
210   // Number of shared objects.
211   required_bits = dwSharedObjBits;
212   required_bits *= nPages;
213   if (!CanReadFromBitStream(hStream, required_bits))
214     return false;
215 
216   std::vector<uint32_t> dwNSharedObjsArray(nPages);
217   for (uint32_t i = 0; i < nPages; i++)
218     dwNSharedObjsArray[i] = hStream->GetBits(dwSharedObjBits);
219   hStream->ByteAlign();
220 
221   // Array of identifiers, size = nshared_objects.
222   for (uint32_t i = 0; i < nPages; i++) {
223     required_bits = dwSharedIdBits;
224     required_bits *= dwNSharedObjsArray[i];
225     if (!CanReadFromBitStream(hStream, required_bits))
226       return false;
227 
228     for (uint32_t j = 0; j < dwNSharedObjsArray[i]; j++)
229       m_PageInfos[i].AddIdentifier(hStream->GetBits(dwSharedIdBits));
230   }
231   hStream->ByteAlign();
232 
233   if (dwSharedNumeratorBits) {
234     for (uint32_t i = 0; i < nPages; i++) {
235       FX_SAFE_UINT32 safeSize = dwNSharedObjsArray[i];
236       safeSize *= dwSharedNumeratorBits;
237       if (!CanReadFromBitStream(hStream, safeSize))
238         return false;
239 
240       hStream->SkipBits(safeSize.ValueOrDie());
241     }
242     hStream->ByteAlign();
243   }
244 
245   FX_SAFE_UINT32 safeTotalPageLen = nPages;
246   safeTotalPageLen *= dwDeltaPageLenBits;
247   if (!CanReadFromBitStream(hStream, safeTotalPageLen))
248     return false;
249 
250   hStream->SkipBits(safeTotalPageLen.ValueOrDie());
251   hStream->ByteAlign();
252   return true;
253 }
254 
ReadSharedObjHintTable(CFX_BitStream * hStream,uint32_t offset)255 bool CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream,
256                                              uint32_t offset) {
257   if (!hStream || hStream->IsEOF())
258     return false;
259 
260   FX_SAFE_UINT32 bit_offset = offset;
261   bit_offset *= 8;
262   if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie())
263     return false;
264   hStream->SkipBits((bit_offset - hStream->GetPos()).ValueOrDie());
265 
266   const uint32_t kHeaderSize = 192;
267   if (hStream->BitsRemaining() < kHeaderSize)
268     return false;
269 
270   // Item 1: The object number of the first object in the shared objects
271   // section.
272   uint32_t dwFirstSharedObjNum = hStream->GetBits(32);
273   if (!dwFirstSharedObjNum)
274     return false;
275 
276   // Item 2: The location of the first object in the shared objects section.
277   const FX_FILESIZE szFirstSharedObjLoc =
278       HintsOffsetToFileOffset(hStream->GetBits(32));
279   if (!szFirstSharedObjLoc)
280     return false;
281 
282   // Item 3: The number of shared object entries for the first page.
283   m_nFirstPageSharedObjs = hStream->GetBits(32);
284 
285   // Item 4: The number of shared object entries for the shared objects
286   // section, including the number of shared object entries for the first page.
287   uint32_t dwSharedObjTotal = hStream->GetBits(32);
288 
289   // Item 5: The number of bits needed to represent the greatest number of
290   // objects in a shared object group.
291   uint32_t dwSharedObjNumBits = hStream->GetBits(16);
292   if (dwSharedObjNumBits > 32)
293     return false;
294 
295   // Item 6: The least length of a shared object group in bytes.
296   uint32_t dwGroupLeastLen = hStream->GetBits(32);
297 
298   // Item 7: The number of bits needed to represent the difference between the
299   // greatest and least length of a shared object group, in bytes.
300   uint32_t dwDeltaGroupLen = hStream->GetBits(16);
301 
302   // Trying to decode more than 32 bits isn't going to work when we write into
303   // a uint32_t. Decoding 0 bits also makes no sense.
304   if (!IsValidPageOffsetHintTableBitCount(dwDeltaGroupLen))
305     return false;
306 
307   if (dwFirstSharedObjNum >= CPDF_Parser::kMaxObjectNumber ||
308       m_nFirstPageSharedObjs >= CPDF_Parser::kMaxObjectNumber ||
309       dwSharedObjTotal >= CPDF_Parser::kMaxObjectNumber) {
310     return false;
311   }
312 
313   FX_SAFE_UINT32 required_bits = dwSharedObjTotal;
314   required_bits *= dwDeltaGroupLen;
315   if (!CanReadFromBitStream(hStream, required_bits))
316     return false;
317 
318   if (dwSharedObjTotal > 0) {
319     uint32_t dwLastSharedObj = dwSharedObjTotal - 1;
320     if (dwLastSharedObj > m_nFirstPageSharedObjs) {
321       FX_SAFE_UINT32 safeObjNum = dwFirstSharedObjNum;
322       safeObjNum += dwLastSharedObj - m_nFirstPageSharedObjs;
323       if (!safeObjNum.IsValid())
324         return false;
325     }
326   }
327 
328   m_SharedObjGroupInfos.resize(dwSharedObjTotal);
329   // Table F.6 –  Shared object hint table, shared object group entries:
330   // Item 1: A number that, when added to the least shared object
331   // group length.
332   FX_SAFE_FILESIZE prev_shared_group_end_offset = m_szFirstPageObjOffset;
333   for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
334     if (i == m_nFirstPageSharedObjs)
335       prev_shared_group_end_offset = szFirstSharedObjLoc;
336 
337     FX_SAFE_UINT32 safeObjLen = hStream->GetBits(dwDeltaGroupLen);
338     safeObjLen += dwGroupLeastLen;
339     if (!safeObjLen.IsValid())
340       return false;
341 
342     m_SharedObjGroupInfos[i].m_dwLength = safeObjLen.ValueOrDie();
343     m_SharedObjGroupInfos[i].m_szOffset =
344         prev_shared_group_end_offset.ValueOrDie();
345     prev_shared_group_end_offset += m_SharedObjGroupInfos[i].m_dwLength;
346     if (!prev_shared_group_end_offset.IsValid())
347       return false;
348   }
349 
350   hStream->ByteAlign();
351   {
352     // Item 2: A flag indicating whether the shared object signature (item 3) is
353     // present.
354     uint32_t signature_count = 0;
355     for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
356       signature_count += hStream->GetBits(1);
357     }
358     hStream->ByteAlign();
359     // Item 3: (Only if item 2 is 1) The shared object signature, a 16-byte MD5
360     // hash that uniquely identifies the resource that the group of objects
361     // represents.
362     if (signature_count) {
363       required_bits = signature_count;
364       required_bits *= 128;
365       if (!CanReadFromBitStream(hStream, required_bits))
366         return false;
367 
368       hStream->SkipBits(required_bits.ValueOrDie());
369       hStream->ByteAlign();
370     }
371   }
372   // Item 4: A number equal to 1 less than the number of objects in the group.
373   FX_SAFE_UINT32 cur_obj_num = m_pLinearized->GetFirstPageObjNum();
374   for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
375     if (i == m_nFirstPageSharedObjs)
376       cur_obj_num = dwFirstSharedObjNum;
377 
378     FX_SAFE_UINT32 obj_count =
379         dwSharedObjNumBits ? hStream->GetBits(dwSharedObjNumBits) : 0;
380     obj_count += 1;
381     if (!obj_count.IsValid())
382       return false;
383 
384     uint32_t obj_num = cur_obj_num.ValueOrDie();
385     cur_obj_num += obj_count.ValueOrDie();
386     if (!cur_obj_num.IsValid())
387       return false;
388 
389     m_SharedObjGroupInfos[i].m_dwStartObjNum = obj_num;
390     m_SharedObjGroupInfos[i].m_dwObjectsCount = obj_count.ValueOrDie();
391   }
392 
393   hStream->ByteAlign();
394   return true;
395 }
396 
GetPagePos(uint32_t index,FX_FILESIZE * szPageStartPos,FX_FILESIZE * szPageLength,uint32_t * dwObjNum) const397 bool CPDF_HintTables::GetPagePos(uint32_t index,
398                                  FX_FILESIZE* szPageStartPos,
399                                  FX_FILESIZE* szPageLength,
400                                  uint32_t* dwObjNum) const {
401   if (index >= m_pLinearized->GetPageCount())
402     return false;
403 
404   *szPageStartPos = m_PageInfos[index].page_offset();
405   *szPageLength = m_PageInfos[index].page_length();
406   *dwObjNum = m_PageInfos[index].start_obj_num();
407   return true;
408 }
409 
CheckPage(uint32_t index)410 CPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage(uint32_t index) {
411   if (index == m_pLinearized->GetFirstPageNo())
412     return CPDF_DataAvail::kDataAvailable;
413 
414   if (index >= m_pLinearized->GetPageCount())
415     return CPDF_DataAvail::kDataError;
416 
417   const uint32_t dwLength = m_PageInfos[index].page_length();
418   if (!dwLength)
419     return CPDF_DataAvail::kDataError;
420 
421   if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
422           m_PageInfos[index].page_offset(), dwLength)) {
423     return CPDF_DataAvail::kDataNotAvailable;
424   }
425 
426   // Download data of shared objects in the page.
427   for (const uint32_t dwIndex : m_PageInfos[index].Identifiers()) {
428     if (dwIndex >= m_SharedObjGroupInfos.size())
429       continue;
430     const SharedObjGroupInfo& shared_group_info =
431         m_SharedObjGroupInfos[dwIndex];
432 
433     if (!shared_group_info.m_szOffset || !shared_group_info.m_dwLength)
434       return CPDF_DataAvail::kDataError;
435 
436     if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
437             shared_group_info.m_szOffset, shared_group_info.m_dwLength)) {
438       return CPDF_DataAvail::kDataNotAvailable;
439     }
440   }
441   return CPDF_DataAvail::kDataAvailable;
442 }
443 
LoadHintStream(CPDF_Stream * pHintStream)444 bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) {
445   if (!pHintStream || !m_pLinearized->HasHintTable())
446     return false;
447 
448   RetainPtr<const CPDF_Dictionary> pDict = pHintStream->GetDict();
449   if (!pDict)
450     return false;
451 
452   RetainPtr<const CPDF_Object> pOffset = pDict->GetObjectFor("S");
453   if (!pOffset || !pOffset->IsNumber())
454     return false;
455 
456   int shared_hint_table_offset = pOffset->GetInteger();
457   if (shared_hint_table_offset <= 0)
458     return false;
459 
460   auto pAcc =
461       pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(pHintStream));
462   pAcc->LoadAllDataFiltered();
463 
464   uint32_t size = pAcc->GetSize();
465   // The header section of page offset hint table is 36 bytes.
466   // The header section of shared object hint table is 24 bytes.
467   // Hint table has at least 60 bytes.
468   const uint32_t kMinStreamLength = 60;
469   if (size < kMinStreamLength)
470     return false;
471 
472   FX_SAFE_UINT32 safe_shared_hint_table_offset = shared_hint_table_offset;
473   if (!safe_shared_hint_table_offset.IsValid() ||
474       size < safe_shared_hint_table_offset.ValueOrDie()) {
475     return false;
476   }
477 
478   CFX_BitStream bs(pAcc->GetSpan().subspan(0, size));
479   return ReadPageHintTable(&bs) &&
480          ReadSharedObjHintTable(&bs, shared_hint_table_offset);
481 }
482 
HintsOffsetToFileOffset(uint32_t hints_offset) const483 FX_FILESIZE CPDF_HintTables::HintsOffsetToFileOffset(
484     uint32_t hints_offset) const {
485   FX_SAFE_FILESIZE file_offset = hints_offset;
486   if (!file_offset.IsValid())
487     return 0;
488 
489   // The resulting positions shall be interpreted as if the primary hint stream
490   // itself were not present. That is, a position greater than the hint stream
491   // offset shall have the hint stream length added to it to determine the
492   // actual offset relative to the beginning of the file.
493   // See ISO 32000-1:2008 spec, annex F.4 (Hint tables).
494   // Note: The PDF spec does not mention this, but positions equal to the hint
495   // stream offset also need to have the hint stream length added to it. e.g.
496   // There exists linearized PDFs generated by Adobe software that have this
497   // property.
498   if (file_offset.ValueOrDie() >= m_pLinearized->GetHintStart())
499     file_offset += m_pLinearized->GetHintLength();
500 
501   return file_offset.ValueOrDefault(0);
502 }
503