xref: /aosp_15_r20/external/pdfium/core/fxcodec/basic/basicmodule.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2019 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "core/fxcodec/basic/basicmodule.h"
6 
7 #include <stdint.h>
8 
9 #include <algorithm>
10 #include <utility>
11 
12 #include "core/fxcodec/scanlinedecoder.h"
13 #include "core/fxcrt/data_vector.h"
14 #include "core/fxcrt/fx_memory_wrappers.h"
15 #include "core/fxcrt/fx_safe_types.h"
16 #include "core/fxcrt/fx_system.h"
17 #include "core/fxcrt/span_util.h"
18 #include "third_party/base/check.h"
19 #include "third_party/base/numerics/safe_conversions.h"
20 
21 namespace fxcodec {
22 
23 namespace {
24 
25 class RLScanlineDecoder final : public ScanlineDecoder {
26  public:
27   RLScanlineDecoder();
28   ~RLScanlineDecoder() override;
29 
30   bool Create(pdfium::span<const uint8_t> src_buf,
31               int width,
32               int height,
33               int nComps,
34               int bpc);
35 
36   // ScanlineDecoder:
37   bool Rewind() override;
38   pdfium::span<uint8_t> GetNextLine() override;
39   uint32_t GetSrcOffset() override;
40 
41  private:
42   bool CheckDestSize();
43   void GetNextOperator();
44   void UpdateOperator(uint8_t used_bytes);
45 
46   DataVector<uint8_t> m_Scanline;
47   pdfium::span<const uint8_t> m_SrcBuf;
48   size_t m_dwLineBytes = 0;
49   size_t m_SrcOffset = 0;
50   bool m_bEOD = false;
51   uint8_t m_Operator = 0;
52 };
53 
54 RLScanlineDecoder::RLScanlineDecoder() = default;
55 
~RLScanlineDecoder()56 RLScanlineDecoder::~RLScanlineDecoder() {
57   // Span in superclass can't outlive our buffer.
58   m_pLastScanline = pdfium::span<uint8_t>();
59 }
60 
CheckDestSize()61 bool RLScanlineDecoder::CheckDestSize() {
62   size_t i = 0;
63   uint32_t old_size = 0;
64   uint32_t dest_size = 0;
65   while (i < m_SrcBuf.size()) {
66     if (m_SrcBuf[i] < 128) {
67       old_size = dest_size;
68       dest_size += m_SrcBuf[i] + 1;
69       if (dest_size < old_size) {
70         return false;
71       }
72       i += m_SrcBuf[i] + 2;
73     } else if (m_SrcBuf[i] > 128) {
74       old_size = dest_size;
75       dest_size += 257 - m_SrcBuf[i];
76       if (dest_size < old_size) {
77         return false;
78       }
79       i += 2;
80     } else {
81       break;
82     }
83   }
84   if (((uint32_t)m_OrigWidth * m_nComps * m_bpc * m_OrigHeight + 7) / 8 >
85       dest_size) {
86     return false;
87   }
88   return true;
89 }
90 
Create(pdfium::span<const uint8_t> src_buf,int width,int height,int nComps,int bpc)91 bool RLScanlineDecoder::Create(pdfium::span<const uint8_t> src_buf,
92                                int width,
93                                int height,
94                                int nComps,
95                                int bpc) {
96   m_SrcBuf = src_buf;
97   m_OutputWidth = m_OrigWidth = width;
98   m_OutputHeight = m_OrigHeight = height;
99   m_nComps = nComps;
100   m_bpc = bpc;
101   // Aligning the pitch to 4 bytes requires an integer overflow check.
102   FX_SAFE_UINT32 pitch = width;
103   pitch *= nComps;
104   pitch *= bpc;
105   pitch += 31;
106   pitch /= 32;
107   pitch *= 4;
108   if (!pitch.IsValid()) {
109     return false;
110   }
111   m_Pitch = pitch.ValueOrDie();
112   // Overflow should already have been checked before this is called.
113   m_dwLineBytes = (static_cast<uint32_t>(width) * nComps * bpc + 7) / 8;
114   m_Scanline.resize(m_Pitch);
115   return CheckDestSize();
116 }
117 
Rewind()118 bool RLScanlineDecoder::Rewind() {
119   fxcrt::spanclr(pdfium::make_span(m_Scanline));
120   m_SrcOffset = 0;
121   m_bEOD = false;
122   m_Operator = 0;
123   return true;
124 }
125 
GetNextLine()126 pdfium::span<uint8_t> RLScanlineDecoder::GetNextLine() {
127   if (m_SrcOffset == 0) {
128     GetNextOperator();
129   } else if (m_bEOD) {
130     return pdfium::span<uint8_t>();
131   }
132   uint32_t col_pos = 0;
133   bool eol = false;
134   auto scan_span = pdfium::make_span(m_Scanline);
135   fxcrt::spanclr(scan_span);
136   while (m_SrcOffset < m_SrcBuf.size() && !eol) {
137     if (m_Operator < 128) {
138       uint32_t copy_len = m_Operator + 1;
139       if (col_pos + copy_len >= m_dwLineBytes) {
140         copy_len =
141             pdfium::base::checked_cast<uint32_t>(m_dwLineBytes - col_pos);
142         eol = true;
143       }
144       if (copy_len >= m_SrcBuf.size() - m_SrcOffset) {
145         copy_len =
146             pdfium::base::checked_cast<uint32_t>(m_SrcBuf.size() - m_SrcOffset);
147         m_bEOD = true;
148       }
149       auto copy_span = m_SrcBuf.subspan(m_SrcOffset, copy_len);
150       fxcrt::spancpy(scan_span.subspan(col_pos), copy_span);
151       col_pos += copy_len;
152       UpdateOperator((uint8_t)copy_len);
153     } else if (m_Operator > 128) {
154       int fill = 0;
155       if (m_SrcOffset - 1 < m_SrcBuf.size() - 1) {
156         fill = m_SrcBuf[m_SrcOffset];
157       }
158       uint32_t duplicate_len = 257 - m_Operator;
159       if (col_pos + duplicate_len >= m_dwLineBytes) {
160         duplicate_len =
161             pdfium::base::checked_cast<uint32_t>(m_dwLineBytes - col_pos);
162         eol = true;
163       }
164       fxcrt::spanset(scan_span.subspan(col_pos, duplicate_len), fill);
165       col_pos += duplicate_len;
166       UpdateOperator((uint8_t)duplicate_len);
167     } else {
168       m_bEOD = true;
169       break;
170     }
171   }
172   return m_Scanline;
173 }
174 
GetSrcOffset()175 uint32_t RLScanlineDecoder::GetSrcOffset() {
176   return pdfium::base::checked_cast<uint32_t>(m_SrcOffset);
177 }
178 
GetNextOperator()179 void RLScanlineDecoder::GetNextOperator() {
180   if (m_SrcOffset >= m_SrcBuf.size()) {
181     m_Operator = 128;
182     return;
183   }
184   m_Operator = m_SrcBuf[m_SrcOffset];
185   m_SrcOffset++;
186 }
UpdateOperator(uint8_t used_bytes)187 void RLScanlineDecoder::UpdateOperator(uint8_t used_bytes) {
188   if (used_bytes == 0) {
189     return;
190   }
191   if (m_Operator < 128) {
192     DCHECK((uint32_t)m_Operator + 1 >= used_bytes);
193     if (used_bytes == m_Operator + 1) {
194       m_SrcOffset += used_bytes;
195       GetNextOperator();
196       return;
197     }
198     m_Operator -= used_bytes;
199     m_SrcOffset += used_bytes;
200     if (m_SrcOffset >= m_SrcBuf.size()) {
201       m_Operator = 128;
202     }
203     return;
204   }
205   uint8_t count = 257 - m_Operator;
206   DCHECK((uint32_t)count >= used_bytes);
207   if (used_bytes == count) {
208     m_SrcOffset++;
209     GetNextOperator();
210     return;
211   }
212   count -= used_bytes;
213   m_Operator = 257 - count;
214 }
215 
216 }  // namespace
217 
218 // static
CreateRunLengthDecoder(pdfium::span<const uint8_t> src_buf,int width,int height,int nComps,int bpc)219 std::unique_ptr<ScanlineDecoder> BasicModule::CreateRunLengthDecoder(
220     pdfium::span<const uint8_t> src_buf,
221     int width,
222     int height,
223     int nComps,
224     int bpc) {
225   auto pDecoder = std::make_unique<RLScanlineDecoder>();
226   if (!pDecoder->Create(src_buf, width, height, nComps, bpc))
227     return nullptr;
228 
229   return std::move(pDecoder);
230 }
231 
232 // static
RunLengthEncode(pdfium::span<const uint8_t> src_span)233 DataVector<uint8_t> BasicModule::RunLengthEncode(
234     pdfium::span<const uint8_t> src_span) {
235   if (src_span.empty())
236     return {};
237 
238   // Handle edge case.
239   if (src_span.size() == 1)
240     return {0, src_span[0], 128};
241 
242   // Worst case: 1 nonmatch, 2 match, 1 nonmatch, 2 match, etc. This becomes
243   // 4 output chars for every 3 input, plus up to 4 more for the 1-2 chars
244   // rounded off plus the terminating character.
245   FX_SAFE_SIZE_T estimated_size = src_span.size();
246   estimated_size += 2;
247   estimated_size /= 3;
248   estimated_size *= 4;
249   estimated_size += 1;
250   DataVector<uint8_t> result(estimated_size.ValueOrDie());
251 
252   // Set up span and counts.
253   auto result_span = pdfium::make_span(result);
254   uint32_t run_start = 0;
255   uint32_t run_end = 1;
256   uint8_t x = src_span[run_start];
257   uint8_t y = src_span[run_end];
258   while (run_end < src_span.size()) {
259     size_t max_len = std::min<size_t>(128, src_span.size() - run_start);
260     while (x == y && (run_end - run_start < max_len - 1))
261       y = src_span[++run_end];
262 
263     // Reached end with matched run. Update variables to expected values.
264     if (x == y) {
265       run_end++;
266       if (run_end < src_span.size())
267         y = src_span[run_end];
268     }
269     if (run_end - run_start > 1) {  // Matched run but not at end of input.
270       result_span[0] = 257 - (run_end - run_start);
271       result_span[1] = x;
272       x = y;
273       run_start = run_end;
274       run_end++;
275       if (run_end < src_span.size())
276         y = src_span[run_end];
277       result_span = result_span.subspan(2);
278       continue;
279     }
280     // Mismatched run
281     while (x != y && run_end <= run_start + max_len) {
282       result_span[run_end - run_start] = x;
283       x = y;
284       run_end++;
285       if (run_end == src_span.size()) {
286         if (run_end <= run_start + max_len) {
287           result_span[run_end - run_start] = x;
288           run_end++;
289         }
290         break;
291       }
292       y = src_span[run_end];
293     }
294     result_span[0] = run_end - run_start - 2;
295     result_span = result_span.subspan(run_end - run_start);
296     run_start = run_end - 1;
297   }
298   if (run_start < src_span.size()) {  // 1 leftover character
299     result_span[0] = 0;
300     result_span[1] = x;
301     result_span = result_span.subspan(2);
302   }
303   result_span[0] = 128;
304   size_t new_size = 1 + result.size() - result_span.size();
305   CHECK_LE(new_size, result.size());
306   result.resize(new_size);
307   return result;
308 }
309 
310 // static
A85Encode(pdfium::span<const uint8_t> src_span)311 DataVector<uint8_t> BasicModule::A85Encode(
312     pdfium::span<const uint8_t> src_span) {
313   DataVector<uint8_t> result;
314   if (src_span.empty())
315     return result;
316 
317   // Worst case: 5 output for each 4 input (plus up to 4 from leftover), plus
318   // 2 character new lines each 75 output chars plus 2 termination chars. May
319   // have fewer if there are special "z" chars.
320   FX_SAFE_SIZE_T estimated_size = src_span.size();
321   estimated_size /= 4;
322   estimated_size *= 5;
323   estimated_size += 4;
324   estimated_size += src_span.size() / 30;
325   estimated_size += 2;
326   result.resize(estimated_size.ValueOrDie());
327 
328   // Set up span and counts.
329   auto result_span = pdfium::make_span(result);
330   uint32_t pos = 0;
331   uint32_t line_length = 0;
332   while (src_span.size() >= 4 && pos < src_span.size() - 3) {
333     auto val_span = src_span.subspan(pos, 4);
334     uint32_t val = FXSYS_UINT32_GET_MSBFIRST(val_span);
335     pos += 4;
336     if (val == 0) {  // All zero special case
337       result_span[0] = 'z';
338       result_span = result_span.subspan(1);
339       line_length++;
340     } else {  // Compute base 85 characters and add 33.
341       for (int i = 4; i >= 0; i--) {
342         result_span[i] = (val % 85) + 33;
343         val /= 85;
344       }
345       result_span = result_span.subspan(5);
346       line_length += 5;
347     }
348     if (line_length >= 75) {  // Add a return.
349       result_span[0] = '\r';
350       result_span[1] = '\n';
351       result_span = result_span.subspan(2);
352       line_length = 0;
353     }
354   }
355   if (pos < src_span.size()) {  // Leftover bytes
356     uint32_t val = 0;
357     int count = 0;
358     while (pos < src_span.size()) {
359       val += (uint32_t)(src_span[pos]) << (8 * (3 - count));
360       count++;
361       pos++;
362     }
363     for (int i = 4; i >= 0; i--) {
364       if (i <= count)
365         result_span[i] = (val % 85) + 33;
366       val /= 85;
367     }
368     result_span = result_span.subspan(count + 1);
369   }
370 
371   // Terminating characters.
372   result_span[0] = '~';
373   result_span[1] = '>';
374   size_t new_size = 2 + result.size() - result_span.size();
375   CHECK_LE(new_size, result.size());
376   result.resize(new_size);
377   return result;
378 }
379 
380 }  // namespace fxcodec
381