xref: /aosp_15_r20/external/pdfium/samples/helpers/write.cc (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2018 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "samples/helpers/write.h"
6 
7 #include <limits.h>
8 
9 #include <sstream>
10 #include <string>
11 #include <utility>
12 #include <vector>
13 
14 #include "public/cpp/fpdf_scopers.h"
15 #include "public/fpdf_annot.h"
16 #include "public/fpdf_attachment.h"
17 #include "public/fpdf_edit.h"
18 #include "public/fpdf_thumbnail.h"
19 #include "testing/fx_string_testhelpers.h"
20 #include "testing/image_diff/image_diff_png.h"
21 #include "third_party/base/notreached.h"
22 
23 #ifdef PDF_ENABLE_SKIA
24 #include "third_party/skia/include/core/SkPicture.h"  // nogncheck
25 #include "third_party/skia/include/core/SkStream.h"   // nogncheck
26 #endif
27 
28 namespace {
29 
// Returns true when |stride|, |width| and |height| are all non-negative and
// the product |stride| * |height| does not exceed INT_MAX.
bool CheckDimensions(int stride, int width, int height) {
  const bool any_negative = stride < 0 || width < 0 || height < 0;
  if (any_negative) {
    return false;
  }
  // Compare against the quotient instead of multiplying, so the check itself
  // can never overflow. A zero |height| is always acceptable.
  const bool product_too_large = height > 0 && stride > INT_MAX / height;
  return !product_too_large;
}
39 
AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype)40 const char* AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype) {
41   if (subtype == FPDF_ANNOT_TEXT) {
42     return "Text";
43   }
44   if (subtype == FPDF_ANNOT_LINK) {
45     return "Link";
46   }
47   if (subtype == FPDF_ANNOT_FREETEXT) {
48     return "FreeText";
49   }
50   if (subtype == FPDF_ANNOT_LINE) {
51     return "Line";
52   }
53   if (subtype == FPDF_ANNOT_SQUARE) {
54     return "Square";
55   }
56   if (subtype == FPDF_ANNOT_CIRCLE) {
57     return "Circle";
58   }
59   if (subtype == FPDF_ANNOT_POLYGON) {
60     return "Polygon";
61   }
62   if (subtype == FPDF_ANNOT_POLYLINE) {
63     return "PolyLine";
64   }
65   if (subtype == FPDF_ANNOT_HIGHLIGHT) {
66     return "Highlight";
67   }
68   if (subtype == FPDF_ANNOT_UNDERLINE) {
69     return "Underline";
70   }
71   if (subtype == FPDF_ANNOT_SQUIGGLY) {
72     return "Squiggly";
73   }
74   if (subtype == FPDF_ANNOT_STRIKEOUT) {
75     return "StrikeOut";
76   }
77   if (subtype == FPDF_ANNOT_STAMP) {
78     return "Stamp";
79   }
80   if (subtype == FPDF_ANNOT_CARET) {
81     return "Caret";
82   }
83   if (subtype == FPDF_ANNOT_INK) {
84     return "Ink";
85   }
86   if (subtype == FPDF_ANNOT_POPUP) {
87     return "Popup";
88   }
89   if (subtype == FPDF_ANNOT_FILEATTACHMENT) {
90     return "FileAttachment";
91   }
92   if (subtype == FPDF_ANNOT_SOUND) {
93     return "Sound";
94   }
95   if (subtype == FPDF_ANNOT_MOVIE) {
96     return "Movie";
97   }
98   if (subtype == FPDF_ANNOT_WIDGET) {
99     return "Widget";
100   }
101   if (subtype == FPDF_ANNOT_SCREEN) {
102     return "Screen";
103   }
104   if (subtype == FPDF_ANNOT_PRINTERMARK) {
105     return "PrinterMark";
106   }
107   if (subtype == FPDF_ANNOT_TRAPNET) {
108     return "TrapNet";
109   }
110   if (subtype == FPDF_ANNOT_WATERMARK) {
111     return "Watermark";
112   }
113   if (subtype == FPDF_ANNOT_THREED) {
114     return "3D";
115   }
116   if (subtype == FPDF_ANNOT_RICHMEDIA) {
117     return "RichMedia";
118   }
119   if (subtype == FPDF_ANNOT_XFAWIDGET) {
120     return "XFAWidget";
121   }
122   NOTREACHED_NORETURN();
123 }
124 
// Appends |flag| to |*output|, inserting ", " first when |*output| already
// contains text.
void AppendFlagString(const char* flag, std::string* output) {
  const bool needs_separator = !output->empty();
  if (needs_separator) {
    output->append(", ");
  }
  output->append(flag);
}
131 
AnnotFlagsToString(int flags)132 std::string AnnotFlagsToString(int flags) {
133   std::string str;
134   if (flags & FPDF_ANNOT_FLAG_INVISIBLE) {
135     AppendFlagString("Invisible", &str);
136   }
137   if (flags & FPDF_ANNOT_FLAG_HIDDEN) {
138     AppendFlagString("Hidden", &str);
139   }
140   if (flags & FPDF_ANNOT_FLAG_PRINT) {
141     AppendFlagString("Print", &str);
142   }
143   if (flags & FPDF_ANNOT_FLAG_NOZOOM) {
144     AppendFlagString("NoZoom", &str);
145   }
146   if (flags & FPDF_ANNOT_FLAG_NOROTATE) {
147     AppendFlagString("NoRotate", &str);
148   }
149   if (flags & FPDF_ANNOT_FLAG_NOVIEW) {
150     AppendFlagString("NoView", &str);
151   }
152   if (flags & FPDF_ANNOT_FLAG_READONLY) {
153     AppendFlagString("ReadOnly", &str);
154   }
155   if (flags & FPDF_ANNOT_FLAG_LOCKED) {
156     AppendFlagString("Locked", &str);
157   }
158   if (flags & FPDF_ANNOT_FLAG_TOGGLENOVIEW) {
159     AppendFlagString("ToggleNoView", &str);
160   }
161   return str;
162 }
163 
PageObjectTypeToCString(int type)164 const char* PageObjectTypeToCString(int type) {
165   if (type == FPDF_PAGEOBJ_TEXT) {
166     return "Text";
167   }
168   if (type == FPDF_PAGEOBJ_PATH) {
169     return "Path";
170   }
171   if (type == FPDF_PAGEOBJ_IMAGE) {
172     return "Image";
173   }
174   if (type == FPDF_PAGEOBJ_SHADING) {
175     return "Shading";
176   }
177   if (type == FPDF_PAGEOBJ_FORM) {
178     return "Form";
179   }
180   NOTREACHED_NORETURN();
181 }
182 
EncodePng(pdfium::span<const uint8_t> input,int width,int height,int stride,int format)183 std::vector<uint8_t> EncodePng(pdfium::span<const uint8_t> input,
184                                int width,
185                                int height,
186                                int stride,
187                                int format) {
188   std::vector<uint8_t> png;
189   switch (format) {
190     case FPDFBitmap_Unknown:
191       break;
192     case FPDFBitmap_Gray:
193       png = image_diff_png::EncodeGrayPNG(input, width, height, stride);
194       break;
195     case FPDFBitmap_BGR:
196       png = image_diff_png::EncodeBGRPNG(input, width, height, stride);
197       break;
198     case FPDFBitmap_BGRx:
199       png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
200                                           /*discard_transparency=*/true);
201       break;
202     case FPDFBitmap_BGRA:
203       png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
204                                           /*discard_transparency=*/false);
205       break;
206     default:
207       NOTREACHED_NORETURN();
208   }
209   return png;
210 }
211 
212 #ifdef _WIN32
EnhMetaFileProc(HDC hdc,HANDLETABLE * handle_table,const ENHMETARECORD * record,int objects_count,LPARAM param)213 int CALLBACK EnhMetaFileProc(HDC hdc,
214                              HANDLETABLE* handle_table,
215                              const ENHMETARECORD* record,
216                              int objects_count,
217                              LPARAM param) {
218   std::vector<const ENHMETARECORD*>& items =
219       *reinterpret_cast<std::vector<const ENHMETARECORD*>*>(param);
220   items.push_back(record);
221   return 1;
222 }
223 #endif  // _WIN32
224 
// Builds "<pdf_name>.<page_num>.<extension>". Returns an empty string, after
// reporting on stderr, when the result is 256 characters or longer.
std::string GeneratePageOutputFilename(const char* pdf_name,
                                       int page_num,
                                       const char* extension) {
  constexpr std::string::size_type kLengthLimit = 256;

  std::ostringstream builder;
  builder << pdf_name << "." << page_num << "." << extension;
  std::string result = builder.str();
  if (result.size() < kLengthLimit) {
    return result;
  }

  fprintf(stderr, "Filename %s is too long\n", result.c_str());
  return std::string();
}
238 
// Builds "<pdf_name>.<page_num>.<image_num>.<extension>". Returns an empty
// string, after reporting on stderr, when the result is 256 characters or
// longer.
std::string GenerateImageOutputFilename(const char* pdf_name,
                                        int page_num,
                                        int image_num,
                                        const char* extension) {
  constexpr std::string::size_type kLengthLimit = 256;

  std::ostringstream builder;
  builder << pdf_name << "." << page_num << "." << image_num << "."
          << extension;
  std::string result = builder.str();
  if (result.size() < kLengthLimit) {
    return result;
  }

  fprintf(stderr, "Filename %s for saving image is too long.\n",
          result.c_str());
  return std::string();
}
254 
255 }  // namespace
256 
WritePpm(const char * pdf_name,int num,void * buffer_void,int stride,int width,int height)257 std::string WritePpm(const char* pdf_name,
258                      int num,
259                      void* buffer_void,
260                      int stride,
261                      int width,
262                      int height) {
263   if (!CheckDimensions(stride, width, height)) {
264     return "";
265   }
266 
267   int out_len = width * height;
268   if (out_len > INT_MAX / 3) {
269     return "";
270   }
271 
272   out_len *= 3;
273 
274   std::string filename = GeneratePageOutputFilename(pdf_name, num, "ppm");
275   if (filename.empty()) {
276     return std::string();
277   }
278   FILE* fp = fopen(filename.c_str(), "wb");
279   if (!fp) {
280     return std::string();
281   }
282 
283   fprintf(fp, "P6\n# PDF test render\n%d %d\n255\n", width, height);
284   // Source data is B, G, R, unused.
285   // Dest data is R, G, B.
286   const uint8_t* buffer = reinterpret_cast<const uint8_t*>(buffer_void);
287   std::vector<uint8_t> result(out_len);
288   for (int h = 0; h < height; ++h) {
289     const uint8_t* src_line = buffer + (stride * h);
290     uint8_t* dest_line = result.data() + (width * h * 3);
291     for (int w = 0; w < width; ++w) {
292       // R
293       dest_line[w * 3] = src_line[(w * 4) + 2];
294       // G
295       dest_line[(w * 3) + 1] = src_line[(w * 4) + 1];
296       // B
297       dest_line[(w * 3) + 2] = src_line[w * 4];
298     }
299   }
300   if (fwrite(result.data(), out_len, 1, fp) != 1) {
301     fprintf(stderr, "Failed to write to %s\n", filename.c_str());
302   }
303 
304   fclose(fp);
305   return filename;
306 }
307 
WriteText(FPDF_TEXTPAGE textpage,const char * pdf_name,int num)308 void WriteText(FPDF_TEXTPAGE textpage, const char* pdf_name, int num) {
309   std::string filename = GeneratePageOutputFilename(pdf_name, num, "txt");
310   if (filename.empty()) {
311     return;
312   }
313   FILE* fp = fopen(filename.c_str(), "w");
314   if (!fp) {
315     fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
316     return;
317   }
318 
319   // Output in UTF32-LE.
320   uint32_t bom = 0x0000FEFF;
321   if (fwrite(&bom, sizeof(bom), 1, fp) != 1) {
322     fprintf(stderr, "Failed to write to %s\n", filename.c_str());
323     (void)fclose(fp);
324     return;
325   }
326 
327   for (int i = 0; i < FPDFText_CountChars(textpage); i++) {
328     uint32_t c = FPDFText_GetUnicode(textpage, i);
329     if (fwrite(&c, sizeof(c), 1, fp) != 1) {
330       fprintf(stderr, "Failed to write to %s\n", filename.c_str());
331       break;
332     }
333   }
334   (void)fclose(fp);
335 }
336 
WriteAnnot(FPDF_PAGE page,const char * pdf_name,int num)337 void WriteAnnot(FPDF_PAGE page, const char* pdf_name, int num) {
338   // Open the output text file.
339   std::string filename = GeneratePageOutputFilename(pdf_name, num, "annot.txt");
340   if (filename.empty()) {
341     return;
342   }
343   FILE* fp = fopen(filename.c_str(), "w");
344   if (!fp) {
345     fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
346     return;
347   }
348 
349   int annot_count = FPDFPage_GetAnnotCount(page);
350   fprintf(fp, "Number of annotations: %d\n\n", annot_count);
351 
352   // Iterate through all annotations on this page.
353   for (int i = 0; i < annot_count; ++i) {
354     // Retrieve the annotation object and its subtype.
355     fprintf(fp, "Annotation #%d:\n", i + 1);
356     ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, i));
357     if (!annot) {
358       fprintf(fp, "Failed to retrieve annotation!\n\n");
359       continue;
360     }
361 
362     FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot.get());
363     fprintf(fp, "Subtype: %s\n", AnnotSubtypeToCString(subtype));
364 
365     // Retrieve the annotation flags.
366     fprintf(fp, "Flags set: %s\n",
367             AnnotFlagsToString(FPDFAnnot_GetFlags(annot.get())).c_str());
368 
369     // Retrieve the annotation's object count and object types.
370     const int obj_count = FPDFAnnot_GetObjectCount(annot.get());
371     fprintf(fp, "Number of objects: %d\n", obj_count);
372     if (obj_count > 0) {
373       fprintf(fp, "Object types: ");
374       for (int j = 0; j < obj_count; ++j) {
375         const char* type = PageObjectTypeToCString(
376             FPDFPageObj_GetType(FPDFAnnot_GetObject(annot.get(), j)));
377         fprintf(fp, "%s  ", type);
378       }
379       fprintf(fp, "\n");
380     }
381 
382     // Retrieve the annotation's color and interior color.
383     unsigned int R;
384     unsigned int G;
385     unsigned int B;
386     unsigned int A;
387     if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_Color, &R, &G, &B,
388                            &A)) {
389       fprintf(fp, "Color in RGBA: %d %d %d %d\n", R, G, B, A);
390     } else {
391       fprintf(fp, "Failed to retrieve color.\n");
392     }
393     if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_InteriorColor, &R,
394                            &G, &B, &A)) {
395       fprintf(fp, "Interior color in RGBA: %d %d %d %d\n", R, G, B, A);
396     } else {
397       fprintf(fp, "Failed to retrieve interior color.\n");
398     }
399 
400     // Retrieve the annotation's contents and author.
401     static constexpr char kContentsKey[] = "Contents";
402     static constexpr char kAuthorKey[] = "T";
403     unsigned long length_bytes =
404         FPDFAnnot_GetStringValue(annot.get(), kContentsKey, nullptr, 0);
405     std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
406     FPDFAnnot_GetStringValue(annot.get(), kContentsKey, buf.data(),
407                              length_bytes);
408     fprintf(fp, "Content: %ls\n", GetPlatformWString(buf.data()).c_str());
409     length_bytes =
410         FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, nullptr, 0);
411     buf = GetFPDFWideStringBuffer(length_bytes);
412     FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, buf.data(), length_bytes);
413     fprintf(fp, "Author: %ls\n", GetPlatformWString(buf.data()).c_str());
414 
415     // Retrieve the annotation's quadpoints if it is a markup annotation.
416     if (FPDFAnnot_HasAttachmentPoints(annot.get())) {
417       size_t qp_count = FPDFAnnot_CountAttachmentPoints(annot.get());
418       fprintf(fp, "Number of quadpoints sets: %zu\n", qp_count);
419 
420       // Iterate through all quadpoints of the current annotation
421       for (size_t j = 0; j < qp_count; ++j) {
422         FS_QUADPOINTSF quadpoints;
423         if (FPDFAnnot_GetAttachmentPoints(annot.get(), j, &quadpoints)) {
424           fprintf(fp,
425                   "Quadpoints set #%zu: (%.3f, %.3f), (%.3f, %.3f), "
426                   "(%.3f, %.3f), (%.3f, %.3f)\n",
427                   j + 1, quadpoints.x1, quadpoints.y1, quadpoints.x2,
428                   quadpoints.y2, quadpoints.x3, quadpoints.y3, quadpoints.x4,
429                   quadpoints.y4);
430         } else {
431           fprintf(fp, "Failed to retrieve quadpoints set #%zu.\n", j + 1);
432         }
433       }
434     }
435 
436     // Retrieve the annotation's rectangle coordinates.
437     FS_RECTF rect;
438     if (FPDFAnnot_GetRect(annot.get(), &rect)) {
439       fprintf(fp, "Rectangle: l - %.3f, b - %.3f, r - %.3f, t - %.3f\n\n",
440               rect.left, rect.bottom, rect.right, rect.top);
441     } else {
442       fprintf(fp, "Failed to retrieve annotation rectangle.\n");
443     }
444   }
445 
446   (void)fclose(fp);
447 }
448 
WritePng(const char * pdf_name,int num,void * buffer,int stride,int width,int height)449 std::string WritePng(const char* pdf_name,
450                      int num,
451                      void* buffer,
452                      int stride,
453                      int width,
454                      int height) {
455   if (!CheckDimensions(stride, width, height)) {
456     return "";
457   }
458 
459   auto input =
460       pdfium::make_span(static_cast<uint8_t*>(buffer), stride * height);
461   std::vector<uint8_t> png_encoding =
462       EncodePng(input, width, height, stride, FPDFBitmap_BGRA);
463   if (png_encoding.empty()) {
464     fprintf(stderr, "Failed to convert bitmap to PNG\n");
465     return "";
466   }
467 
468   std::string filename = GeneratePageOutputFilename(pdf_name, num, "png");
469   if (filename.empty()) {
470     return std::string();
471   }
472   FILE* fp = fopen(filename.c_str(), "wb");
473   if (!fp) {
474     fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
475     return std::string();
476   }
477 
478   size_t bytes_written =
479       fwrite(&png_encoding.front(), 1, png_encoding.size(), fp);
480   if (bytes_written != png_encoding.size()) {
481     fprintf(stderr, "Failed to write to %s\n", filename.c_str());
482   }
483 
484   (void)fclose(fp);
485   return filename;
486 }
487 
488 #ifdef _WIN32
WriteBmp(const char * pdf_name,int num,void * buffer,int stride,int width,int height)489 std::string WriteBmp(const char* pdf_name,
490                      int num,
491                      void* buffer,
492                      int stride,
493                      int width,
494                      int height) {
495   if (!CheckDimensions(stride, width, height)) {
496     return std::string();
497   }
498 
499   int out_len = stride * height;
500   if (out_len > INT_MAX / 3) {
501     return std::string();
502   }
503 
504   std::string filename = GeneratePageOutputFilename(pdf_name, num, "bmp");
505   if (filename.empty()) {
506     return std::string();
507   }
508   FILE* fp = fopen(filename.c_str(), "wb");
509   if (!fp) {
510     return std::string();
511   }
512 
513   BITMAPINFO bmi = {};
514   bmi.bmiHeader.biSize = sizeof(bmi) - sizeof(RGBQUAD);
515   bmi.bmiHeader.biWidth = width;
516   bmi.bmiHeader.biHeight = -height;  // top-down image
517   bmi.bmiHeader.biPlanes = 1;
518   bmi.bmiHeader.biBitCount = 32;
519   bmi.bmiHeader.biCompression = BI_RGB;
520   bmi.bmiHeader.biSizeImage = 0;
521 
522   BITMAPFILEHEADER file_header = {};
523   file_header.bfType = 0x4d42;
524   file_header.bfSize = sizeof(file_header) + bmi.bmiHeader.biSize + out_len;
525   file_header.bfOffBits = file_header.bfSize - out_len;
526 
527   if (fwrite(&file_header, sizeof(file_header), 1, fp) != 1 ||
528       fwrite(&bmi, bmi.bmiHeader.biSize, 1, fp) != 1 ||
529       fwrite(buffer, out_len, 1, fp) != 1) {
530     fprintf(stderr, "Failed to write to %s\n", filename.c_str());
531   }
532   fclose(fp);
533   return filename;
534 }
535 
WriteEmf(FPDF_PAGE page,const char * pdf_name,int num)536 void WriteEmf(FPDF_PAGE page, const char* pdf_name, int num) {
537   std::string filename = GeneratePageOutputFilename(pdf_name, num, "emf");
538   if (filename.empty()) {
539     return;
540   }
541 
542   HDC dc = CreateEnhMetaFileA(nullptr, filename.c_str(), nullptr, nullptr);
543 
544   int width = static_cast<int>(FPDF_GetPageWidthF(page));
545   int height = static_cast<int>(FPDF_GetPageHeightF(page));
546   HRGN rgn = CreateRectRgn(0, 0, width, height);
547   SelectClipRgn(dc, rgn);
548   DeleteObject(rgn);
549 
550   SelectObject(dc, GetStockObject(NULL_PEN));
551   SelectObject(dc, GetStockObject(WHITE_BRUSH));
552   // If a PS_NULL pen is used, the dimensions of the rectangle are 1 pixel less.
553   Rectangle(dc, 0, 0, width + 1, height + 1);
554 
555   FPDF_RenderPage(dc, page, 0, 0, width, height, 0, FPDF_ANNOT | FPDF_PRINTING);
556 
557   DeleteEnhMetaFile(CloseEnhMetaFile(dc));
558 }
559 
WritePS(FPDF_PAGE page,const char * pdf_name,int num)560 void WritePS(FPDF_PAGE page, const char* pdf_name, int num) {
561   std::string filename = GeneratePageOutputFilename(pdf_name, num, "ps");
562   if (filename.empty()) {
563     return;
564   }
565   FILE* fp = fopen(filename.c_str(), "wb");
566   if (!fp) {
567     return;
568   }
569 
570   HDC dc = CreateEnhMetaFileA(nullptr, nullptr, nullptr, nullptr);
571 
572   int width = static_cast<int>(FPDF_GetPageWidthF(page));
573   int height = static_cast<int>(FPDF_GetPageHeightF(page));
574   FPDF_RenderPage(dc, page, 0, 0, width, height, 0, FPDF_ANNOT | FPDF_PRINTING);
575 
576   HENHMETAFILE emf = CloseEnhMetaFile(dc);
577   std::vector<const ENHMETARECORD*> items;
578   EnumEnhMetaFile(nullptr, emf, &EnhMetaFileProc, &items, nullptr);
579   for (const ENHMETARECORD* record : items) {
580     if (record->iType != EMR_GDICOMMENT) {
581       continue;
582     }
583 
584     const auto* comment = reinterpret_cast<const EMRGDICOMMENT*>(record);
585     const char* data = reinterpret_cast<const char*>(comment->Data);
586     uint16_t size = *reinterpret_cast<const uint16_t*>(data);
587     if (fwrite(data + sizeof(uint16_t), size, 1, fp) != 1) {
588       fprintf(stderr, "Failed to write to %s\n", filename.c_str());
589       break;
590     }
591   }
592   fclose(fp);
593   DeleteEnhMetaFile(emf);
594 }
595 #endif  // _WIN32
596 
597 #ifdef PDF_ENABLE_SKIA
WriteToSkWStream(const std::string & pdf_name,int num,const std::string & extension)598 std::unique_ptr<SkWStream> WriteToSkWStream(const std::string& pdf_name,
599                                             int num,
600                                             const std::string& extension) {
601   std::string discarded_filename;
602   return WriteToSkWStream(pdf_name, num, extension, discarded_filename);
603 }
604 
WriteToSkWStream(const std::string & pdf_name,int num,const std::string & extension,std::string & filename)605 std::unique_ptr<SkWStream> WriteToSkWStream(const std::string& pdf_name,
606                                             int num,
607                                             const std::string& extension,
608                                             std::string& filename) {
609   filename =
610       GeneratePageOutputFilename(pdf_name.c_str(), num, extension.c_str());
611   if (filename.empty()) {
612     return nullptr;
613   }
614 
615   auto stream = std::make_unique<SkFILEWStream>(filename.c_str());
616   if (!stream->isValid()) {
617     return nullptr;
618   }
619 
620   return stream;
621 }
622 
WriteSkp(const char * pdf_name,int num,const SkPicture & picture)623 std::string WriteSkp(const char* pdf_name, int num, const SkPicture& picture) {
624   std::string filename;
625   std::unique_ptr<SkWStream> stream =
626       WriteToSkWStream(pdf_name, num, "skp", filename);
627   if (!stream) {
628     return "";
629   }
630 
631   picture.serialize(stream.get());
632   return filename;
633 }
634 #endif  // PDF_ENABLE_SKIA
635 
// Selects which representation of a page thumbnail an output file holds; each
// value maps to its own filename pattern in GetThumbnailFilename().
enum class ThumbnailDecodeType { kBitmap, kRawStream, kDecodedStream };

// Formats the thumbnail output filename for page |page_num| of |pdf_name| into
// |name_buf|, which holds |name_buf_size| bytes:
//   kBitmap        -> "<pdf_name>.thumbnail.<page_num>.png"
//   kDecodedStream -> "<pdf_name>.thumbnail.decoded.<page_num>.bin"
//   kRawStream     -> "<pdf_name>.thumbnail.raw.<page_num>.bin"
//
// Returns true on success. Returns false, after reporting on stderr, when the
// buffer is missing or empty, |decode_type| is not one of the values above, or
// the formatted name does not fit in the buffer.
bool GetThumbnailFilename(char* name_buf,
                          size_t name_buf_size,
                          const char* pdf_name,
                          int page_num,
                          ThumbnailDecodeType decode_type) {
  if (!name_buf || name_buf_size == 0) {
    fprintf(stderr, "No buffer available for the thumbnail filename.\n");
    return false;
  }
  // Keep the buffer a valid string even if formatting fails outright, since it
  // is printed in the error message below.
  name_buf[0] = '\0';

  const char* format = nullptr;
  switch (decode_type) {
    case ThumbnailDecodeType::kBitmap:
      format = "%s.thumbnail.%d.png";
      break;
    case ThumbnailDecodeType::kDecodedStream:
      format = "%s.thumbnail.decoded.%d.bin";
      break;
    case ThumbnailDecodeType::kRawStream:
      format = "%s.thumbnail.raw.%d.bin";
      break;
  }
  if (!format) {
    // The enum can hold values outside its enumerators; never format with an
    // unset pattern.
    fprintf(stderr, "Unknown thumbnail decode type.\n");
    return false;
  }

  int chars_formatted =
      snprintf(name_buf, name_buf_size, format, pdf_name, page_num);
  if (chars_formatted < 0 ||
      static_cast<size_t>(chars_formatted) >= name_buf_size) {
    fprintf(stderr, "Filename %s for saving is too long.\n", name_buf);
    return false;
  }

  return true;
}
666 
// Writes |buflen| bytes from |buf| to the file |filename|, replacing any
// existing content, and reports the outcome on stderr. |filetype| is only used
// in those messages. A |buflen| of 0 creates an empty file and counts as
// success; |buf| is not read in that case.
void WriteBufferToFile(const void* buf,
                       size_t buflen,
                       const char* filename,
                       const char* filetype) {
  FILE* fp = fopen(filename, "wb");
  if (!fp) {
    fprintf(stderr, "Failed to open %s for saving %s.\n", filename, filetype);
    return;
  }

  // Callers may pass a null |buf| together with a zero length (for example an
  // empty vector's data()), so only hand the pointer to fwrite() when there is
  // something to write.
  size_t bytes_written = buflen == 0 ? 0 : fwrite(buf, 1, buflen, fp);
  if (bytes_written == buflen) {
    fprintf(stderr, "Successfully wrote %s %s.\n", filetype, filename);
  } else {
    fprintf(stderr, "Failed to write to %s.\n", filename);
  }
  fclose(fp);
}
685 
EncodeBitmapToPng(ScopedFPDFBitmap bitmap)686 std::vector<uint8_t> EncodeBitmapToPng(ScopedFPDFBitmap bitmap) {
687   std::vector<uint8_t> png_encoding;
688   int format = FPDFBitmap_GetFormat(bitmap.get());
689   if (format == FPDFBitmap_Unknown) {
690     return png_encoding;
691   }
692 
693   int width = FPDFBitmap_GetWidth(bitmap.get());
694   int height = FPDFBitmap_GetHeight(bitmap.get());
695   int stride = FPDFBitmap_GetStride(bitmap.get());
696   if (!CheckDimensions(stride, width, height)) {
697     return png_encoding;
698   }
699 
700   auto input = pdfium::make_span(
701       static_cast<const uint8_t*>(FPDFBitmap_GetBuffer(bitmap.get())),
702       stride * height);
703 
704   png_encoding = EncodePng(input, width, height, stride, format);
705   return png_encoding;
706 }
707 
WriteAttachments(FPDF_DOCUMENT doc,const std::string & name)708 void WriteAttachments(FPDF_DOCUMENT doc, const std::string& name) {
709   for (int i = 0; i < FPDFDoc_GetAttachmentCount(doc); ++i) {
710     FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(doc, i);
711 
712     // Retrieve the attachment file name.
713     std::string attachment_name;
714     unsigned long length_bytes = FPDFAttachment_GetName(attachment, nullptr, 0);
715     if (length_bytes) {
716       std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
717       unsigned long actual_length_bytes =
718           FPDFAttachment_GetName(attachment, buf.data(), length_bytes);
719       if (actual_length_bytes == length_bytes) {
720         attachment_name = GetPlatformString(buf.data());
721       }
722     }
723     if (attachment_name.empty()) {
724       fprintf(stderr, "Attachment #%d has an empty file name.\n", i + 1);
725       continue;
726     }
727 
728     // Calculate the full attachment file name.
729     char save_name[256];
730     int chars_formatted =
731         snprintf(save_name, sizeof(save_name), "%s.attachment.%s", name.c_str(),
732                  attachment_name.c_str());
733     if (chars_formatted < 0 ||
734         static_cast<size_t>(chars_formatted) >= sizeof(save_name)) {
735       fprintf(stderr, "Filename %s is too long.\n", save_name);
736       continue;
737     }
738 
739     // Retrieve the attachment.
740     if (!FPDFAttachment_GetFile(attachment, nullptr, 0, &length_bytes)) {
741       fprintf(stderr, "Failed to retrieve attachment \"%s\".\n",
742               attachment_name.c_str());
743       continue;
744     }
745 
746     std::vector<char> data_buf(length_bytes);
747     if (length_bytes) {
748       unsigned long actual_length_bytes;
749       if (!FPDFAttachment_GetFile(attachment, data_buf.data(), length_bytes,
750                                   &actual_length_bytes)) {
751         fprintf(stderr, "Failed to retrieve attachment \"%s\".\n",
752                 attachment_name.c_str());
753         continue;
754       }
755     }
756 
757     // Write the attachment file. Since a PDF document could have 0-byte files
758     // as attachments, we should allow saving the 0-byte attachments to files.
759     WriteBufferToFile(data_buf.data(), length_bytes, save_name, "attachment");
760   }
761 }
762 
WriteImages(FPDF_PAGE page,const char * pdf_name,int page_num)763 void WriteImages(FPDF_PAGE page, const char* pdf_name, int page_num) {
764   for (int i = 0; i < FPDFPage_CountObjects(page); ++i) {
765     FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
766     if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE) {
767       continue;
768     }
769 
770     ScopedFPDFBitmap bitmap(FPDFImageObj_GetBitmap(obj));
771     if (!bitmap) {
772       fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
773               i + 1, page_num + 1);
774       continue;
775     }
776 
777     std::string filename =
778         GenerateImageOutputFilename(pdf_name, page_num, i, "png");
779     if (filename.empty()) {
780       continue;
781     }
782 
783     std::vector<uint8_t> png_encoding = EncodeBitmapToPng(std::move(bitmap));
784     if (png_encoding.empty()) {
785       fprintf(stderr,
786               "Failed to convert image object #%d, on page #%d to png.\n",
787               i + 1, page_num + 1);
788       continue;
789     }
790 
791     WriteBufferToFile(&png_encoding.front(), png_encoding.size(),
792                       filename.c_str(), "image");
793   }
794 }
795 
WriteRenderedImages(FPDF_DOCUMENT doc,FPDF_PAGE page,const char * pdf_name,int page_num)796 void WriteRenderedImages(FPDF_DOCUMENT doc,
797                          FPDF_PAGE page,
798                          const char* pdf_name,
799                          int page_num) {
800   for (int i = 0; i < FPDFPage_CountObjects(page); ++i) {
801     FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
802     if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE) {
803       continue;
804     }
805 
806     ScopedFPDFBitmap bitmap(FPDFImageObj_GetRenderedBitmap(doc, page, obj));
807     if (!bitmap) {
808       fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
809               i + 1, page_num + 1);
810       continue;
811     }
812 
813     std::string filename =
814         GenerateImageOutputFilename(pdf_name, page_num, i, "png");
815     if (filename.empty()) {
816       continue;
817     }
818 
819     std::vector<uint8_t> png_encoding = EncodeBitmapToPng(std::move(bitmap));
820     if (png_encoding.empty()) {
821       fprintf(stderr,
822               "Failed to convert image object #%d, on page #%d to png.\n",
823               i + 1, page_num + 1);
824       continue;
825     }
826 
827     WriteBufferToFile(&png_encoding.front(), png_encoding.size(),
828                       filename.c_str(), "image");
829   }
830 }
831 
WriteDecodedThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)832 void WriteDecodedThumbnailStream(FPDF_PAGE page,
833                                  const char* pdf_name,
834                                  int page_num) {
835   char filename[256];
836   if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
837                             ThumbnailDecodeType::kDecodedStream)) {
838     return;
839   }
840 
841   unsigned long decoded_data_size =
842       FPDFPage_GetDecodedThumbnailData(page, nullptr, 0u);
843 
844   // Only continue if there actually is a thumbnail for this page
845   if (decoded_data_size == 0) {
846     fprintf(stderr, "Failed to get decoded thumbnail for page #%d.\n",
847             page_num + 1);
848     return;
849   }
850 
851   std::vector<uint8_t> thumb_buf(decoded_data_size);
852   if (FPDFPage_GetDecodedThumbnailData(
853           page, thumb_buf.data(), decoded_data_size) != decoded_data_size) {
854     fprintf(stderr, "Failed to get decoded thumbnail data for %s.\n", filename);
855     return;
856   }
857 
858   WriteBufferToFile(thumb_buf.data(), decoded_data_size, filename,
859                     "decoded thumbnail");
860 }
861 
WriteRawThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)862 void WriteRawThumbnailStream(FPDF_PAGE page,
863                              const char* pdf_name,
864                              int page_num) {
865   char filename[256];
866   if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
867                             ThumbnailDecodeType::kRawStream)) {
868     return;
869   }
870 
871   unsigned long raw_data_size = FPDFPage_GetRawThumbnailData(page, nullptr, 0u);
872 
873   // Only continue if there actually is a thumbnail for this page
874   if (raw_data_size == 0) {
875     fprintf(stderr, "Failed to get raw thumbnail data for page #%d.\n",
876             page_num + 1);
877     return;
878   }
879 
880   std::vector<uint8_t> thumb_buf(raw_data_size);
881   if (FPDFPage_GetRawThumbnailData(page, thumb_buf.data(), raw_data_size) !=
882       raw_data_size) {
883     fprintf(stderr, "Failed to get raw thumbnail data for %s.\n", filename);
884     return;
885   }
886 
887   WriteBufferToFile(thumb_buf.data(), raw_data_size, filename, "raw thumbnail");
888 }
889 
WriteThumbnail(FPDF_PAGE page,const char * pdf_name,int page_num)890 void WriteThumbnail(FPDF_PAGE page, const char* pdf_name, int page_num) {
891   char filename[256];
892   if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
893                             ThumbnailDecodeType::kBitmap)) {
894     return;
895   }
896 
897   ScopedFPDFBitmap thumb_bitmap(FPDFPage_GetThumbnailAsBitmap(page));
898   if (!thumb_bitmap) {
899     fprintf(stderr, "Thumbnail of page #%d has an empty bitmap.\n",
900             page_num + 1);
901     return;
902   }
903 
904   std::vector<uint8_t> png_encoding =
905       EncodeBitmapToPng(std::move(thumb_bitmap));
906   if (png_encoding.empty()) {
907     fprintf(stderr, "Failed to convert thumbnail of page #%d to png.\n",
908             page_num + 1);
909     return;
910   }
911 
912   WriteBufferToFile(&png_encoding.front(), png_encoding.size(), filename,
913                     "thumbnail");
914 }
915