1 // Copyright 2018 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "samples/helpers/write.h"
6
7 #include <limits.h>
8
9 #include <sstream>
10 #include <string>
11 #include <utility>
12 #include <vector>
13
14 #include "public/cpp/fpdf_scopers.h"
15 #include "public/fpdf_annot.h"
16 #include "public/fpdf_attachment.h"
17 #include "public/fpdf_edit.h"
18 #include "public/fpdf_thumbnail.h"
19 #include "testing/fx_string_testhelpers.h"
20 #include "testing/image_diff/image_diff_png.h"
21 #include "third_party/base/notreached.h"
22
23 #ifdef PDF_ENABLE_SKIA
24 #include "third_party/skia/include/core/SkPicture.h" // nogncheck
25 #include "third_party/skia/include/core/SkStream.h" // nogncheck
26 #endif
27
28 namespace {
29
// Returns true when |stride|, |width| and |height| are all non-negative and
// |stride| * |height| can be computed without overflowing an int.
bool CheckDimensions(int stride, int width, int height) {
  const bool all_non_negative = stride >= 0 && width >= 0 && height >= 0;
  if (!all_non_negative) {
    return false;
  }
  // A zero height needs no bytes at all and must not reach the division.
  return height == 0 || stride <= INT_MAX / height;
}
39
AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype)40 const char* AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype) {
41 if (subtype == FPDF_ANNOT_TEXT) {
42 return "Text";
43 }
44 if (subtype == FPDF_ANNOT_LINK) {
45 return "Link";
46 }
47 if (subtype == FPDF_ANNOT_FREETEXT) {
48 return "FreeText";
49 }
50 if (subtype == FPDF_ANNOT_LINE) {
51 return "Line";
52 }
53 if (subtype == FPDF_ANNOT_SQUARE) {
54 return "Square";
55 }
56 if (subtype == FPDF_ANNOT_CIRCLE) {
57 return "Circle";
58 }
59 if (subtype == FPDF_ANNOT_POLYGON) {
60 return "Polygon";
61 }
62 if (subtype == FPDF_ANNOT_POLYLINE) {
63 return "PolyLine";
64 }
65 if (subtype == FPDF_ANNOT_HIGHLIGHT) {
66 return "Highlight";
67 }
68 if (subtype == FPDF_ANNOT_UNDERLINE) {
69 return "Underline";
70 }
71 if (subtype == FPDF_ANNOT_SQUIGGLY) {
72 return "Squiggly";
73 }
74 if (subtype == FPDF_ANNOT_STRIKEOUT) {
75 return "StrikeOut";
76 }
77 if (subtype == FPDF_ANNOT_STAMP) {
78 return "Stamp";
79 }
80 if (subtype == FPDF_ANNOT_CARET) {
81 return "Caret";
82 }
83 if (subtype == FPDF_ANNOT_INK) {
84 return "Ink";
85 }
86 if (subtype == FPDF_ANNOT_POPUP) {
87 return "Popup";
88 }
89 if (subtype == FPDF_ANNOT_FILEATTACHMENT) {
90 return "FileAttachment";
91 }
92 if (subtype == FPDF_ANNOT_SOUND) {
93 return "Sound";
94 }
95 if (subtype == FPDF_ANNOT_MOVIE) {
96 return "Movie";
97 }
98 if (subtype == FPDF_ANNOT_WIDGET) {
99 return "Widget";
100 }
101 if (subtype == FPDF_ANNOT_SCREEN) {
102 return "Screen";
103 }
104 if (subtype == FPDF_ANNOT_PRINTERMARK) {
105 return "PrinterMark";
106 }
107 if (subtype == FPDF_ANNOT_TRAPNET) {
108 return "TrapNet";
109 }
110 if (subtype == FPDF_ANNOT_WATERMARK) {
111 return "Watermark";
112 }
113 if (subtype == FPDF_ANNOT_THREED) {
114 return "3D";
115 }
116 if (subtype == FPDF_ANNOT_RICHMEDIA) {
117 return "RichMedia";
118 }
119 if (subtype == FPDF_ANNOT_XFAWIDGET) {
120 return "XFAWidget";
121 }
122 NOTREACHED_NORETURN();
123 }
124
// Appends |flag| to |output|, preceded by ", " when |output| already holds
// text, so repeated calls build a comma-separated list.
void AppendFlagString(const char* flag, std::string* output) {
  const char* separator = output->empty() ? "" : ", ";
  output->append(separator).append(flag);
}
131
AnnotFlagsToString(int flags)132 std::string AnnotFlagsToString(int flags) {
133 std::string str;
134 if (flags & FPDF_ANNOT_FLAG_INVISIBLE) {
135 AppendFlagString("Invisible", &str);
136 }
137 if (flags & FPDF_ANNOT_FLAG_HIDDEN) {
138 AppendFlagString("Hidden", &str);
139 }
140 if (flags & FPDF_ANNOT_FLAG_PRINT) {
141 AppendFlagString("Print", &str);
142 }
143 if (flags & FPDF_ANNOT_FLAG_NOZOOM) {
144 AppendFlagString("NoZoom", &str);
145 }
146 if (flags & FPDF_ANNOT_FLAG_NOROTATE) {
147 AppendFlagString("NoRotate", &str);
148 }
149 if (flags & FPDF_ANNOT_FLAG_NOVIEW) {
150 AppendFlagString("NoView", &str);
151 }
152 if (flags & FPDF_ANNOT_FLAG_READONLY) {
153 AppendFlagString("ReadOnly", &str);
154 }
155 if (flags & FPDF_ANNOT_FLAG_LOCKED) {
156 AppendFlagString("Locked", &str);
157 }
158 if (flags & FPDF_ANNOT_FLAG_TOGGLENOVIEW) {
159 AppendFlagString("ToggleNoView", &str);
160 }
161 return str;
162 }
163
PageObjectTypeToCString(int type)164 const char* PageObjectTypeToCString(int type) {
165 if (type == FPDF_PAGEOBJ_TEXT) {
166 return "Text";
167 }
168 if (type == FPDF_PAGEOBJ_PATH) {
169 return "Path";
170 }
171 if (type == FPDF_PAGEOBJ_IMAGE) {
172 return "Image";
173 }
174 if (type == FPDF_PAGEOBJ_SHADING) {
175 return "Shading";
176 }
177 if (type == FPDF_PAGEOBJ_FORM) {
178 return "Form";
179 }
180 NOTREACHED_NORETURN();
181 }
182
EncodePng(pdfium::span<const uint8_t> input,int width,int height,int stride,int format)183 std::vector<uint8_t> EncodePng(pdfium::span<const uint8_t> input,
184 int width,
185 int height,
186 int stride,
187 int format) {
188 std::vector<uint8_t> png;
189 switch (format) {
190 case FPDFBitmap_Unknown:
191 break;
192 case FPDFBitmap_Gray:
193 png = image_diff_png::EncodeGrayPNG(input, width, height, stride);
194 break;
195 case FPDFBitmap_BGR:
196 png = image_diff_png::EncodeBGRPNG(input, width, height, stride);
197 break;
198 case FPDFBitmap_BGRx:
199 png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
200 /*discard_transparency=*/true);
201 break;
202 case FPDFBitmap_BGRA:
203 png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
204 /*discard_transparency=*/false);
205 break;
206 default:
207 NOTREACHED_NORETURN();
208 }
209 return png;
210 }
211
212 #ifdef _WIN32
EnhMetaFileProc(HDC hdc,HANDLETABLE * handle_table,const ENHMETARECORD * record,int objects_count,LPARAM param)213 int CALLBACK EnhMetaFileProc(HDC hdc,
214 HANDLETABLE* handle_table,
215 const ENHMETARECORD* record,
216 int objects_count,
217 LPARAM param) {
218 std::vector<const ENHMETARECORD*>& items =
219 *reinterpret_cast<std::vector<const ENHMETARECORD*>*>(param);
220 items.push_back(record);
221 return 1;
222 }
223 #endif // _WIN32
224
// Builds "<pdf_name>.<page_num>.<extension>". Names of 256 characters or more
// are rejected: an error is logged to stderr and an empty string is returned.
std::string GeneratePageOutputFilename(const char* pdf_name,
                                       int page_num,
                                       const char* extension) {
  std::ostringstream builder;
  builder << pdf_name << '.' << page_num << '.' << extension;
  std::string result = builder.str();
  if (result.size() < 256) {
    return result;
  }

  fprintf(stderr, "Filename %s is too long\n", result.c_str());
  return std::string();
}
238
// Builds "<pdf_name>.<page_num>.<image_num>.<extension>". Names of 256
// characters or more are rejected: an error is logged to stderr and an empty
// string is returned.
std::string GenerateImageOutputFilename(const char* pdf_name,
                                        int page_num,
                                        int image_num,
                                        const char* extension) {
  std::ostringstream builder;
  builder << pdf_name << '.' << page_num << '.' << image_num << '.'
          << extension;
  std::string result = builder.str();
  if (result.size() < 256) {
    return result;
  }

  fprintf(stderr, "Filename %s for saving image is too long.\n",
          result.c_str());
  return std::string();
}
254
255 } // namespace
256
WritePpm(const char * pdf_name,int num,void * buffer_void,int stride,int width,int height)257 std::string WritePpm(const char* pdf_name,
258 int num,
259 void* buffer_void,
260 int stride,
261 int width,
262 int height) {
263 if (!CheckDimensions(stride, width, height)) {
264 return "";
265 }
266
267 int out_len = width * height;
268 if (out_len > INT_MAX / 3) {
269 return "";
270 }
271
272 out_len *= 3;
273
274 std::string filename = GeneratePageOutputFilename(pdf_name, num, "ppm");
275 if (filename.empty()) {
276 return std::string();
277 }
278 FILE* fp = fopen(filename.c_str(), "wb");
279 if (!fp) {
280 return std::string();
281 }
282
283 fprintf(fp, "P6\n# PDF test render\n%d %d\n255\n", width, height);
284 // Source data is B, G, R, unused.
285 // Dest data is R, G, B.
286 const uint8_t* buffer = reinterpret_cast<const uint8_t*>(buffer_void);
287 std::vector<uint8_t> result(out_len);
288 for (int h = 0; h < height; ++h) {
289 const uint8_t* src_line = buffer + (stride * h);
290 uint8_t* dest_line = result.data() + (width * h * 3);
291 for (int w = 0; w < width; ++w) {
292 // R
293 dest_line[w * 3] = src_line[(w * 4) + 2];
294 // G
295 dest_line[(w * 3) + 1] = src_line[(w * 4) + 1];
296 // B
297 dest_line[(w * 3) + 2] = src_line[w * 4];
298 }
299 }
300 if (fwrite(result.data(), out_len, 1, fp) != 1) {
301 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
302 }
303
304 fclose(fp);
305 return filename;
306 }
307
WriteText(FPDF_TEXTPAGE textpage,const char * pdf_name,int num)308 void WriteText(FPDF_TEXTPAGE textpage, const char* pdf_name, int num) {
309 std::string filename = GeneratePageOutputFilename(pdf_name, num, "txt");
310 if (filename.empty()) {
311 return;
312 }
313 FILE* fp = fopen(filename.c_str(), "w");
314 if (!fp) {
315 fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
316 return;
317 }
318
319 // Output in UTF32-LE.
320 uint32_t bom = 0x0000FEFF;
321 if (fwrite(&bom, sizeof(bom), 1, fp) != 1) {
322 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
323 (void)fclose(fp);
324 return;
325 }
326
327 for (int i = 0; i < FPDFText_CountChars(textpage); i++) {
328 uint32_t c = FPDFText_GetUnicode(textpage, i);
329 if (fwrite(&c, sizeof(c), 1, fp) != 1) {
330 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
331 break;
332 }
333 }
334 (void)fclose(fp);
335 }
336
WriteAnnot(FPDF_PAGE page,const char * pdf_name,int num)337 void WriteAnnot(FPDF_PAGE page, const char* pdf_name, int num) {
338 // Open the output text file.
339 std::string filename = GeneratePageOutputFilename(pdf_name, num, "annot.txt");
340 if (filename.empty()) {
341 return;
342 }
343 FILE* fp = fopen(filename.c_str(), "w");
344 if (!fp) {
345 fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
346 return;
347 }
348
349 int annot_count = FPDFPage_GetAnnotCount(page);
350 fprintf(fp, "Number of annotations: %d\n\n", annot_count);
351
352 // Iterate through all annotations on this page.
353 for (int i = 0; i < annot_count; ++i) {
354 // Retrieve the annotation object and its subtype.
355 fprintf(fp, "Annotation #%d:\n", i + 1);
356 ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, i));
357 if (!annot) {
358 fprintf(fp, "Failed to retrieve annotation!\n\n");
359 continue;
360 }
361
362 FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot.get());
363 fprintf(fp, "Subtype: %s\n", AnnotSubtypeToCString(subtype));
364
365 // Retrieve the annotation flags.
366 fprintf(fp, "Flags set: %s\n",
367 AnnotFlagsToString(FPDFAnnot_GetFlags(annot.get())).c_str());
368
369 // Retrieve the annotation's object count and object types.
370 const int obj_count = FPDFAnnot_GetObjectCount(annot.get());
371 fprintf(fp, "Number of objects: %d\n", obj_count);
372 if (obj_count > 0) {
373 fprintf(fp, "Object types: ");
374 for (int j = 0; j < obj_count; ++j) {
375 const char* type = PageObjectTypeToCString(
376 FPDFPageObj_GetType(FPDFAnnot_GetObject(annot.get(), j)));
377 fprintf(fp, "%s ", type);
378 }
379 fprintf(fp, "\n");
380 }
381
382 // Retrieve the annotation's color and interior color.
383 unsigned int R;
384 unsigned int G;
385 unsigned int B;
386 unsigned int A;
387 if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_Color, &R, &G, &B,
388 &A)) {
389 fprintf(fp, "Color in RGBA: %d %d %d %d\n", R, G, B, A);
390 } else {
391 fprintf(fp, "Failed to retrieve color.\n");
392 }
393 if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_InteriorColor, &R,
394 &G, &B, &A)) {
395 fprintf(fp, "Interior color in RGBA: %d %d %d %d\n", R, G, B, A);
396 } else {
397 fprintf(fp, "Failed to retrieve interior color.\n");
398 }
399
400 // Retrieve the annotation's contents and author.
401 static constexpr char kContentsKey[] = "Contents";
402 static constexpr char kAuthorKey[] = "T";
403 unsigned long length_bytes =
404 FPDFAnnot_GetStringValue(annot.get(), kContentsKey, nullptr, 0);
405 std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
406 FPDFAnnot_GetStringValue(annot.get(), kContentsKey, buf.data(),
407 length_bytes);
408 fprintf(fp, "Content: %ls\n", GetPlatformWString(buf.data()).c_str());
409 length_bytes =
410 FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, nullptr, 0);
411 buf = GetFPDFWideStringBuffer(length_bytes);
412 FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, buf.data(), length_bytes);
413 fprintf(fp, "Author: %ls\n", GetPlatformWString(buf.data()).c_str());
414
415 // Retrieve the annotation's quadpoints if it is a markup annotation.
416 if (FPDFAnnot_HasAttachmentPoints(annot.get())) {
417 size_t qp_count = FPDFAnnot_CountAttachmentPoints(annot.get());
418 fprintf(fp, "Number of quadpoints sets: %zu\n", qp_count);
419
420 // Iterate through all quadpoints of the current annotation
421 for (size_t j = 0; j < qp_count; ++j) {
422 FS_QUADPOINTSF quadpoints;
423 if (FPDFAnnot_GetAttachmentPoints(annot.get(), j, &quadpoints)) {
424 fprintf(fp,
425 "Quadpoints set #%zu: (%.3f, %.3f), (%.3f, %.3f), "
426 "(%.3f, %.3f), (%.3f, %.3f)\n",
427 j + 1, quadpoints.x1, quadpoints.y1, quadpoints.x2,
428 quadpoints.y2, quadpoints.x3, quadpoints.y3, quadpoints.x4,
429 quadpoints.y4);
430 } else {
431 fprintf(fp, "Failed to retrieve quadpoints set #%zu.\n", j + 1);
432 }
433 }
434 }
435
436 // Retrieve the annotation's rectangle coordinates.
437 FS_RECTF rect;
438 if (FPDFAnnot_GetRect(annot.get(), &rect)) {
439 fprintf(fp, "Rectangle: l - %.3f, b - %.3f, r - %.3f, t - %.3f\n\n",
440 rect.left, rect.bottom, rect.right, rect.top);
441 } else {
442 fprintf(fp, "Failed to retrieve annotation rectangle.\n");
443 }
444 }
445
446 (void)fclose(fp);
447 }
448
WritePng(const char * pdf_name,int num,void * buffer,int stride,int width,int height)449 std::string WritePng(const char* pdf_name,
450 int num,
451 void* buffer,
452 int stride,
453 int width,
454 int height) {
455 if (!CheckDimensions(stride, width, height)) {
456 return "";
457 }
458
459 auto input =
460 pdfium::make_span(static_cast<uint8_t*>(buffer), stride * height);
461 std::vector<uint8_t> png_encoding =
462 EncodePng(input, width, height, stride, FPDFBitmap_BGRA);
463 if (png_encoding.empty()) {
464 fprintf(stderr, "Failed to convert bitmap to PNG\n");
465 return "";
466 }
467
468 std::string filename = GeneratePageOutputFilename(pdf_name, num, "png");
469 if (filename.empty()) {
470 return std::string();
471 }
472 FILE* fp = fopen(filename.c_str(), "wb");
473 if (!fp) {
474 fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
475 return std::string();
476 }
477
478 size_t bytes_written =
479 fwrite(&png_encoding.front(), 1, png_encoding.size(), fp);
480 if (bytes_written != png_encoding.size()) {
481 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
482 }
483
484 (void)fclose(fp);
485 return filename;
486 }
487
488 #ifdef _WIN32
WriteBmp(const char * pdf_name,int num,void * buffer,int stride,int width,int height)489 std::string WriteBmp(const char* pdf_name,
490 int num,
491 void* buffer,
492 int stride,
493 int width,
494 int height) {
495 if (!CheckDimensions(stride, width, height)) {
496 return std::string();
497 }
498
499 int out_len = stride * height;
500 if (out_len > INT_MAX / 3) {
501 return std::string();
502 }
503
504 std::string filename = GeneratePageOutputFilename(pdf_name, num, "bmp");
505 if (filename.empty()) {
506 return std::string();
507 }
508 FILE* fp = fopen(filename.c_str(), "wb");
509 if (!fp) {
510 return std::string();
511 }
512
513 BITMAPINFO bmi = {};
514 bmi.bmiHeader.biSize = sizeof(bmi) - sizeof(RGBQUAD);
515 bmi.bmiHeader.biWidth = width;
516 bmi.bmiHeader.biHeight = -height; // top-down image
517 bmi.bmiHeader.biPlanes = 1;
518 bmi.bmiHeader.biBitCount = 32;
519 bmi.bmiHeader.biCompression = BI_RGB;
520 bmi.bmiHeader.biSizeImage = 0;
521
522 BITMAPFILEHEADER file_header = {};
523 file_header.bfType = 0x4d42;
524 file_header.bfSize = sizeof(file_header) + bmi.bmiHeader.biSize + out_len;
525 file_header.bfOffBits = file_header.bfSize - out_len;
526
527 if (fwrite(&file_header, sizeof(file_header), 1, fp) != 1 ||
528 fwrite(&bmi, bmi.bmiHeader.biSize, 1, fp) != 1 ||
529 fwrite(buffer, out_len, 1, fp) != 1) {
530 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
531 }
532 fclose(fp);
533 return filename;
534 }
535
WriteEmf(FPDF_PAGE page,const char * pdf_name,int num)536 void WriteEmf(FPDF_PAGE page, const char* pdf_name, int num) {
537 std::string filename = GeneratePageOutputFilename(pdf_name, num, "emf");
538 if (filename.empty()) {
539 return;
540 }
541
542 HDC dc = CreateEnhMetaFileA(nullptr, filename.c_str(), nullptr, nullptr);
543
544 int width = static_cast<int>(FPDF_GetPageWidthF(page));
545 int height = static_cast<int>(FPDF_GetPageHeightF(page));
546 HRGN rgn = CreateRectRgn(0, 0, width, height);
547 SelectClipRgn(dc, rgn);
548 DeleteObject(rgn);
549
550 SelectObject(dc, GetStockObject(NULL_PEN));
551 SelectObject(dc, GetStockObject(WHITE_BRUSH));
552 // If a PS_NULL pen is used, the dimensions of the rectangle are 1 pixel less.
553 Rectangle(dc, 0, 0, width + 1, height + 1);
554
555 FPDF_RenderPage(dc, page, 0, 0, width, height, 0, FPDF_ANNOT | FPDF_PRINTING);
556
557 DeleteEnhMetaFile(CloseEnhMetaFile(dc));
558 }
559
WritePS(FPDF_PAGE page,const char * pdf_name,int num)560 void WritePS(FPDF_PAGE page, const char* pdf_name, int num) {
561 std::string filename = GeneratePageOutputFilename(pdf_name, num, "ps");
562 if (filename.empty()) {
563 return;
564 }
565 FILE* fp = fopen(filename.c_str(), "wb");
566 if (!fp) {
567 return;
568 }
569
570 HDC dc = CreateEnhMetaFileA(nullptr, nullptr, nullptr, nullptr);
571
572 int width = static_cast<int>(FPDF_GetPageWidthF(page));
573 int height = static_cast<int>(FPDF_GetPageHeightF(page));
574 FPDF_RenderPage(dc, page, 0, 0, width, height, 0, FPDF_ANNOT | FPDF_PRINTING);
575
576 HENHMETAFILE emf = CloseEnhMetaFile(dc);
577 std::vector<const ENHMETARECORD*> items;
578 EnumEnhMetaFile(nullptr, emf, &EnhMetaFileProc, &items, nullptr);
579 for (const ENHMETARECORD* record : items) {
580 if (record->iType != EMR_GDICOMMENT) {
581 continue;
582 }
583
584 const auto* comment = reinterpret_cast<const EMRGDICOMMENT*>(record);
585 const char* data = reinterpret_cast<const char*>(comment->Data);
586 uint16_t size = *reinterpret_cast<const uint16_t*>(data);
587 if (fwrite(data + sizeof(uint16_t), size, 1, fp) != 1) {
588 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
589 break;
590 }
591 }
592 fclose(fp);
593 DeleteEnhMetaFile(emf);
594 }
595 #endif // _WIN32
596
597 #ifdef PDF_ENABLE_SKIA
WriteToSkWStream(const std::string & pdf_name,int num,const std::string & extension)598 std::unique_ptr<SkWStream> WriteToSkWStream(const std::string& pdf_name,
599 int num,
600 const std::string& extension) {
601 std::string discarded_filename;
602 return WriteToSkWStream(pdf_name, num, extension, discarded_filename);
603 }
604
WriteToSkWStream(const std::string & pdf_name,int num,const std::string & extension,std::string & filename)605 std::unique_ptr<SkWStream> WriteToSkWStream(const std::string& pdf_name,
606 int num,
607 const std::string& extension,
608 std::string& filename) {
609 filename =
610 GeneratePageOutputFilename(pdf_name.c_str(), num, extension.c_str());
611 if (filename.empty()) {
612 return nullptr;
613 }
614
615 auto stream = std::make_unique<SkFILEWStream>(filename.c_str());
616 if (!stream->isValid()) {
617 return nullptr;
618 }
619
620 return stream;
621 }
622
WriteSkp(const char * pdf_name,int num,const SkPicture & picture)623 std::string WriteSkp(const char* pdf_name, int num, const SkPicture& picture) {
624 std::string filename;
625 std::unique_ptr<SkWStream> stream =
626 WriteToSkWStream(pdf_name, num, "skp", filename);
627 if (!stream) {
628 return "";
629 }
630
631 picture.serialize(stream.get());
632 return filename;
633 }
634 #endif // PDF_ENABLE_SKIA
635
// Selects which representation of a page thumbnail is being saved; each one
// gets its own file-name pattern in GetThumbnailFilename().
enum class ThumbnailDecodeType {
  kBitmap,         // Thumbnail saved as a PNG.
  kRawStream,      // Thumbnail stream data saved as-is.
  kDecodedStream,  // Thumbnail stream data saved after decoding.
};

// Formats the thumbnail output name for page |page_num| of |pdf_name| into
// |name_buf|, which holds |name_buf_size| bytes. Returns false, after logging
// to stderr, when formatting fails or the name does not fit in the buffer.
bool GetThumbnailFilename(char* name_buf,
                          size_t name_buf_size,
                          const char* pdf_name,
                          int page_num,
                          ThumbnailDecodeType decode_type) {
  const char* pattern = nullptr;
  switch (decode_type) {
    case ThumbnailDecodeType::kBitmap:
      pattern = "%s.thumbnail.%d.png";
      break;
    case ThumbnailDecodeType::kRawStream:
      pattern = "%s.thumbnail.raw.%d.bin";
      break;
    case ThumbnailDecodeType::kDecodedStream:
      pattern = "%s.thumbnail.decoded.%d.bin";
      break;
  }

  const int written =
      snprintf(name_buf, name_buf_size, pattern, pdf_name, page_num);
  // snprintf() returns the untruncated length, so a value at or past the
  // buffer size means the name was cut short.
  const bool fits =
      written >= 0 && static_cast<size_t>(written) < name_buf_size;
  if (!fits) {
    fprintf(stderr, "Filename %s for saving is too long.\n", name_buf);
  }
  return fits;
}
666
// Writes |buflen| bytes from |buf| to |filename| in binary mode and reports
// the outcome on stderr. |filetype| is a human-readable label used only in the
// messages. A zero-length buffer is valid and produces an empty file.
void WriteBufferToFile(const void* buf,
                       size_t buflen,
                       const char* filename,
                       const char* filetype) {
  FILE* fp = fopen(filename, "wb");
  if (!fp) {
    fprintf(stderr, "Failed to open %s for saving %s.\n", filename, filetype);
    return;
  }

  const bool wrote_all = fwrite(buf, 1, buflen, fp) == buflen;
  // Buffered data may only reach the file at close time, so the close has to
  // succeed as well before success is reported.
  const bool closed = fclose(fp) == 0;
  if (wrote_all && closed) {
    fprintf(stderr, "Successfully wrote %s %s.\n", filetype, filename);
  } else {
    fprintf(stderr, "Failed to write to %s.\n", filename);
  }
}
685
EncodeBitmapToPng(ScopedFPDFBitmap bitmap)686 std::vector<uint8_t> EncodeBitmapToPng(ScopedFPDFBitmap bitmap) {
687 std::vector<uint8_t> png_encoding;
688 int format = FPDFBitmap_GetFormat(bitmap.get());
689 if (format == FPDFBitmap_Unknown) {
690 return png_encoding;
691 }
692
693 int width = FPDFBitmap_GetWidth(bitmap.get());
694 int height = FPDFBitmap_GetHeight(bitmap.get());
695 int stride = FPDFBitmap_GetStride(bitmap.get());
696 if (!CheckDimensions(stride, width, height)) {
697 return png_encoding;
698 }
699
700 auto input = pdfium::make_span(
701 static_cast<const uint8_t*>(FPDFBitmap_GetBuffer(bitmap.get())),
702 stride * height);
703
704 png_encoding = EncodePng(input, width, height, stride, format);
705 return png_encoding;
706 }
707
WriteAttachments(FPDF_DOCUMENT doc,const std::string & name)708 void WriteAttachments(FPDF_DOCUMENT doc, const std::string& name) {
709 for (int i = 0; i < FPDFDoc_GetAttachmentCount(doc); ++i) {
710 FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(doc, i);
711
712 // Retrieve the attachment file name.
713 std::string attachment_name;
714 unsigned long length_bytes = FPDFAttachment_GetName(attachment, nullptr, 0);
715 if (length_bytes) {
716 std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
717 unsigned long actual_length_bytes =
718 FPDFAttachment_GetName(attachment, buf.data(), length_bytes);
719 if (actual_length_bytes == length_bytes) {
720 attachment_name = GetPlatformString(buf.data());
721 }
722 }
723 if (attachment_name.empty()) {
724 fprintf(stderr, "Attachment #%d has an empty file name.\n", i + 1);
725 continue;
726 }
727
728 // Calculate the full attachment file name.
729 char save_name[256];
730 int chars_formatted =
731 snprintf(save_name, sizeof(save_name), "%s.attachment.%s", name.c_str(),
732 attachment_name.c_str());
733 if (chars_formatted < 0 ||
734 static_cast<size_t>(chars_formatted) >= sizeof(save_name)) {
735 fprintf(stderr, "Filename %s is too long.\n", save_name);
736 continue;
737 }
738
739 // Retrieve the attachment.
740 if (!FPDFAttachment_GetFile(attachment, nullptr, 0, &length_bytes)) {
741 fprintf(stderr, "Failed to retrieve attachment \"%s\".\n",
742 attachment_name.c_str());
743 continue;
744 }
745
746 std::vector<char> data_buf(length_bytes);
747 if (length_bytes) {
748 unsigned long actual_length_bytes;
749 if (!FPDFAttachment_GetFile(attachment, data_buf.data(), length_bytes,
750 &actual_length_bytes)) {
751 fprintf(stderr, "Failed to retrieve attachment \"%s\".\n",
752 attachment_name.c_str());
753 continue;
754 }
755 }
756
757 // Write the attachment file. Since a PDF document could have 0-byte files
758 // as attachments, we should allow saving the 0-byte attachments to files.
759 WriteBufferToFile(data_buf.data(), length_bytes, save_name, "attachment");
760 }
761 }
762
WriteImages(FPDF_PAGE page,const char * pdf_name,int page_num)763 void WriteImages(FPDF_PAGE page, const char* pdf_name, int page_num) {
764 for (int i = 0; i < FPDFPage_CountObjects(page); ++i) {
765 FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
766 if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE) {
767 continue;
768 }
769
770 ScopedFPDFBitmap bitmap(FPDFImageObj_GetBitmap(obj));
771 if (!bitmap) {
772 fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
773 i + 1, page_num + 1);
774 continue;
775 }
776
777 std::string filename =
778 GenerateImageOutputFilename(pdf_name, page_num, i, "png");
779 if (filename.empty()) {
780 continue;
781 }
782
783 std::vector<uint8_t> png_encoding = EncodeBitmapToPng(std::move(bitmap));
784 if (png_encoding.empty()) {
785 fprintf(stderr,
786 "Failed to convert image object #%d, on page #%d to png.\n",
787 i + 1, page_num + 1);
788 continue;
789 }
790
791 WriteBufferToFile(&png_encoding.front(), png_encoding.size(),
792 filename.c_str(), "image");
793 }
794 }
795
WriteRenderedImages(FPDF_DOCUMENT doc,FPDF_PAGE page,const char * pdf_name,int page_num)796 void WriteRenderedImages(FPDF_DOCUMENT doc,
797 FPDF_PAGE page,
798 const char* pdf_name,
799 int page_num) {
800 for (int i = 0; i < FPDFPage_CountObjects(page); ++i) {
801 FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
802 if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE) {
803 continue;
804 }
805
806 ScopedFPDFBitmap bitmap(FPDFImageObj_GetRenderedBitmap(doc, page, obj));
807 if (!bitmap) {
808 fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
809 i + 1, page_num + 1);
810 continue;
811 }
812
813 std::string filename =
814 GenerateImageOutputFilename(pdf_name, page_num, i, "png");
815 if (filename.empty()) {
816 continue;
817 }
818
819 std::vector<uint8_t> png_encoding = EncodeBitmapToPng(std::move(bitmap));
820 if (png_encoding.empty()) {
821 fprintf(stderr,
822 "Failed to convert image object #%d, on page #%d to png.\n",
823 i + 1, page_num + 1);
824 continue;
825 }
826
827 WriteBufferToFile(&png_encoding.front(), png_encoding.size(),
828 filename.c_str(), "image");
829 }
830 }
831
WriteDecodedThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)832 void WriteDecodedThumbnailStream(FPDF_PAGE page,
833 const char* pdf_name,
834 int page_num) {
835 char filename[256];
836 if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
837 ThumbnailDecodeType::kDecodedStream)) {
838 return;
839 }
840
841 unsigned long decoded_data_size =
842 FPDFPage_GetDecodedThumbnailData(page, nullptr, 0u);
843
844 // Only continue if there actually is a thumbnail for this page
845 if (decoded_data_size == 0) {
846 fprintf(stderr, "Failed to get decoded thumbnail for page #%d.\n",
847 page_num + 1);
848 return;
849 }
850
851 std::vector<uint8_t> thumb_buf(decoded_data_size);
852 if (FPDFPage_GetDecodedThumbnailData(
853 page, thumb_buf.data(), decoded_data_size) != decoded_data_size) {
854 fprintf(stderr, "Failed to get decoded thumbnail data for %s.\n", filename);
855 return;
856 }
857
858 WriteBufferToFile(thumb_buf.data(), decoded_data_size, filename,
859 "decoded thumbnail");
860 }
861
WriteRawThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)862 void WriteRawThumbnailStream(FPDF_PAGE page,
863 const char* pdf_name,
864 int page_num) {
865 char filename[256];
866 if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
867 ThumbnailDecodeType::kRawStream)) {
868 return;
869 }
870
871 unsigned long raw_data_size = FPDFPage_GetRawThumbnailData(page, nullptr, 0u);
872
873 // Only continue if there actually is a thumbnail for this page
874 if (raw_data_size == 0) {
875 fprintf(stderr, "Failed to get raw thumbnail data for page #%d.\n",
876 page_num + 1);
877 return;
878 }
879
880 std::vector<uint8_t> thumb_buf(raw_data_size);
881 if (FPDFPage_GetRawThumbnailData(page, thumb_buf.data(), raw_data_size) !=
882 raw_data_size) {
883 fprintf(stderr, "Failed to get raw thumbnail data for %s.\n", filename);
884 return;
885 }
886
887 WriteBufferToFile(thumb_buf.data(), raw_data_size, filename, "raw thumbnail");
888 }
889
WriteThumbnail(FPDF_PAGE page,const char * pdf_name,int page_num)890 void WriteThumbnail(FPDF_PAGE page, const char* pdf_name, int page_num) {
891 char filename[256];
892 if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
893 ThumbnailDecodeType::kBitmap)) {
894 return;
895 }
896
897 ScopedFPDFBitmap thumb_bitmap(FPDFPage_GetThumbnailAsBitmap(page));
898 if (!thumb_bitmap) {
899 fprintf(stderr, "Thumbnail of page #%d has an empty bitmap.\n",
900 page_num + 1);
901 return;
902 }
903
904 std::vector<uint8_t> png_encoding =
905 EncodeBitmapToPng(std::move(thumb_bitmap));
906 if (png_encoding.empty()) {
907 fprintf(stderr, "Failed to convert thumbnail of page #%d to png.\n",
908 page_num + 1);
909 return;
910 }
911
912 WriteBufferToFile(&png_encoding.front(), png_encoding.size(), filename,
913 "thumbnail");
914 }
915