1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <iterator>
6
7 #include "public/fpdf_structtree.h"
8 #include "testing/embedder_test.h"
9 #include "testing/fx_string_testhelpers.h"
10 #include "third_party/abseil-cpp/absl/types/optional.h"
11
12 class FPDFStructTreeEmbedderTest : public EmbedderTest {};
13
// Descends the single-child chain of tagged_alt_text.pdf. The first two levels
// report no alt text, the third reports "Black Image". Also covers
// out-of-range child indices and an undersized output buffer.
TEST_F(FPDFStructTreeEmbedderTest, GetAltText) {
  ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    // Level 1: indices on either side of the valid range yield nothing.
    EXPECT_FALSE(FPDF_StructTree_GetChildAtIndex(tree.get(), -1));
    EXPECT_FALSE(FPDF_StructTree_GetChildAtIndex(tree.get(), 1));
    FPDF_STRUCTELEMENT level1 = FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(level1);
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(level1));
    EXPECT_EQ(0U, FPDF_StructElement_GetAltText(level1, nullptr, 0));

    // Level 2: same index checks, still no alt text.
    ASSERT_EQ(1, FPDF_StructElement_CountChildren(level1));
    EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(level1, -1));
    EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(level1, 1));
    FPDF_STRUCTELEMENT level2 = FPDF_StructElement_GetChildAtIndex(level1, 0);
    ASSERT_TRUE(level2);
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(level2));
    EXPECT_EQ(0U, FPDF_StructElement_GetAltText(level2, nullptr, 0));

    // Level 3: this element carries the alt text.
    ASSERT_EQ(1, FPDF_StructElement_CountChildren(level2));
    EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(level2, -1));
    EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(level2, 1));
    FPDF_STRUCTELEMENT level3 = FPDF_StructElement_GetChildAtIndex(level2, 0);
    ASSERT_TRUE(level3);
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(level3));
    ASSERT_EQ(24U, FPDF_StructElement_GetAltText(level3, nullptr, 0));

    unsigned short alt_text[12] = {};
    // A buffer length of 1 is far too small: the call must still report the
    // required size and must leave |alt_text| zeroed.
    ASSERT_EQ(24U, FPDF_StructElement_GetAltText(level3, alt_text, 1));
    for (unsigned short code_unit : alt_text)
      EXPECT_EQ(0U, code_unit);

    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(level3));
    ASSERT_EQ(24U, FPDF_StructElement_GetAltText(level3, alt_text,
                                                 sizeof(alt_text)));
    EXPECT_EQ(L"Black Image", GetPlatformWString(alt_text));

    // One child is counted, but it is not returned as a struct element.
    ASSERT_EQ(1, FPDF_StructElement_CountChildren(level3));
    EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(level3, 0));
  }

  UnloadPage(page);
}
77
// Descends tagged_actual_text.pdf and checks that only the third-level element
// reports actual text ("Actual Text"). A null element and an undersized buffer
// are covered as well.
TEST_F(FPDFStructTreeEmbedderTest, GetActualText) {
  ASSERT_TRUE(OpenDocument("tagged_actual_text.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    // A null element yields length 0.
    EXPECT_EQ(0U, FPDF_StructElement_GetActualText(nullptr, nullptr, 0));

    FPDF_STRUCTELEMENT level1 = FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(level1);
    EXPECT_EQ(0U, FPDF_StructElement_GetActualText(level1, nullptr, 0));

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(level1));
    FPDF_STRUCTELEMENT level2 = FPDF_StructElement_GetChildAtIndex(level1, 0);
    ASSERT_TRUE(level2);
    EXPECT_EQ(0U, FPDF_StructElement_GetActualText(level2, nullptr, 0));

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(level2));
    FPDF_STRUCTELEMENT level3 = FPDF_StructElement_GetChildAtIndex(level2, 0);
    ASSERT_TRUE(level3);
    ASSERT_EQ(24U, FPDF_StructElement_GetActualText(level3, nullptr, 0));

    unsigned short actual_text[12] = {};
    // With a buffer length of 1 the call must only report the needed size and
    // leave |actual_text| zeroed.
    ASSERT_EQ(24U, FPDF_StructElement_GetActualText(level3, actual_text, 1));
    for (unsigned short code_unit : actual_text)
      EXPECT_EQ(0U, code_unit);
    ASSERT_EQ(24U, FPDF_StructElement_GetActualText(level3, actual_text,
                                                    sizeof(actual_text)));
    EXPECT_EQ(L"Actual Text", GetPlatformWString(actual_text));
  }

  UnloadPage(page);
}
121
// Reads string attributes from the Document > Table > row > TH chain of
// tagged_table.pdf: an empty-string attribute, a name-valued attribute, a
// non-string attribute, an unknown attribute, and a null element.
TEST_F(FPDFStructTreeEmbedderTest, GetStringAttribute) {
  ASSERT_TRUE(OpenDocument("tagged_table.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    FPDF_STRUCTELEMENT doc_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(doc_elem);

    constexpr int kScratchLen = 100;
    uint16_t scratch[kScratchLen] = {0};
    EXPECT_EQ(18U, FPDF_StructElement_GetType(doc_elem, scratch, kScratchLen));
    EXPECT_EQ("Document", GetPlatformString(scratch));

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(doc_elem));
    FPDF_STRUCTELEMENT table_elem =
        FPDF_StructElement_GetChildAtIndex(doc_elem, 0);
    ASSERT_TRUE(table_elem);

    EXPECT_EQ(12U,
              FPDF_StructElement_GetType(table_elem, scratch, kScratchLen));
    EXPECT_EQ("Table", GetPlatformString(scratch));

    // "Summary" is present on the table but empty, so the reported length is
    // 2 rather than 0.
    EXPECT_EQ(2U, FPDF_StructElement_GetStringAttribute(
                      table_elem, "Summary", scratch, kScratchLen));

    ASSERT_EQ(2, FPDF_StructElement_CountChildren(table_elem));
    FPDF_STRUCTELEMENT row_elem =
        FPDF_StructElement_GetChildAtIndex(table_elem, 0);
    ASSERT_TRUE(row_elem);

    ASSERT_EQ(2, FPDF_StructElement_CountChildren(row_elem));
    FPDF_STRUCTELEMENT th_elem =
        FPDF_StructElement_GetChildAtIndex(row_elem, 0);
    ASSERT_TRUE(th_elem);

    EXPECT_EQ(6U, FPDF_StructElement_GetType(th_elem, scratch, kScratchLen));
    EXPECT_EQ("TH", GetPlatformString(scratch));

    // The header cell's "Scope" attribute has the value "Row".
    EXPECT_EQ(8U, FPDF_StructElement_GetStringAttribute(
                      th_elem, "Scope", scratch, kScratchLen));
    EXPECT_EQ("Row", GetPlatformString(scratch));

    // "ColSpan" exists on the header cell but is not a string, so the call
    // returns 0.
    EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(
                      th_elem, "ColSpan", scratch, kScratchLen));

    // An attribute that is not supported returns 0.
    EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(
                      th_elem, "Other", scratch, kScratchLen));

    // Passing a null struct element must not crash.
    EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(
                      nullptr, "Other", scratch, kScratchLen));
  }

  UnloadPage(page);
}
184
// Same walk as GetStringAttribute, but on tagged_table_bad_elem.pdf, whose
// table and row elements are malformed. The elements are still reachable, but
// the table's "Summary" attribute is not.
TEST_F(FPDFStructTreeEmbedderTest, GetStringAttributeBadStructElement) {
  ASSERT_TRUE(OpenDocument("tagged_table_bad_elem.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    FPDF_STRUCTELEMENT doc_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(doc_elem);

    constexpr int kScratchLen = 100;
    uint16_t scratch[kScratchLen] = {0};
    EXPECT_EQ(18U, FPDF_StructElement_GetType(doc_elem, scratch, kScratchLen));
    EXPECT_EQ("Document", GetPlatformString(scratch));

    // The table lacks /Type but is still returned as a child.
    ASSERT_EQ(1, FPDF_StructElement_CountChildren(doc_elem));
    FPDF_STRUCTELEMENT table_elem =
        FPDF_StructElement_GetChildAtIndex(doc_elem, 0);
    ASSERT_TRUE(table_elem);

    EXPECT_EQ(12U,
              FPDF_StructElement_GetType(table_elem, scratch, kScratchLen));
    EXPECT_EQ("Table", GetPlatformString(scratch));

    // Because the element is malformed, its "Summary" entry is unavailable.
    EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(
                      table_elem, "Summary", scratch, kScratchLen));

    // The row has an invalid /Type but is still returned as a child.
    ASSERT_EQ(1, FPDF_StructElement_CountChildren(table_elem));
    FPDF_STRUCTELEMENT row_elem =
        FPDF_StructElement_GetChildAtIndex(table_elem, 0);
    EXPECT_TRUE(row_elem);
  }

  UnloadPage(page);
}
224
// Checks FPDF_StructElement_GetID() on tagged_table.pdf for an element with no
// ID (Document), a real ID (Table, "node12"), and an empty ID (first row).
TEST_F(FPDFStructTreeEmbedderTest, GetID) {
  ASSERT_TRUE(OpenDocument("tagged_table.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    FPDF_STRUCTELEMENT doc_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(doc_elem);

    constexpr int kScratchLen = 100;
    uint16_t scratch[kScratchLen] = {0};
    EXPECT_EQ(18U, FPDF_StructElement_GetType(doc_elem, scratch, kScratchLen));
    EXPECT_EQ("Document", GetPlatformString(scratch));

    // No ID on the document element.
    EXPECT_EQ(0U, FPDF_StructElement_GetID(doc_elem, scratch, kScratchLen));

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(doc_elem));
    FPDF_STRUCTELEMENT table_elem =
        FPDF_StructElement_GetChildAtIndex(doc_elem, 0);
    ASSERT_TRUE(table_elem);

    EXPECT_EQ(12U,
              FPDF_StructElement_GetType(table_elem, scratch, kScratchLen));
    EXPECT_EQ("Table", GetPlatformString(scratch));

    // The table carries an ID.
    EXPECT_EQ(14U, FPDF_StructElement_GetID(table_elem, scratch, kScratchLen));
    EXPECT_EQ("node12", GetPlatformString(scratch));

    // The table's first child is a row whose ID is the empty string. That is
    // reported as 2U (the length of an empty string), not 0U (null).
    ASSERT_EQ(2, FPDF_StructElement_CountChildren(table_elem));
    FPDF_STRUCTELEMENT row_elem =
        FPDF_StructElement_GetChildAtIndex(table_elem, 0);
    ASSERT_TRUE(row_elem);
    EXPECT_EQ(2U, FPDF_StructElement_GetID(row_elem, scratch, kScratchLen));
  }

  UnloadPage(page);
}
269
// Checks FPDF_StructElement_GetLang() on tagged_table.pdf: a null element, the
// Document ("en-US"), the Table ("hu"), and a row with no language of its own.
TEST_F(FPDFStructTreeEmbedderTest, GetLang) {
  ASSERT_TRUE(OpenDocument("tagged_table.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    FPDF_STRUCTELEMENT doc_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(doc_elem);

    constexpr int kScratchLen = 100;
    uint16_t scratch[kScratchLen] = {0};
    EXPECT_EQ(18U, FPDF_StructElement_GetType(doc_elem, scratch, kScratchLen));
    EXPECT_EQ("Document", GetPlatformString(scratch));

    // A null element yields length 0.
    EXPECT_EQ(0U, FPDF_StructElement_GetLang(nullptr, scratch, kScratchLen));

    // Language set on the document element.
    EXPECT_EQ(12U, FPDF_StructElement_GetLang(doc_elem, scratch, kScratchLen));
    EXPECT_EQ("en-US", GetPlatformString(scratch));

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(doc_elem));
    FPDF_STRUCTELEMENT table_elem =
        FPDF_StructElement_GetChildAtIndex(doc_elem, 0);
    ASSERT_TRUE(table_elem);

    // The table is the first child and has its own language.
    EXPECT_EQ(12U,
              FPDF_StructElement_GetType(table_elem, scratch, kScratchLen));
    EXPECT_EQ("Table", GetPlatformString(scratch));

    EXPECT_EQ(6U,
              FPDF_StructElement_GetLang(table_elem, scratch, kScratchLen));
    EXPECT_EQ("hu", GetPlatformString(scratch));

    // The table's first row has no language explicitly set on it.
    ASSERT_EQ(2, FPDF_StructElement_CountChildren(table_elem));
    FPDF_STRUCTELEMENT row_elem =
        FPDF_StructElement_GetChildAtIndex(table_elem, 0);
    ASSERT_TRUE(row_elem);
    EXPECT_EQ(0U, FPDF_StructElement_GetLang(row_elem, scratch, kScratchLen));
  }

  UnloadPage(page);
}
317
318 // See also FPDFEditEmbedderTest.TraverseMarkedContentID, which traverses the
319 // marked contents using FPDFPageObj_GetMark() and related API.
TEST_F(FPDFStructTreeEmbedderTest,GetMarkedContentID)320 TEST_F(FPDFStructTreeEmbedderTest, GetMarkedContentID) {
321 ASSERT_TRUE(OpenDocument("marked_content_id.pdf"));
322 FPDF_PAGE page = LoadPage(0);
323 ASSERT_TRUE(page);
324
325 {
326 ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page));
327 ASSERT_TRUE(struct_tree);
328 ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get()));
329
330 FPDF_STRUCTELEMENT element =
331 FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
332 EXPECT_EQ(0, FPDF_StructElement_GetMarkedContentID(element));
333 }
334
335 UnloadPage(page);
336 }
337
// Checks the marked-content-ID count/index APIs against the four top-level
// elements of tagged_marked_content.pdf, each of which has a different kind of
// /K entry.
TEST_F(FPDFStructTreeEmbedderTest, GetMarkedContentIdAtIndex) {
  ASSERT_TRUE(OpenDocument("tagged_marked_content.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(4, FPDF_StructTree_CountChildren(tree.get()));

    // /K is an integer MCID.
    FPDF_STRUCTELEMENT int_k = FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(int_k);
    // The legacy single-ID API.
    EXPECT_EQ(0, FPDF_StructElement_GetMarkedContentID(int_k));

    // /K is a dict containing an MCR object reference.
    FPDF_STRUCTELEMENT dict_k = FPDF_StructTree_GetChildAtIndex(tree.get(), 1);
    ASSERT_TRUE(dict_k);

    // /K is an array holding a dict MCR object reference and an integer MCID.
    FPDF_STRUCTELEMENT array_k =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 2);
    ASSERT_TRUE(array_k);

    // /K is absent.
    FPDF_STRUCTELEMENT no_k = FPDF_StructTree_GetChildAtIndex(tree.get(), 3);
    ASSERT_TRUE(no_k);

    // The newer count/index APIs: null element and out-of-range indices first.
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdCount(nullptr));
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdAtIndex(nullptr, 0));
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdAtIndex(int_k, -1));
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdAtIndex(int_k, 1));
    EXPECT_EQ(1, FPDF_StructElement_GetMarkedContentIdCount(int_k));
    EXPECT_EQ(0, FPDF_StructElement_GetMarkedContentIdAtIndex(int_k, 0));

    EXPECT_EQ(1, FPDF_StructElement_GetMarkedContentIdCount(dict_k));
    EXPECT_EQ(1, FPDF_StructElement_GetMarkedContentIdAtIndex(dict_k, 0));

    EXPECT_EQ(2, FPDF_StructElement_GetMarkedContentIdCount(array_k));
    EXPECT_EQ(2, FPDF_StructElement_GetMarkedContentIdAtIndex(array_k, 0));
    EXPECT_EQ(3, FPDF_StructElement_GetMarkedContentIdAtIndex(array_k, 1));

    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdCount(no_k));
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdAtIndex(no_k, 0));
  }

  UnloadPage(page);
}
391
// Checks FPDF_StructElement_GetType() on the top-level element of
// tagged_alt_text.pdf: null inputs, size query, undersized buffer, and a
// successful read of "Document".
TEST_F(FPDFStructTreeEmbedderTest, GetType) {
  ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    FPDF_STRUCTELEMENT top = FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(top);

    // Null element (with and without a buffer), then a pure size query.
    unsigned short type_name[12];
    ASSERT_EQ(0U,
              FPDF_StructElement_GetType(nullptr, type_name, sizeof(type_name)));
    ASSERT_EQ(0U, FPDF_StructElement_GetType(nullptr, nullptr, 0));
    ASSERT_EQ(18U, FPDF_StructElement_GetType(top, nullptr, 0));

    memset(type_name, 0, sizeof(type_name));
    // A buffer length of 1 is too small, so |type_name| must stay zeroed
    // while the needed size is still reported.
    ASSERT_EQ(18U, FPDF_StructElement_GetType(top, type_name, 1));
    for (unsigned short code_unit : type_name)
      EXPECT_EQ(0U, code_unit);

    ASSERT_EQ(18U,
              FPDF_StructElement_GetType(top, type_name, sizeof(type_name)));
    EXPECT_EQ(L"Document", GetPlatformWString(type_name));
  }

  UnloadPage(page);
}
425
// Checks FPDF_StructElement_GetObjType() on tagged_table_bad_elem.pdf, whose
// three nested elements have, in order, a "StructElem" object type, no object
// type at all, and the bogus object type "NotStructElem".
TEST_F(FPDFStructTreeEmbedderTest, GetObjType) {
  ASSERT_TRUE(OpenDocument("tagged_table_bad_elem.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(struct_tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get()));

    FPDF_STRUCTELEMENT child =
        FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
    ASSERT_TRUE(child);

    // test nullptr inputs
    unsigned short buffer[28] = {};
    ASSERT_EQ(0U,
              FPDF_StructElement_GetObjType(nullptr, buffer, sizeof(buffer)));
    ASSERT_EQ(0U, FPDF_StructElement_GetObjType(nullptr, nullptr, 0));
    ASSERT_EQ(22U, FPDF_StructElement_GetObjType(child, nullptr, 0));

    // Deliberately pass in a small buffer size to make sure `buffer` remains
    // untouched.
    ASSERT_EQ(22U, FPDF_StructElement_GetObjType(child, buffer, 1));
    for (unsigned short code_unit : buffer)
      EXPECT_EQ(0U, code_unit);

    ASSERT_EQ(22U,
              FPDF_StructElement_GetObjType(child, buffer, sizeof(buffer)));
    EXPECT_EQ(L"StructElem", GetPlatformWString(buffer));

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(child));
    FPDF_STRUCTELEMENT gchild = FPDF_StructElement_GetChildAtIndex(child, 0);
    // A null element also yields 0 from GetObjType(), so make sure the 0
    // below really comes from an existing element that lacks /Type.
    ASSERT_TRUE(gchild);
    memset(buffer, 0, sizeof(buffer));
    // Missing /Type in `gchild`
    ASSERT_EQ(0U,
              FPDF_StructElement_GetObjType(gchild, buffer, sizeof(buffer)));
    // Buffer is untouched.
    for (unsigned short code_unit : buffer)
      EXPECT_EQ(0U, code_unit);

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(gchild));
    FPDF_STRUCTELEMENT ggchild = FPDF_StructElement_GetChildAtIndex(gchild, 0);
    ASSERT_TRUE(ggchild);
    ASSERT_EQ(28U,
              FPDF_StructElement_GetObjType(ggchild, buffer, sizeof(buffer)));
    // Reading bad elem also works.
    EXPECT_EQ(L"NotStructElem", GetPlatformWString(buffer));
  }

  UnloadPage(page);
}
477
// Checks FPDF_StructElement_GetParent() on tagged_alt_text.pdf for a null
// element, a nested element, and a top-level element.
TEST_F(FPDFStructTreeEmbedderTest, GetParent) {
  ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    FPDF_STRUCTELEMENT top = FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(top);

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(top));

    FPDF_STRUCTELEMENT nested = FPDF_StructElement_GetChildAtIndex(top, 0);
    ASSERT_TRUE(nested);

    // A null element has no parent.
    ASSERT_EQ(nullptr, FPDF_StructElement_GetParent(nullptr));

    // The nested element's parent is the element it was fetched from.
    ASSERT_EQ(top, FPDF_StructElement_GetParent(nested));

    // `top` hangs off the StructTreeRoot, which is not a StructElement. The
    // API currently reports that case as nullptr.
    ASSERT_EQ(nullptr, FPDF_StructElement_GetParent(top));
  }

  UnloadPage(page);
}
509
// Checks FPDF_StructElement_GetTitle() on tagged_alt_text.pdf: null inputs,
// size query, undersized buffer, and the titles of the top-level element
// ("TitleText") and its first child ("symbol: 100k").
TEST_F(FPDFStructTreeEmbedderTest, GetTitle) {
  ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(struct_tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get()));

    FPDF_STRUCTELEMENT element =
        FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
    ASSERT_TRUE(element);

    // test nullptr inputs
    unsigned short buffer[13];
    ASSERT_EQ(0U, FPDF_StructElement_GetTitle(nullptr, buffer, sizeof(buffer)));
    ASSERT_EQ(0U, FPDF_StructElement_GetTitle(nullptr, nullptr, 0));
    ASSERT_EQ(20U, FPDF_StructElement_GetTitle(element, nullptr, 0));

    memset(buffer, 0, sizeof(buffer));
    // Deliberately pass in a small buffer size to make sure |buffer| remains
    // untouched.
    ASSERT_EQ(20U, FPDF_StructElement_GetTitle(element, buffer, 1));
    for (unsigned short code_unit : buffer)
      EXPECT_EQ(0U, code_unit);

    ASSERT_EQ(20U,
              FPDF_StructElement_GetTitle(element, buffer, sizeof(buffer)));

    EXPECT_EQ(L"TitleText", GetPlatformWString(buffer));

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(element));
    FPDF_STRUCTELEMENT child_element =
        FPDF_StructElement_GetChildAtIndex(element, 0);
    // Check the element that was just fetched; `element` was already
    // verified above.
    ASSERT_TRUE(child_element);

    // "symbol: 100k" plus its terminator fills all 13 code units of |buffer|.
    ASSERT_EQ(26U, FPDF_StructElement_GetTitle(child_element, buffer,
                                               sizeof(buffer)));
    EXPECT_EQ(L"symbol: 100k", GetPlatformWString(buffer));
  }

  UnloadPage(page);
}
554
// Exercises attribute enumeration on tagged_table.pdf: attribute counts,
// out-of-range lookups, attribute names, and the typed value getters (name,
// number, string, blob, boolean, and a number reached through a reference).
TEST_F(FPDFStructTreeEmbedderTest, GetAttributes) {
  ASSERT_TRUE(OpenDocument("tagged_table.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    FPDF_STRUCTELEMENT doc_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(doc_elem);

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(doc_elem));
    // The document element reports an attribute count of -1.
    ASSERT_EQ(-1, FPDF_StructElement_GetAttributeCount(doc_elem));
    FPDF_STRUCTELEMENT table_elem =
        FPDF_StructElement_GetChildAtIndex(doc_elem, 0);
    ASSERT_TRUE(table_elem);

    ASSERT_EQ(2, FPDF_StructElement_CountChildren(table_elem));

    // First row: inspect the header cell's second attribute object.
    {
      FPDF_STRUCTELEMENT first_row =
          FPDF_StructElement_GetChildAtIndex(table_elem, 0);
      ASSERT_TRUE(first_row);

      ASSERT_EQ(2, FPDF_StructElement_CountChildren(first_row));
      FPDF_STRUCTELEMENT header_cell =
          FPDF_StructElement_GetChildAtIndex(first_row, 0);
      ASSERT_TRUE(header_cell);

      ASSERT_EQ(2, FPDF_StructElement_GetAttributeCount(header_cell));

      // Lookups that must yield nullptr: any index on the attribute-less
      // document element, and an index one past the header cell's last
      // attribute.
      ASSERT_EQ(nullptr, FPDF_StructElement_GetAttributeAtIndex(doc_elem, 0));
      ASSERT_EQ(nullptr, FPDF_StructElement_GetAttributeAtIndex(doc_elem, -1));
      ASSERT_EQ(nullptr,
                FPDF_StructElement_GetAttributeAtIndex(header_cell, 2));

      FPDF_STRUCTELEMENT_ATTR header_attr =
          FPDF_StructElement_GetAttributeAtIndex(header_cell, 1);
      ASSERT_TRUE(header_attr);

      ASSERT_EQ(2, FPDF_StructElement_Attr_GetCount(header_attr));
      // Without an out-length pointer the name lookup fails.
      ASSERT_FALSE(FPDF_StructElement_Attr_GetName(header_attr, 1, nullptr, 0U,
                                                   nullptr));
      unsigned long needed_len = ULONG_MAX;
      // A null buffer turns the call into a query for the required size.
      ASSERT_TRUE(FPDF_StructElement_Attr_GetName(header_attr, 1, nullptr, 0,
                                                  &needed_len));
      EXPECT_EQ(2U, needed_len);
      char key[8] = {};
      unsigned long reported_len = ULONG_MAX;
      // A buffer length of 1 is too small, so `key` must stay zeroed while
      // the needed size is still reported.
      ASSERT_TRUE(FPDF_StructElement_Attr_GetName(header_attr, 1, key, 1,
                                                  &reported_len));
      EXPECT_EQ(2U, reported_len);
      for (char ch : key)
        EXPECT_EQ(0, ch);

      ASSERT_TRUE(FPDF_StructElement_Attr_GetName(header_attr, 1, key,
                                                  sizeof(key), &reported_len));
      EXPECT_EQ(2U, reported_len);
      EXPECT_STREQ("O", key);
      EXPECT_EQ(FPDF_OBJECT_NAME,
                FPDF_StructElement_Attr_GetType(header_attr, key));

      unsigned short owner[12] = {};
      ASSERT_TRUE(FPDF_StructElement_Attr_GetStringValue(
          header_attr, key, owner, sizeof(owner), &reported_len));
      EXPECT_EQ(12U, reported_len);
      EXPECT_EQ(L"Table", GetPlatformWString(owner));

      // Entry 0 of the same attribute object is the numeric "ColSpan".
      memset(key, 0, sizeof(key));
      ASSERT_TRUE(FPDF_StructElement_Attr_GetName(header_attr, 0, key,
                                                  sizeof(key), &reported_len));
      EXPECT_EQ(8U, reported_len);
      EXPECT_STREQ("ColSpan", key);
      EXPECT_EQ(FPDF_OBJECT_NUMBER,
                FPDF_StructElement_Attr_GetType(header_attr, key));
      float col_span;
      ASSERT_TRUE(
          FPDF_StructElement_Attr_GetNumberValue(header_attr, key, &col_span));
      EXPECT_FLOAT_EQ(2.0f, col_span);
    }

    // Second row: the row's own attribute count, then its second cell.
    {
      FPDF_STRUCTELEMENT second_row =
          FPDF_StructElement_GetChildAtIndex(table_elem, 1);
      ASSERT_TRUE(second_row);

      ASSERT_EQ(1, FPDF_StructElement_GetAttributeCount(second_row));
      // An out-of-range index yields nullptr.
      ASSERT_EQ(nullptr, FPDF_StructElement_GetAttributeAtIndex(second_row, 1));

      ASSERT_EQ(2, FPDF_StructElement_CountChildren(second_row));
      FPDF_STRUCTELEMENT data_cell =
          FPDF_StructElement_GetChildAtIndex(second_row, 1);
      ASSERT_TRUE(data_cell);
      {
        // Count and fetch attributes that are obtained via a reference.
        ASSERT_EQ(1, FPDF_StructElement_GetAttributeCount(data_cell));
        FPDF_STRUCTELEMENT_ATTR cell_attr =
            FPDF_StructElement_GetAttributeAtIndex(data_cell, 0);
        ASSERT_TRUE(cell_attr);
        ASSERT_EQ(4, FPDF_StructElement_Attr_GetCount(cell_attr));

        // Entry 0: a string, readable both as a string and as a blob.
        {
          char key[16] = {};
          unsigned long reported_len = ULONG_MAX;
          ASSERT_TRUE(FPDF_StructElement_Attr_GetName(
              cell_attr, 0, key, sizeof(key), &reported_len));
          EXPECT_EQ(8U, reported_len);
          EXPECT_STREQ("ColProp", key);

          EXPECT_EQ(FPDF_OBJECT_STRING,
                    FPDF_StructElement_Attr_GetType(cell_attr, key));

          unsigned short text[12] = {};
          ASSERT_TRUE(FPDF_StructElement_Attr_GetStringValue(
              cell_attr, key, text, sizeof(text), &reported_len));
          EXPECT_EQ(8U, reported_len);
          EXPECT_EQ(L"Sum", GetPlatformWString(text));

          char raw[3] = {};
          ASSERT_TRUE(FPDF_StructElement_Attr_GetBlobValue(
              cell_attr, key, raw, sizeof(raw), &reported_len));
          EXPECT_EQ(3U, reported_len);
          EXPECT_EQ('S', raw[0]);
          EXPECT_EQ('u', raw[1]);
          EXPECT_EQ('m', raw[2]);
        }

        // Entry 1: a boolean.
        {
          char key[16] = {};
          unsigned long reported_len = ULONG_MAX;
          ASSERT_TRUE(FPDF_StructElement_Attr_GetName(
              cell_attr, 1, key, sizeof(key), &reported_len));
          EXPECT_EQ(7U, reported_len);
          EXPECT_STREQ("CurUSD", key);

          EXPECT_EQ(FPDF_OBJECT_BOOLEAN,
                    FPDF_StructElement_Attr_GetType(cell_attr, key));
          FPDF_BOOL flag;
          ASSERT_TRUE(
              FPDF_StructElement_Attr_GetBooleanValue(cell_attr, key, &flag));
          EXPECT_TRUE(flag);
        }

        // Entry 3: a reference that resolves to a number.
        {
          char key[16] = {};
          unsigned long reported_len = ULONG_MAX;
          ASSERT_TRUE(FPDF_StructElement_Attr_GetName(
              cell_attr, 3, key, sizeof(key), &reported_len));
          EXPECT_EQ(8U, reported_len);
          EXPECT_STREQ("RowSpan", key);

          EXPECT_EQ(FPDF_OBJECT_REFERENCE,
                    FPDF_StructElement_Attr_GetType(cell_attr, key));
          float row_span;
          ASSERT_TRUE(FPDF_StructElement_Attr_GetNumberValue(cell_attr, key,
                                                             &row_span));
          EXPECT_FLOAT_EQ(3, row_span);
        }
      }
    }
  }

  UnloadPage(page);
}
723
// Regression test for https://crbug.com/pdfium/1480: building the structure
// tree for tagged_nested.pdf must not crash.
TEST_F(FPDFStructTreeEmbedderTest, GetStructTreeForNestedTaggedPDF) {
  ASSERT_TRUE(OpenDocument("tagged_nested.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // Fetching the tree is the operation under test.
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
  }
  UnloadPage(page);
}
736
// Walks both branches of the structure tree in tagged_mcr_objr.pdf, checking
// each element's type, marked content ID and child count. Variables are named
// after the object numbers used in the comments below.
TEST_F(FPDFStructTreeEmbedderTest, MarkedContentReferenceAndObjectReference) {
  ASSERT_TRUE(OpenDocument("tagged_mcr_objr.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    FPDF_STRUCTELEMENT elem8 = FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(elem8);
    unsigned short type_name[12];
    ASSERT_EQ(18U,
              FPDF_StructElement_GetType(elem8, type_name, sizeof(type_name)));
    EXPECT_EQ(L"Document", GetPlatformWString(type_name));
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(elem8));
    ASSERT_EQ(2, FPDF_StructElement_CountChildren(elem8));

    // First branch. 10 -> 12 -> 13 -> Inline dict.
    FPDF_STRUCTELEMENT elem10 = FPDF_StructElement_GetChildAtIndex(elem8, 0);
    ASSERT_TRUE(elem10);
    ASSERT_EQ(20U,
              FPDF_StructElement_GetType(elem10, type_name, sizeof(type_name)));
    EXPECT_EQ(L"NonStruct", GetPlatformWString(type_name));
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(elem10));
    ASSERT_EQ(1, FPDF_StructElement_CountChildren(elem10));

    FPDF_STRUCTELEMENT elem12 = FPDF_StructElement_GetChildAtIndex(elem10, 0);
    ASSERT_TRUE(elem12);
    ASSERT_EQ(4U,
              FPDF_StructElement_GetType(elem12, type_name, sizeof(type_name)));
    EXPECT_EQ(L"P", GetPlatformWString(type_name));
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(elem12));
    ASSERT_EQ(1, FPDF_StructElement_CountChildren(elem12));

    FPDF_STRUCTELEMENT elem13 = FPDF_StructElement_GetChildAtIndex(elem12, 0);
    ASSERT_TRUE(elem13);
    ASSERT_EQ(20U,
              FPDF_StructElement_GetType(elem13, type_name, sizeof(type_name)));
    EXPECT_EQ(L"NonStruct", GetPlatformWString(type_name));
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(elem13));
    ASSERT_EQ(1, FPDF_StructElement_CountChildren(elem13));

    // TODO(crbug.com/pdfium/672): Fetch this child element.
    EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(elem13, 0));

    // Second branch. 11 -> 14 -> Inline dict.
    //                         -> 15 -> Inline dict.
    FPDF_STRUCTELEMENT elem11 = FPDF_StructElement_GetChildAtIndex(elem8, 1);
    ASSERT_TRUE(elem11);
    ASSERT_EQ(4U,
              FPDF_StructElement_GetType(elem11, type_name, sizeof(type_name)));
    EXPECT_EQ(L"P", GetPlatformWString(type_name));
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(elem11));
    ASSERT_EQ(1, FPDF_StructElement_CountChildren(elem11));

    FPDF_STRUCTELEMENT elem14 = FPDF_StructElement_GetChildAtIndex(elem11, 0);
    ASSERT_TRUE(elem14);
    ASSERT_EQ(20U,
              FPDF_StructElement_GetType(elem14, type_name, sizeof(type_name)));
    EXPECT_EQ(L"NonStruct", GetPlatformWString(type_name));
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(elem14));
    ASSERT_EQ(2, FPDF_StructElement_CountChildren(elem14));

    // TODO(crbug.com/pdfium/672): Object 15 should be at index 1.
    EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(elem14, 1));
    FPDF_STRUCTELEMENT elem15 = FPDF_StructElement_GetChildAtIndex(elem14, 0);
    ASSERT_TRUE(elem15);
    ASSERT_EQ(20U,
              FPDF_StructElement_GetType(elem15, type_name, sizeof(type_name)));
    EXPECT_EQ(L"NonStruct", GetPlatformWString(type_name));
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(elem15));
    ASSERT_EQ(1, FPDF_StructElement_CountChildren(elem15));

    // TODO(crbug.com/pdfium/672): Fetch this child element.
    EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(elem15, 0));
  }

  UnloadPage(page);
}
822
// Pins the current behavior for bug_1768.pdf: the tree counts one child, but
// that child cannot be fetched yet.
TEST_F(FPDFStructTreeEmbedderTest, Bug1768) {
  ASSERT_TRUE(OpenDocument("bug_1768.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    // TODO(crbug.com/pdfium/1768): Fetch this child element. Then consider
    // writing more of the test to make sure other elements in the tree can be
    // fetched correctly as well.
    EXPECT_FALSE(FPDF_StructTree_GetChildAtIndex(tree.get(), 0));
  }

  UnloadPage(page);
}
841
// Builds and then tears down the structure tree for bug_1296920.pdf; the
// teardown at the end of the inner scope is what must not crash.
TEST_F(FPDFStructTreeEmbedderTest, Bug1296920) {
  ASSERT_TRUE(OpenDocument("bug_1296920.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    // `tree` is destroyed when this scope closes; that must not crash.
  }

  UnloadPage(page);
}
857
// Builds the structure tree for tagged_table_bad_parent.pdf and counts its
// children; doing so must not leave a dangling pointer behind.
TEST_F(FPDFStructTreeEmbedderTest, Bug1443100) {
  ASSERT_TRUE(OpenDocument("tagged_table_bad_parent.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // None of the API calls in this scope should trigger a dangling pointer.
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));
  }

  UnloadPage(page);
}
872