xref: /aosp_15_r20/external/pdfium/fpdfsdk/fpdf_structtree_embeddertest.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <iterator>
6 
7 #include "public/fpdf_structtree.h"
8 #include "testing/embedder_test.h"
9 #include "testing/fx_string_testhelpers.h"
10 #include "third_party/abseil-cpp/absl/types/optional.h"
11 
12 class FPDFStructTreeEmbedderTest : public EmbedderTest {};
13 
TEST_F(FPDFStructTreeEmbedderTest,GetAltText)14 TEST_F(FPDFStructTreeEmbedderTest, GetAltText) {
15   ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf"));
16   FPDF_PAGE page = LoadPage(0);
17   ASSERT_TRUE(page);
18 
19   {
20     ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page));
21     ASSERT_TRUE(struct_tree);
22     ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get()));
23 
24     FPDF_STRUCTELEMENT element =
25         FPDF_StructTree_GetChildAtIndex(struct_tree.get(), -1);
26     EXPECT_FALSE(element);
27     element = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 1);
28     EXPECT_FALSE(element);
29     element = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
30     ASSERT_TRUE(element);
31     EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(element));
32     EXPECT_EQ(0U, FPDF_StructElement_GetAltText(element, nullptr, 0));
33 
34     ASSERT_EQ(1, FPDF_StructElement_CountChildren(element));
35     FPDF_STRUCTELEMENT child_element =
36         FPDF_StructElement_GetChildAtIndex(element, -1);
37     EXPECT_FALSE(child_element);
38     child_element = FPDF_StructElement_GetChildAtIndex(element, 1);
39     EXPECT_FALSE(child_element);
40     child_element = FPDF_StructElement_GetChildAtIndex(element, 0);
41     ASSERT_TRUE(child_element);
42     EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(child_element));
43     EXPECT_EQ(0U, FPDF_StructElement_GetAltText(child_element, nullptr, 0));
44 
45     ASSERT_EQ(1, FPDF_StructElement_CountChildren(child_element));
46     FPDF_STRUCTELEMENT gchild_element =
47         FPDF_StructElement_GetChildAtIndex(child_element, -1);
48     EXPECT_FALSE(gchild_element);
49     gchild_element = FPDF_StructElement_GetChildAtIndex(child_element, 1);
50     EXPECT_FALSE(gchild_element);
51     gchild_element = FPDF_StructElement_GetChildAtIndex(child_element, 0);
52     ASSERT_TRUE(gchild_element);
53     EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(gchild_element));
54     ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, nullptr, 0));
55 
56     unsigned short buffer[12];
57     memset(buffer, 0, sizeof(buffer));
58     // Deliberately pass in a small buffer size to make sure |buffer| remains
59     // untouched.
60     ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, buffer, 1));
61     for (size_t i = 0; i < std::size(buffer); ++i)
62       EXPECT_EQ(0U, buffer[i]);
63 
64     EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(gchild_element));
65     ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, buffer,
66                                                  sizeof(buffer)));
67     EXPECT_EQ(L"Black Image", GetPlatformWString(buffer));
68 
69     ASSERT_EQ(1, FPDF_StructElement_CountChildren(gchild_element));
70     FPDF_STRUCTELEMENT ggchild_element =
71         FPDF_StructElement_GetChildAtIndex(gchild_element, 0);
72     EXPECT_FALSE(ggchild_element);
73   }
74 
75   UnloadPage(page);
76 }
77 
// Checks FPDF_StructElement_GetActualText() on tagged_actual_text.pdf: a null
// element and the two outer levels report 0, and the third-level element
// yields "Actual Text".
TEST_F(FPDFStructTreeEmbedderTest, GetActualText) {
  ASSERT_TRUE(OpenDocument("tagged_actual_text.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    // A null element is tolerated and reports no text.
    EXPECT_EQ(0U, FPDF_StructElement_GetActualText(nullptr, nullptr, 0));

    FPDF_STRUCTELEMENT top_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(top_elem);
    EXPECT_EQ(0U, FPDF_StructElement_GetActualText(top_elem, nullptr, 0));

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(top_elem));
    FPDF_STRUCTELEMENT mid_elem =
        FPDF_StructElement_GetChildAtIndex(top_elem, 0);
    ASSERT_TRUE(mid_elem);
    EXPECT_EQ(0U, FPDF_StructElement_GetActualText(mid_elem, nullptr, 0));

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(mid_elem));
    FPDF_STRUCTELEMENT inner_elem =
        FPDF_StructElement_GetChildAtIndex(mid_elem, 0);
    ASSERT_TRUE(inner_elem);
    ASSERT_EQ(24U, FPDF_StructElement_GetActualText(inner_elem, nullptr, 0));

    unsigned short actual_text[12] = {};
    // A deliberately undersized length must leave |actual_text| untouched
    // while still reporting the required size.
    ASSERT_EQ(24U,
              FPDF_StructElement_GetActualText(inner_elem, actual_text, 1));
    for (unsigned short code_unit : actual_text)
      EXPECT_EQ(0U, code_unit);
    ASSERT_EQ(24U, FPDF_StructElement_GetActualText(inner_elem, actual_text,
                                                    sizeof(actual_text)));
    EXPECT_EQ(L"Actual Text", GetPlatformWString(actual_text));
  }

  UnloadPage(page);
}
121 
TEST_F(FPDFStructTreeEmbedderTest,GetStringAttribute)122 TEST_F(FPDFStructTreeEmbedderTest, GetStringAttribute) {
123   ASSERT_TRUE(OpenDocument("tagged_table.pdf"));
124   FPDF_PAGE page = LoadPage(0);
125   ASSERT_TRUE(page);
126 
127   {
128     ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page));
129     ASSERT_TRUE(struct_tree);
130     ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get()));
131 
132     FPDF_STRUCTELEMENT document =
133         FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
134     ASSERT_TRUE(document);
135 
136     constexpr int kBufLen = 100;
137     uint16_t buffer[kBufLen] = {0};
138     EXPECT_EQ(18U, FPDF_StructElement_GetType(document, buffer, kBufLen));
139     EXPECT_EQ("Document", GetPlatformString(buffer));
140 
141     ASSERT_EQ(1, FPDF_StructElement_CountChildren(document));
142     FPDF_STRUCTELEMENT table = FPDF_StructElement_GetChildAtIndex(document, 0);
143     ASSERT_TRUE(table);
144 
145     EXPECT_EQ(12U, FPDF_StructElement_GetType(table, buffer, kBufLen));
146     EXPECT_EQ("Table", GetPlatformString(buffer));
147 
148     // The table should have an attribute "Summary" set to the empty string.
149     EXPECT_EQ(2U, FPDF_StructElement_GetStringAttribute(table, "Summary",
150                                                         buffer, kBufLen));
151 
152     ASSERT_EQ(2, FPDF_StructElement_CountChildren(table));
153     FPDF_STRUCTELEMENT row = FPDF_StructElement_GetChildAtIndex(table, 0);
154     ASSERT_TRUE(row);
155 
156     ASSERT_EQ(2, FPDF_StructElement_CountChildren(row));
157     FPDF_STRUCTELEMENT header_cell = FPDF_StructElement_GetChildAtIndex(row, 0);
158     ASSERT_TRUE(header_cell);
159 
160     EXPECT_EQ(6U, FPDF_StructElement_GetType(header_cell, buffer, kBufLen));
161     EXPECT_EQ("TH", GetPlatformString(buffer));
162 
163     // The header should have an attribute "Scope" with a scope of "Row".
164     EXPECT_EQ(8U, FPDF_StructElement_GetStringAttribute(header_cell, "Scope",
165                                                         buffer, kBufLen));
166     EXPECT_EQ("Row", GetPlatformString(buffer));
167 
168     // The header has an attribute "ColSpan", but it's not a string so it
169     // returns null.
170     EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(header_cell, "ColSpan",
171                                                         buffer, kBufLen));
172 
173     // An unsupported attribute should return 0.
174     EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(header_cell, "Other",
175                                                         buffer, kBufLen));
176 
177     // A null struct element should not crash.
178     EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(nullptr, "Other",
179                                                         buffer, kBufLen));
180   }
181 
182   UnloadPage(page);
183 }
184 
// Uses tagged_table_bad_elem.pdf to check that malformed structure elements
// remain reachable while their string attributes are not returned.
TEST_F(FPDFStructTreeEmbedderTest, GetStringAttributeBadStructElement) {
  ASSERT_TRUE(OpenDocument("tagged_table_bad_elem.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    FPDF_STRUCTELEMENT doc_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(doc_elem);

    constexpr int kOutLen = 100;
    uint16_t out[kOutLen] = {};
    EXPECT_EQ(18U, FPDF_StructElement_GetType(doc_elem, out, kOutLen));
    EXPECT_EQ("Document", GetPlatformString(out));

    // The table lacks /Type, yet it is still returned as a child.
    ASSERT_EQ(1, FPDF_StructElement_CountChildren(doc_elem));
    FPDF_STRUCTELEMENT table_elem =
        FPDF_StructElement_GetChildAtIndex(doc_elem, 0);
    ASSERT_TRUE(table_elem);

    EXPECT_EQ(12U, FPDF_StructElement_GetType(table_elem, out, kOutLen));
    EXPECT_EQ("Table", GetPlatformString(out));

    // Because the element is malformed, its "Summary" entry is not returned.
    EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(table_elem, "Summary",
                                                        out, kOutLen));

    // The row has an invalid /Type, yet it is still returned as a child.
    ASSERT_EQ(1, FPDF_StructElement_CountChildren(table_elem));
    EXPECT_TRUE(FPDF_StructElement_GetChildAtIndex(table_elem, 0));
  }

  UnloadPage(page);
}
224 
// Checks FPDF_StructElement_GetID() on tagged_table.pdf for an element with no
// ID, one with a non-empty ID, and one with an empty ID.
TEST_F(FPDFStructTreeEmbedderTest, GetID) {
  ASSERT_TRUE(OpenDocument("tagged_table.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    FPDF_STRUCTELEMENT doc_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(doc_elem);

    constexpr int kOutLen = 100;
    uint16_t out[kOutLen] = {};
    EXPECT_EQ(18U, FPDF_StructElement_GetType(doc_elem, out, kOutLen));
    EXPECT_EQ("Document", GetPlatformString(out));

    // No ID on the document element.
    EXPECT_EQ(0U, FPDF_StructElement_GetID(doc_elem, out, kOutLen));

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(doc_elem));
    FPDF_STRUCTELEMENT table_elem =
        FPDF_StructElement_GetChildAtIndex(doc_elem, 0);
    ASSERT_TRUE(table_elem);

    EXPECT_EQ(12U, FPDF_StructElement_GetType(table_elem, out, kOutLen));
    EXPECT_EQ("Table", GetPlatformString(out));

    // The table carries the ID "node12".
    EXPECT_EQ(14U, FPDF_StructElement_GetID(table_elem, out, kOutLen));
    EXPECT_EQ("node12", GetPlatformString(out));

    // The table's first child is a row whose ID is the empty string. That is
    // reported as 2U (the size of an empty string), not as 0U, which would
    // mean the ID is absent.
    ASSERT_EQ(2, FPDF_StructElement_CountChildren(table_elem));
    FPDF_STRUCTELEMENT row_elem =
        FPDF_StructElement_GetChildAtIndex(table_elem, 0);
    ASSERT_TRUE(row_elem);
    EXPECT_EQ(2U, FPDF_StructElement_GetID(row_elem, out, kOutLen));
  }

  UnloadPage(page);
}
269 
// Checks FPDF_StructElement_GetLang() on tagged_table.pdf for a null element,
// two elements with a language, and one without an explicit language.
TEST_F(FPDFStructTreeEmbedderTest, GetLang) {
  ASSERT_TRUE(OpenDocument("tagged_table.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    FPDF_STRUCTELEMENT doc_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(doc_elem);

    constexpr int kOutLen = 100;
    uint16_t out[kOutLen] = {};
    EXPECT_EQ(18U, FPDF_StructElement_GetType(doc_elem, out, kOutLen));
    EXPECT_EQ("Document", GetPlatformString(out));

    // A null element reports nothing.
    EXPECT_EQ(0U, FPDF_StructElement_GetLang(nullptr, out, kOutLen));

    // The document element is tagged "en-US".
    EXPECT_EQ(12U, FPDF_StructElement_GetLang(doc_elem, out, kOutLen));
    EXPECT_EQ("en-US", GetPlatformString(out));

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(doc_elem));
    FPDF_STRUCTELEMENT table_elem =
        FPDF_StructElement_GetChildAtIndex(doc_elem, 0);
    ASSERT_TRUE(table_elem);

    // Its only child is a table tagged "hu".
    EXPECT_EQ(12U, FPDF_StructElement_GetType(table_elem, out, kOutLen));
    EXPECT_EQ("Table", GetPlatformString(out));

    EXPECT_EQ(6U, FPDF_StructElement_GetLang(table_elem, out, kOutLen));
    EXPECT_EQ("hu", GetPlatformString(out));

    // The table's first child is a row with no language set explicitly on
    // it, so 0 is reported.
    ASSERT_EQ(2, FPDF_StructElement_CountChildren(table_elem));
    FPDF_STRUCTELEMENT row_elem =
        FPDF_StructElement_GetChildAtIndex(table_elem, 0);
    ASSERT_TRUE(row_elem);
    EXPECT_EQ(0U, FPDF_StructElement_GetLang(row_elem, out, kOutLen));
  }

  UnloadPage(page);
}
317 
318 // See also FPDFEditEmbedderTest.TraverseMarkedContentID, which traverses the
319 // marked contents using FPDFPageObj_GetMark() and related API.
TEST_F(FPDFStructTreeEmbedderTest,GetMarkedContentID)320 TEST_F(FPDFStructTreeEmbedderTest, GetMarkedContentID) {
321   ASSERT_TRUE(OpenDocument("marked_content_id.pdf"));
322   FPDF_PAGE page = LoadPage(0);
323   ASSERT_TRUE(page);
324 
325   {
326     ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page));
327     ASSERT_TRUE(struct_tree);
328     ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get()));
329 
330     FPDF_STRUCTELEMENT element =
331         FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
332     EXPECT_EQ(0, FPDF_StructElement_GetMarkedContentID(element));
333   }
334 
335   UnloadPage(page);
336 }
337 
// Covers FPDF_StructElement_GetMarkedContentIdCount() and
// FPDF_StructElement_GetMarkedContentIdAtIndex() against the four top-level
// elements of tagged_marked_content.pdf, each with a different form of /K.
TEST_F(FPDFStructTreeEmbedderTest, GetMarkedContentIdAtIndex) {
  ASSERT_TRUE(OpenDocument("tagged_marked_content.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(4, FPDF_StructTree_CountChildren(tree.get()));

    // /K is a plain integer MCID.
    FPDF_STRUCTELEMENT int_mcid_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(int_mcid_elem);
    // The legacy single-ID API still works for this form.
    EXPECT_EQ(0, FPDF_StructElement_GetMarkedContentID(int_mcid_elem));

    // /K is a dict containing an MCR object reference.
    FPDF_STRUCTELEMENT mcr_dict_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 1);
    ASSERT_TRUE(mcr_dict_elem);

    // /K is an array holding a dict MCR object reference and an integer MCID.
    FPDF_STRUCTELEMENT array_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 2);
    ASSERT_TRUE(array_elem);

    // /K is absent.
    FPDF_STRUCTELEMENT no_k_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 3);
    ASSERT_TRUE(no_k_elem);

    // Count/index APIs: null element and out-of-range indices give -1.
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdCount(nullptr));
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdAtIndex(nullptr, 0));
    EXPECT_EQ(-1,
              FPDF_StructElement_GetMarkedContentIdAtIndex(int_mcid_elem, -1));
    EXPECT_EQ(-1,
              FPDF_StructElement_GetMarkedContentIdAtIndex(int_mcid_elem, 1));
    EXPECT_EQ(1, FPDF_StructElement_GetMarkedContentIdCount(int_mcid_elem));
    EXPECT_EQ(0,
              FPDF_StructElement_GetMarkedContentIdAtIndex(int_mcid_elem, 0));

    EXPECT_EQ(1, FPDF_StructElement_GetMarkedContentIdCount(mcr_dict_elem));
    EXPECT_EQ(1,
              FPDF_StructElement_GetMarkedContentIdAtIndex(mcr_dict_elem, 0));

    EXPECT_EQ(2, FPDF_StructElement_GetMarkedContentIdCount(array_elem));
    EXPECT_EQ(2, FPDF_StructElement_GetMarkedContentIdAtIndex(array_elem, 0));
    EXPECT_EQ(3, FPDF_StructElement_GetMarkedContentIdAtIndex(array_elem, 1));

    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdCount(no_k_elem));
    EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdAtIndex(no_k_elem, 0));
  }

  UnloadPage(page);
}
391 
// Checks FPDF_StructElement_GetType() on the top-level element of
// tagged_alt_text.pdf, including null arguments and an undersized buffer.
TEST_F(FPDFStructTreeEmbedderTest, GetType) {
  ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    FPDF_STRUCTELEMENT top_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(top_elem);

    // Null element reports 0; null buffer reports the size needed.
    unsigned short type_buf[12];
    ASSERT_EQ(0U,
              FPDF_StructElement_GetType(nullptr, type_buf, sizeof(type_buf)));
    ASSERT_EQ(0U, FPDF_StructElement_GetType(nullptr, nullptr, 0));
    ASSERT_EQ(18U, FPDF_StructElement_GetType(top_elem, nullptr, 0));

    memset(type_buf, 0, sizeof(type_buf));
    // A deliberately undersized length must leave |type_buf| untouched while
    // still reporting the required size.
    ASSERT_EQ(18U, FPDF_StructElement_GetType(top_elem, type_buf, 1));
    for (unsigned short code_unit : type_buf)
      EXPECT_EQ(0U, code_unit);

    ASSERT_EQ(18U,
              FPDF_StructElement_GetType(top_elem, type_buf, sizeof(type_buf)));
    EXPECT_EQ(L"Document", GetPlatformWString(type_buf));
  }

  UnloadPage(page);
}
425 
// Checks FPDF_StructElement_GetObjType() on tagged_table_bad_elem.pdf for a
// well-formed element, an element with no /Type, and an element whose /Type
// is not "StructElem".
TEST_F(FPDFStructTreeEmbedderTest, GetObjType) {
  ASSERT_TRUE(OpenDocument("tagged_table_bad_elem.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(struct_tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get()));

    FPDF_STRUCTELEMENT child =
        FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
    ASSERT_TRUE(child);

    // Null element reports 0; null buffer reports the size needed.
    unsigned short buffer[28] = {};
    ASSERT_EQ(0U,
              FPDF_StructElement_GetObjType(nullptr, buffer, sizeof(buffer)));
    ASSERT_EQ(0U, FPDF_StructElement_GetObjType(nullptr, nullptr, 0));
    ASSERT_EQ(22U, FPDF_StructElement_GetObjType(child, nullptr, 0));

    // Deliberately pass in a small buffer size to make sure `buffer` remains
    // untouched.
    ASSERT_EQ(22U, FPDF_StructElement_GetObjType(child, buffer, 1));
    for (unsigned short code_unit : buffer)
      EXPECT_EQ(0U, code_unit);

    ASSERT_EQ(22U,
              FPDF_StructElement_GetObjType(child, buffer, sizeof(buffer)));
    EXPECT_EQ(L"StructElem", GetPlatformWString(buffer));

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(child));
    FPDF_STRUCTELEMENT gchild = FPDF_StructElement_GetChildAtIndex(child, 0);
    // A null element also yields 0 from GetObjType(), so without this check
    // the "missing /Type" assertion below could pass for the wrong reason.
    ASSERT_TRUE(gchild);
    memset(buffer, 0, sizeof(buffer));
    // Missing /Type in `gchild`
    ASSERT_EQ(0U,
              FPDF_StructElement_GetObjType(gchild, buffer, sizeof(buffer)));
    // Buffer is untouched.
    for (unsigned short code_unit : buffer)
      EXPECT_EQ(0U, code_unit);

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(gchild));
    FPDF_STRUCTELEMENT ggchild = FPDF_StructElement_GetChildAtIndex(gchild, 0);
    ASSERT_TRUE(ggchild);
    ASSERT_EQ(28U,
              FPDF_StructElement_GetObjType(ggchild, buffer, sizeof(buffer)));
    // Reading bad elem also works.
    EXPECT_EQ(L"NotStructElem", GetPlatformWString(buffer));
  }

  UnloadPage(page);
}
477 
// Checks FPDF_StructElement_GetParent() on tagged_alt_text.pdf for a null
// element, a nested element, and a top-level element.
TEST_F(FPDFStructTreeEmbedderTest, GetParent) {
  ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    FPDF_STRUCTELEMENT top_elem =
        FPDF_StructTree_GetChildAtIndex(tree.get(), 0);
    ASSERT_TRUE(top_elem);

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(top_elem));

    FPDF_STRUCTELEMENT nested_elem =
        FPDF_StructElement_GetChildAtIndex(top_elem, 0);
    ASSERT_TRUE(nested_elem);

    // A null element has no parent.
    ASSERT_EQ(nullptr, FPDF_StructElement_GetParent(nullptr));

    // The nested element's parent is the element it was fetched from.
    ASSERT_EQ(top_elem, FPDF_StructElement_GetParent(nested_elem));

    // The parent of `top_elem` is StructTreeRoot, which is no longer a
    // StructElement. That case is currently handled by returning nullptr.
    ASSERT_EQ(nullptr, FPDF_StructElement_GetParent(top_elem));
  }

  UnloadPage(page);
}
509 
// Checks FPDF_StructElement_GetTitle() on tagged_alt_text.pdf for null
// arguments, an undersized buffer, the top-level element ("TitleText") and
// its child ("symbol: 100k").
TEST_F(FPDFStructTreeEmbedderTest, GetTitle) {
  ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(struct_tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get()));

    FPDF_STRUCTELEMENT element =
        FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
    ASSERT_TRUE(element);

    // Null element reports 0; null buffer reports the size needed.
    unsigned short buffer[13] = {};
    ASSERT_EQ(0U, FPDF_StructElement_GetTitle(nullptr, buffer, sizeof(buffer)));
    ASSERT_EQ(0U, FPDF_StructElement_GetTitle(nullptr, nullptr, 0));
    ASSERT_EQ(20U, FPDF_StructElement_GetTitle(element, nullptr, 0));

    memset(buffer, 0, sizeof(buffer));
    // Deliberately pass in a small buffer size to make sure |buffer| remains
    // untouched.
    ASSERT_EQ(20U, FPDF_StructElement_GetTitle(element, buffer, 1));
    for (unsigned short code_unit : buffer)
      EXPECT_EQ(0U, code_unit);

    ASSERT_EQ(20U,
              FPDF_StructElement_GetTitle(element, buffer, sizeof(buffer)));

    EXPECT_EQ(L"TitleText", GetPlatformWString(buffer));

    ASSERT_EQ(1, FPDF_StructElement_CountChildren(element));
    FPDF_STRUCTELEMENT child_element =
        FPDF_StructElement_GetChildAtIndex(element, 0);
    // Validate the child that was just fetched, not the parent again.
    ASSERT_TRUE(child_element);

    ASSERT_EQ(26U, FPDF_StructElement_GetTitle(child_element, buffer,
                                               sizeof(buffer)));
    EXPECT_EQ(L"symbol: 100k", GetPlatformWString(buffer));
  }

  UnloadPage(page);
}
554 
TEST_F(FPDFStructTreeEmbedderTest,GetAttributes)555 TEST_F(FPDFStructTreeEmbedderTest, GetAttributes) {
556   ASSERT_TRUE(OpenDocument("tagged_table.pdf"));
557   FPDF_PAGE page = LoadPage(0);
558   ASSERT_TRUE(page);
559 
560   {
561     ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page));
562     ASSERT_TRUE(struct_tree);
563     ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get()));
564 
565     FPDF_STRUCTELEMENT document =
566         FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
567     ASSERT_TRUE(document);
568 
569     ASSERT_EQ(1, FPDF_StructElement_CountChildren(document));
570     ASSERT_EQ(-1, FPDF_StructElement_GetAttributeCount(document));
571     FPDF_STRUCTELEMENT table = FPDF_StructElement_GetChildAtIndex(document, 0);
572     ASSERT_TRUE(table);
573 
574     ASSERT_EQ(2, FPDF_StructElement_CountChildren(table));
575 
576     {
577       FPDF_STRUCTELEMENT tr = FPDF_StructElement_GetChildAtIndex(table, 0);
578       ASSERT_TRUE(tr);
579 
580       ASSERT_EQ(2, FPDF_StructElement_CountChildren(tr));
581       FPDF_STRUCTELEMENT th = FPDF_StructElement_GetChildAtIndex(tr, 0);
582       ASSERT_TRUE(th);
583 
584       ASSERT_EQ(2, FPDF_StructElement_GetAttributeCount(th));
585 
586       // nullptr test
587       ASSERT_EQ(nullptr, FPDF_StructElement_GetAttributeAtIndex(document, 0));
588       ASSERT_EQ(nullptr, FPDF_StructElement_GetAttributeAtIndex(document, -1));
589       ASSERT_EQ(nullptr, FPDF_StructElement_GetAttributeAtIndex(th, 2));
590 
591       FPDF_STRUCTELEMENT_ATTR attr =
592           FPDF_StructElement_GetAttributeAtIndex(th, 1);
593       ASSERT_TRUE(attr);
594 
595       ASSERT_EQ(2, FPDF_StructElement_Attr_GetCount(attr));
596       ASSERT_FALSE(
597           FPDF_StructElement_Attr_GetName(attr, 1, nullptr, 0U, nullptr));
598       unsigned long buffer_len_needed = ULONG_MAX;
599       // Pass buffer = nullptr to obtain the size of the buffer needed,
600       ASSERT_TRUE(FPDF_StructElement_Attr_GetName(attr, 1, nullptr, 0,
601                                                   &buffer_len_needed));
602       EXPECT_EQ(2U, buffer_len_needed);
603       char buffer[8] = {};
604       unsigned long out_len = ULONG_MAX;
605       // Deliberately pass in a small buffer size to make sure `buffer` remains
606       // untouched.
607       ASSERT_TRUE(
608           FPDF_StructElement_Attr_GetName(attr, 1, buffer, 1, &out_len));
609       EXPECT_EQ(2U, out_len);
610       for (size_t i = 0; i < std::size(buffer); ++i)
611         EXPECT_EQ(0, buffer[i]);
612 
613       ASSERT_TRUE(FPDF_StructElement_Attr_GetName(attr, 1, buffer,
614                                                   sizeof(buffer), &out_len));
615       EXPECT_EQ(2U, out_len);
616       EXPECT_STREQ("O", buffer);
617       EXPECT_EQ(FPDF_OBJECT_NAME,
618                 FPDF_StructElement_Attr_GetType(attr, buffer));
619 
620       unsigned short str_val[12] = {};
621       ASSERT_TRUE(FPDF_StructElement_Attr_GetStringValue(
622           attr, buffer, str_val, sizeof(str_val), &out_len));
623       EXPECT_EQ(12U, out_len);
624       EXPECT_EQ(L"Table", GetPlatformWString(str_val));
625 
626       memset(buffer, 0, sizeof(buffer));
627       ASSERT_TRUE(FPDF_StructElement_Attr_GetName(attr, 0, buffer,
628                                                   sizeof(buffer), &out_len));
629       EXPECT_EQ(8U, out_len);
630       EXPECT_STREQ("ColSpan", buffer);
631       EXPECT_EQ(FPDF_OBJECT_NUMBER,
632                 FPDF_StructElement_Attr_GetType(attr, buffer));
633       float num_val;
634       ASSERT_TRUE(
635           FPDF_StructElement_Attr_GetNumberValue(attr, buffer, &num_val));
636       EXPECT_FLOAT_EQ(2.0f, num_val);
637     }
638 
639     {
640       FPDF_STRUCTELEMENT tr = FPDF_StructElement_GetChildAtIndex(table, 1);
641       ASSERT_TRUE(tr);
642 
643       ASSERT_EQ(1, FPDF_StructElement_GetAttributeCount(tr));
644       // nullptr when index out of range
645       ASSERT_EQ(nullptr, FPDF_StructElement_GetAttributeAtIndex(tr, 1));
646 
647       ASSERT_EQ(2, FPDF_StructElement_CountChildren(tr));
648       FPDF_STRUCTELEMENT td = FPDF_StructElement_GetChildAtIndex(tr, 1);
649       ASSERT_TRUE(td);
650       {
651         // Test counting and obtaining attributes via reference
652         ASSERT_EQ(1, FPDF_StructElement_GetAttributeCount(td));
653         FPDF_STRUCTELEMENT_ATTR attr =
654             FPDF_StructElement_GetAttributeAtIndex(td, 0);
655         ASSERT_TRUE(attr);
656         ASSERT_EQ(4, FPDF_StructElement_Attr_GetCount(attr));
657         // Test string and blob type
658         {
659           char buffer[16] = {};
660           unsigned long out_len = ULONG_MAX;
661           ASSERT_TRUE(FPDF_StructElement_Attr_GetName(
662               attr, 0, buffer, sizeof(buffer), &out_len));
663           EXPECT_EQ(8U, out_len);
664           EXPECT_STREQ("ColProp", buffer);
665 
666           EXPECT_EQ(FPDF_OBJECT_STRING,
667                     FPDF_StructElement_Attr_GetType(attr, buffer));
668 
669           unsigned short str_val[12] = {};
670           ASSERT_TRUE(FPDF_StructElement_Attr_GetStringValue(
671               attr, buffer, str_val, sizeof(str_val), &out_len));
672           EXPECT_EQ(8U, out_len);
673           EXPECT_EQ(L"Sum", GetPlatformWString(str_val));
674 
675           char blob_val[3] = {};
676           ASSERT_TRUE(FPDF_StructElement_Attr_GetBlobValue(
677               attr, buffer, blob_val, sizeof(blob_val), &out_len));
678           EXPECT_EQ(3U, out_len);
679           EXPECT_EQ('S', blob_val[0]);
680           EXPECT_EQ('u', blob_val[1]);
681           EXPECT_EQ('m', blob_val[2]);
682         }
683 
684         // Test boolean type
685         {
686           char buffer[16] = {};
687           unsigned long out_len = ULONG_MAX;
688           ASSERT_TRUE(FPDF_StructElement_Attr_GetName(
689               attr, 1, buffer, sizeof(buffer), &out_len));
690           EXPECT_EQ(7U, out_len);
691           EXPECT_STREQ("CurUSD", buffer);
692 
693           EXPECT_EQ(FPDF_OBJECT_BOOLEAN,
694                     FPDF_StructElement_Attr_GetType(attr, buffer));
695           FPDF_BOOL val;
696           ASSERT_TRUE(
697               FPDF_StructElement_Attr_GetBooleanValue(attr, buffer, &val));
698           EXPECT_TRUE(val);
699         }
700 
701         // Test reference to number
702         {
703           char buffer[16] = {};
704           unsigned long out_len = ULONG_MAX;
705           ASSERT_TRUE(FPDF_StructElement_Attr_GetName(
706               attr, 3, buffer, sizeof(buffer), &out_len));
707           EXPECT_EQ(8U, out_len);
708           EXPECT_STREQ("RowSpan", buffer);
709 
710           EXPECT_EQ(FPDF_OBJECT_REFERENCE,
711                     FPDF_StructElement_Attr_GetType(attr, buffer));
712           float val;
713           ASSERT_TRUE(
714               FPDF_StructElement_Attr_GetNumberValue(attr, buffer, &val));
715           EXPECT_FLOAT_EQ(3, val);
716         }
717       }
718     }
719   }
720 
721   UnloadPage(page);
722 }
723 
// Regression test for https://crbug.com/pdfium/1480: obtaining the structure
// tree for tagged_nested.pdf should not crash.
TEST_F(FPDFStructTreeEmbedderTest, GetStructTreeForNestedTaggedPDF) {
  ASSERT_TRUE(OpenDocument("tagged_nested.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // Building the tree is the operation under test.
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
    ASSERT_TRUE(tree);
  }
  UnloadPage(page);
}
736 
TEST_F(FPDFStructTreeEmbedderTest,MarkedContentReferenceAndObjectReference)737 TEST_F(FPDFStructTreeEmbedderTest, MarkedContentReferenceAndObjectReference) {
738   ASSERT_TRUE(OpenDocument("tagged_mcr_objr.pdf"));
739   FPDF_PAGE page = LoadPage(0);
740   ASSERT_TRUE(page);
741 
742   {
743     ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page));
744     ASSERT_TRUE(struct_tree);
745     ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get()));
746 
747     FPDF_STRUCTELEMENT object8 =
748         FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0);
749     ASSERT_TRUE(object8);
750     unsigned short buffer[12];
751     ASSERT_EQ(18U, FPDF_StructElement_GetType(object8, buffer, sizeof(buffer)));
752     EXPECT_EQ(L"Document", GetPlatformWString(buffer));
753     EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object8));
754     ASSERT_EQ(2, FPDF_StructElement_CountChildren(object8));
755 
756     // First branch. 10 -> 12 -> 13 -> Inline dict.
757     FPDF_STRUCTELEMENT object10 =
758         FPDF_StructElement_GetChildAtIndex(object8, 0);
759     ASSERT_TRUE(object10);
760     ASSERT_EQ(20U,
761               FPDF_StructElement_GetType(object10, buffer, sizeof(buffer)));
762     EXPECT_EQ(L"NonStruct", GetPlatformWString(buffer));
763     EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object10));
764     ASSERT_EQ(1, FPDF_StructElement_CountChildren(object10));
765 
766     FPDF_STRUCTELEMENT object12 =
767         FPDF_StructElement_GetChildAtIndex(object10, 0);
768     ASSERT_TRUE(object12);
769     ASSERT_EQ(4U, FPDF_StructElement_GetType(object12, buffer, sizeof(buffer)));
770     EXPECT_EQ(L"P", GetPlatformWString(buffer));
771     EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object12));
772     ASSERT_EQ(1, FPDF_StructElement_CountChildren(object12));
773 
774     FPDF_STRUCTELEMENT object13 =
775         FPDF_StructElement_GetChildAtIndex(object12, 0);
776     ASSERT_TRUE(object13);
777     ASSERT_EQ(20U,
778               FPDF_StructElement_GetType(object13, buffer, sizeof(buffer)));
779     EXPECT_EQ(L"NonStruct", GetPlatformWString(buffer));
780     EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object13));
781     ASSERT_EQ(1, FPDF_StructElement_CountChildren(object13));
782 
783     // TODO(crbug.com/pdfium/672): Fetch this child element.
784     EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(object13, 0));
785 
786     // Second branch. 11 -> 14 -> Inline dict.
787     //                         -> 15 -> Inline dict.
788     FPDF_STRUCTELEMENT object11 =
789         FPDF_StructElement_GetChildAtIndex(object8, 1);
790     ASSERT_TRUE(object11);
791     ASSERT_EQ(4U, FPDF_StructElement_GetType(object11, buffer, sizeof(buffer)));
792     EXPECT_EQ(L"P", GetPlatformWString(buffer));
793     EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object11));
794     ASSERT_EQ(1, FPDF_StructElement_CountChildren(object11));
795 
796     FPDF_STRUCTELEMENT object14 =
797         FPDF_StructElement_GetChildAtIndex(object11, 0);
798     ASSERT_TRUE(object14);
799     ASSERT_EQ(20U,
800               FPDF_StructElement_GetType(object14, buffer, sizeof(buffer)));
801     EXPECT_EQ(L"NonStruct", GetPlatformWString(buffer));
802     EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object14));
803     ASSERT_EQ(2, FPDF_StructElement_CountChildren(object14));
804 
805     // TODO(crbug.com/pdfium/672): Object 15 should be at index 1.
806     EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(object14, 1));
807     FPDF_STRUCTELEMENT object15 =
808         FPDF_StructElement_GetChildAtIndex(object14, 0);
809     ASSERT_TRUE(object15);
810     ASSERT_EQ(20U,
811               FPDF_StructElement_GetType(object15, buffer, sizeof(buffer)));
812     EXPECT_EQ(L"NonStruct", GetPlatformWString(buffer));
813     EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object15));
814     ASSERT_EQ(1, FPDF_StructElement_CountChildren(object15));
815 
816     // TODO(crbug.com/pdfium/672): Fetch this child element.
817     EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(object15, 0));
818   }
819 
820   UnloadPage(page);
821 }
822 
// Pins the current behavior for https://crbug.com/pdfium/1768 on bug_1768.pdf:
// the tree reports a single child, yet asking for that child returns nothing.
TEST_F(FPDFStructTreeEmbedderTest, Bug1768) {
  ASSERT_TRUE(OpenDocument("bug_1768.pdf"));
  FPDF_PAGE bug_page = LoadPage(0);
  ASSERT_TRUE(bug_page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(bug_page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    // TODO(crbug.com/pdfium/1768): Fetch this child element. Once that works,
    // consider extending the test so that other elements in the tree are
    // verified to be fetched correctly as well.
    EXPECT_FALSE(FPDF_StructTree_GetChildAtIndex(tree.get(), 0));
  }

  UnloadPage(bug_page);
}
841 
// Crash regression test using bug_1296920.pdf: the struct tree is created,
// its child count is checked, and then it is destroyed.
TEST_F(FPDFStructTreeEmbedderTest, Bug1296920) {
  ASSERT_TRUE(OpenDocument("bug_1296920.pdf"));
  FPDF_PAGE bug_page = LoadPage(0);
  ASSERT_TRUE(bug_page);

  {
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(bug_page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));

    // The tree is destroyed when this scope closes; that must not crash.
  }

  UnloadPage(bug_page);
}
857 
// Regression test using tagged_table_bad_parent.pdf: building the struct tree
// and counting its children must not leave a dangling pointer behind.
TEST_F(FPDFStructTreeEmbedderTest, Bug1443100) {
  ASSERT_TRUE(OpenDocument("tagged_table_bad_parent.pdf"));
  FPDF_PAGE table_page = LoadPage(0);
  ASSERT_TRUE(table_page);

  {
    // These API calls are the ones that should not trigger a dangling pointer.
    ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(table_page));
    ASSERT_TRUE(tree);
    ASSERT_EQ(1, FPDF_StructTree_CountChildren(tree.get()));
  }

  UnloadPage(table_page);
}
872