xref: /aosp_15_r20/external/pdfium/core/fpdfapi/font/cpdf_tounicodemap_unittest.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2015 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "core/fpdfapi/font/cpdf_tounicodemap.h"
6 
7 #include "core/fpdfapi/parser/cpdf_stream.h"
8 #include "core/fxcrt/retain_ptr.h"
9 #include "testing/gmock/include/gmock/gmock.h"
10 #include "testing/gtest/include/gtest/gtest.h"
11 #include "third_party/base/containers/span.h"
12 
// Exercises CPDF_ToUnicodeMap::StringToCode() with well-formed hex strings,
// hex strings containing whitespace, values too large for 32 bits, and
// malformed inputs.
TEST(cpdf_tounicodemap, StringToCode) {
  using ::testing::Optional;

  struct AcceptedCase {
    const char* input;
    uint32_t expected_code;
  };

  // Angle-bracketed hex digits, in either letter case, yield their numeric
  // value. The trailing entries check that whitespace between the brackets is
  // skipped.
  static constexpr AcceptedCase kAcceptedCases[] = {
      {"<0001>", 1u},
      {"<c2>", 194u},
      {"<A2>", 162u},
      {"<Af2>", 2802u},
      {"<FFFFFFFF>", 4294967295u},
      {"<00\n0\r1>", 1u},
      {"<c 2>", 194u},
      {"<A2\r\n>", 162u},
  };
  for (const AcceptedCase& accepted : kAcceptedCases) {
    EXPECT_THAT(CPDF_ToUnicodeMap::StringToCode(accepted.input),
                Optional(accepted.expected_code))
        << "input: " << accepted.input;
  }

  // Inputs that must produce no value: the first two overflow 32 bits, the
  // rest are missing a bracket, are empty, or contain non-hex characters.
  static constexpr const char* kRejectedInputs[] = {
      "<100000000>", "<1abcdFFFF>", "",      "<>",   "12",
      "<12",         "12>",         "<1-7>", "00AB", "<00NN>",
  };
  for (const char* rejected : kRejectedInputs) {
    EXPECT_FALSE(CPDF_ToUnicodeMap::StringToCode(rejected).has_value())
        << "input: " << rejected;
  }
}
44 
// Exercises CPDF_ToUnicodeMap::StringToWideString() with malformed inputs and
// with hex strings holding one or two 4-digit groups.
TEST(cpdf_tounicodemap, StringToWideString) {
  // Anything not wrapped in both '<' and '>' is expected to give an empty
  // string.
  static constexpr const char* kMalformedInputs[] = {
      "", "1234", "<c2", "<c2D2", "c2ab>",
  };
  for (const char* malformed : kMalformedInputs) {
    EXPECT_EQ(L"", CPDF_ToUnicodeMap::StringToWideString(malformed))
        << "input: " << malformed;
  }

  // A single 4-digit group. The extra "ab" and the " 1234" after it are
  // expected to contribute nothing to the result.
  const WideString one_unit = L"\xc2ab";
  EXPECT_EQ(one_unit, CPDF_ToUnicodeMap::StringToWideString("<c2ab>"));
  EXPECT_EQ(one_unit, CPDF_ToUnicodeMap::StringToWideString("<c2abab>"));
  EXPECT_EQ(one_unit, CPDF_ToUnicodeMap::StringToWideString("<c2ab 1234>"));

  // Two 4-digit groups, in mixed letter case. The trailing "12" is expected
  // to be dropped.
  WideString two_units = one_unit;
  two_units += L"\xfaab";
  EXPECT_EQ(two_units, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb>"));
  EXPECT_EQ(two_units, CPDF_ToUnicodeMap::StringToWideString("<c2abFaAb12>"));
}
61 
// Regression coverage for HandleBeginBFRange(): a bfrange whose low and high
// codes are both 0xFFFFFFFF must be processed without looping forever on
// integer overflow, and the mapping for that code must be retrievable.
TEST(cpdf_tounicodemap, HandleBeginBFRangeAvoidIntegerOverflow) {
  {
    // Destination given as an array holding one string.
    static constexpr uint8_t kArrayDestination[] =
        "beginbfrange<FFFFFFFF><FFFFFFFF>[<0041>]endbfrange";
    auto cmap_stream = pdfium::MakeRetain<CPDF_Stream>();
    cmap_stream->SetData(pdfium::make_span(kArrayDestination));
    CPDF_ToUnicodeMap to_unicode(cmap_stream);
    EXPECT_STREQ(L"A", to_unicode.Lookup(0xffffffff).c_str());
  }
  {
    // Destination given as a single 4-digit string.
    static constexpr uint8_t kSingleDestination[] =
        "beginbfrange<FFFFFFFF><FFFFFFFF><0042>endbfrange";
    auto cmap_stream = pdfium::MakeRetain<CPDF_Stream>();
    cmap_stream->SetData(pdfium::make_span(kSingleDestination));
    CPDF_ToUnicodeMap to_unicode(cmap_stream);
    EXPECT_STREQ(L"B", to_unicode.Lookup(0xffffffff).c_str());
  }
  {
    // Destination given as a string holding two 4-digit groups.
    static constexpr uint8_t kDoubleDestination[] =
        "beginbfrange<FFFFFFFF><FFFFFFFF><00410042>endbfrange";
    auto cmap_stream = pdfium::MakeRetain<CPDF_Stream>();
    cmap_stream->SetData(pdfium::make_span(kDoubleDestination));
    CPDF_ToUnicodeMap to_unicode(cmap_stream);
    EXPECT_STREQ(L"AB", to_unicode.Lookup(0xffffffff).c_str());
  }
}
90 
// Checks how many unicode entries are recorded per charcode, and what
// ReverseLookup() returns, for distinct, conflicting, and duplicated mappings.
TEST(cpdf_tounicodemap, InsertIntoMultimap) {
  {
    // Two unrelated mappings: CID 1 -> "A" and CID 2 -> "B". Each CID is
    // expected to own exactly one unicode entry.
    static constexpr uint8_t kDistinctMappings[] =
        "beginbfchar<1><0041><2><0042>endbfchar";
    auto cmap_stream = pdfium::MakeRetain<CPDF_Stream>();
    cmap_stream->SetData(pdfium::make_span(kDistinctMappings));
    CPDF_ToUnicodeMap to_unicode(cmap_stream);
    EXPECT_EQ(1u, to_unicode.ReverseLookup(0x0041));
    EXPECT_EQ(2u, to_unicode.ReverseLookup(0x0042));
    EXPECT_EQ(1u, to_unicode.GetUnicodeCountByCharcodeForTesting(1u));
    EXPECT_EQ(1u, to_unicode.GetUnicodeCountByCharcodeForTesting(2u));
  }
  {
    // CID 0 is mapped twice, to "A" and to "B". Both entries are expected to
    // be kept.
    static constexpr uint8_t kConflictingMappings[] =
        "beginbfrange<0><0><0041><0><0><0042>endbfrange";
    auto cmap_stream = pdfium::MakeRetain<CPDF_Stream>();
    cmap_stream->SetData(pdfium::make_span(kConflictingMappings));
    CPDF_ToUnicodeMap to_unicode(cmap_stream);
    EXPECT_EQ(0u, to_unicode.ReverseLookup(0x0041));
    EXPECT_EQ(0u, to_unicode.ReverseLookup(0x0042));
    EXPECT_EQ(2u, to_unicode.GetUnicodeCountByCharcodeForTesting(0u));
  }
  {
    // CID 0 -> "A" appears once in a bfrange section and again in a bfchar
    // section. The repeated mapping is expected to be stored only once.
    static constexpr uint8_t kDuplicateMappings[] =
        "beginbfrange<0><0>[<0041>]endbfrange\n"
        "beginbfchar<0><0041>endbfchar";
    auto cmap_stream = pdfium::MakeRetain<CPDF_Stream>();
    cmap_stream->SetData(pdfium::make_span(kDuplicateMappings));
    CPDF_ToUnicodeMap to_unicode(cmap_stream);
    EXPECT_EQ(0u, to_unicode.ReverseLookup(0x0041));
    EXPECT_EQ(1u, to_unicode.GetUnicodeCountByCharcodeForTesting(0u));
  }
}
128