xref: /aosp_15_r20/external/abseil-cpp/absl/debugging/internal/decode_rust_punycode_test.cc (revision 9356374a3709195abf420251b3e825997ff56c0f)
1 // Copyright 2024 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/debugging/internal/decode_rust_punycode.h"
16 
17 #include <cstddef>
18 #include <cstring>
19 #include <string>
20 
21 #include "gmock/gmock.h"
22 #include "gtest/gtest.h"
23 #include "absl/base/config.h"
24 
25 namespace absl {
26 ABSL_NAMESPACE_BEGIN
27 namespace debugging_internal {
28 namespace {
29 
30 using ::testing::AllOf;
31 using ::testing::Eq;
32 using ::testing::IsNull;
33 using ::testing::Pointee;
34 using ::testing::ResultOf;
35 using ::testing::StrEq;
36 
37 class DecodeRustPunycodeTest : public ::testing::Test {
38  protected:
FillBufferWithNonzeroBytes()39   void FillBufferWithNonzeroBytes() {
40     // The choice of nonzero value to fill with is arbitrary.  The point is just
41     // to fail tests if DecodeRustPunycode forgets to write the final NUL
42     // character.
43     std::memset(buffer_storage_, 0xab, sizeof(buffer_storage_));
44   }
45 
WithAmpleSpace()46   DecodeRustPunycodeOptions WithAmpleSpace() {
47     FillBufferWithNonzeroBytes();
48 
49     DecodeRustPunycodeOptions options;
50     options.punycode_begin = punycode_.data();
51     options.punycode_end = punycode_.data() + punycode_.size();
52     options.out_begin = buffer_storage_;
53     options.out_end = buffer_storage_ + sizeof(buffer_storage_);
54     return options;
55   }
56 
WithJustEnoughSpace()57   DecodeRustPunycodeOptions WithJustEnoughSpace() {
58     FillBufferWithNonzeroBytes();
59 
60     const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size() - 1;
61     DecodeRustPunycodeOptions options;
62     options.punycode_begin = punycode_.data();
63     options.punycode_end = punycode_.data() + punycode_.size();
64     options.out_begin = buffer_storage_ + begin_offset;
65     options.out_end = buffer_storage_ + sizeof(buffer_storage_);
66     return options;
67   }
68 
WithOneByteTooFew()69   DecodeRustPunycodeOptions WithOneByteTooFew() {
70     FillBufferWithNonzeroBytes();
71 
72     const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size();
73     DecodeRustPunycodeOptions options;
74     options.punycode_begin = punycode_.data();
75     options.punycode_end = punycode_.data() + punycode_.size();
76     options.out_begin = buffer_storage_ + begin_offset;
77     options.out_end = buffer_storage_ + sizeof(buffer_storage_);
78     return options;
79   }
80 
81   // Matches a correct return value of DecodeRustPunycode when `golden` is the
82   // expected plaintext output.
PointsToTheNulAfter(const std::string & golden)83   auto PointsToTheNulAfter(const std::string& golden) {
84     const size_t golden_size = golden.size();
85     return AllOf(
86         Pointee(Eq('\0')),
87         ResultOf("preceding string body",
88                  [golden_size](const char* p) { return p - golden_size; },
89                  StrEq(golden)));
90   }
91 
92   std::string punycode_;
93   std::string plaintext_;
94   char buffer_storage_[1024];
95 };
96 
// An empty encoding is expected to decode to an empty string.  With an empty
// plaintext, the "one byte too few" output range has no bytes at all, so there
// is no room even for the terminating NUL.
TEST_F(DecodeRustPunycodeTest, MapsEmptyToEmpty) {
  plaintext_ = "";
  punycode_ = "";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
107 
// An input made of basic characters followed by a single underscore should
// decode to those basic characters without the underscore.
TEST_F(DecodeRustPunycodeTest,
       StripsTheTrailingDelimiterFromAPureRunOfBasicChars) {
  plaintext_ = "foo";
  punycode_ = "foo_";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
119 
// The input contains two underscores; the expected plaintext keeps the first
// one and drops only the final one.
TEST_F(DecodeRustPunycodeTest, TreatsTheLastUnderscoreAsTheDelimiter) {
  plaintext_ = "foo_bar";
  punycode_ = "foo_bar_";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
130 
// A leading underscore is fine as long as a later underscore is present: the
// expected plaintext keeps the leading one and drops the trailing one.
TEST_F(DecodeRustPunycodeTest, AcceptsALeadingUnderscoreIfNotTheDelimiter) {
  plaintext_ = "_foo";
  punycode_ = "_foo_";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
141 
// When the only underscore in the input is its first character, decoding is
// expected to fail even with the whole buffer available.
TEST_F(DecodeRustPunycodeTest, RejectsALeadingUnderscoreDelimiter) {
  punycode_.assign("_foo");

  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
147 
// A NUL byte among the basic characters must cause decoding to fail.  The
// explicit length keeps the NUL and the bytes after it inside punycode_.
TEST_F(DecodeRustPunycodeTest, RejectsEmbeddedNul) {
  punycode_.assign("foo\0bar_", 8);

  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
153 
// Each input places one ASCII byte that is not a letter, digit, or underscore
// (BEL, hyphen, semicolon, DEL) among the basic characters; all must fail.
TEST_F(DecodeRustPunycodeTest, RejectsAsciiCharsOtherThanIdentifierChars) {
  // Loads `encoded` as the input and decodes it with the whole buffer on offer.
  const auto decode = [this](const char* encoded) {
    punycode_ = encoded;
    return DecodeRustPunycode(WithAmpleSpace());
  };

  EXPECT_THAT(decode("foo\007_"), IsNull());
  EXPECT_THAT(decode("foo-_"), IsNull());
  EXPECT_THAT(decode("foo;_"), IsNull());
  EXPECT_THAT(decode("foo\177_"), IsNull());
}
167 
// Bytes with the high bit set (0x80 and 0xff here) must be rejected whether or
// not an underscore follows them.
TEST_F(DecodeRustPunycodeTest, RejectsRawNonAsciiChars) {
  // Loads `encoded` as the input and decodes it with the whole buffer on offer.
  const auto decode = [this](const char* encoded) {
    punycode_ = encoded;
    return DecodeRustPunycode(WithAmpleSpace());
  };

  EXPECT_THAT(decode("\x80"), IsNull());
  EXPECT_THAT(decode("\x80_"), IsNull());
  EXPECT_THAT(decode("\xff"), IsNull());
  EXPECT_THAT(decode("\xff_"), IsNull());
}
181 
TEST_F(DecodeRustPunycodeTest, RecognizesU0080) {
  // The digit a stands for 0, so this input decodes to the lowest non-ASCII
  // code point by itself.  U+0080 PAD cannot appear in an identifier, but
  // DecodeRustPunycode does not check whether the non-ASCII characters it
  // produces are legal identifier characters.
  plaintext_ = "\xc2\x80";
  punycode_ = "a";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
195 
TEST_F(DecodeRustPunycodeTest, OneByteDeltaSequencesMustBeA) {
  // The bias is 72 for the first code point, which makes every digit other
  // than a/A nonfinal in either of the first two bytes of a delta sequence.
  // A lone digit other than a therefore leaves its delta unfinished.
  const auto decode = [this](const char* encoded) {
    punycode_ = encoded;
    return DecodeRustPunycode(WithAmpleSpace());
  };

  EXPECT_THAT(decode("b"), IsNull());
  EXPECT_THAT(decode("z"), IsNull());
  EXPECT_THAT(decode("0"), IsNull());
  EXPECT_THAT(decode("9"), IsNull());
}
211 
// The two-digit delta "ba" is expected to decode to U+0081 (UTF-8 C2 81).
TEST_F(DecodeRustPunycodeTest, AcceptsDeltaSequenceBA) {
  plaintext_ = "\xc2\x81";
  punycode_ = "ba";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
222 
// Two-digit deltas whose second digit is a: each encoded input is paired with
// the single two-byte character it is expected to decode to.  Only the
// exact-fit output range is exercised here.
TEST_F(DecodeRustPunycodeTest, AcceptsOtherDeltaSequencesWithSecondByteA) {
  struct Case {
    const char* encoded;
    const char* golden;
  };
  const Case kCases[] = {
      {"ca", "\xc2\x82"},
      {"za", "\xc2\x99"},
      {"0a", "\xc2\x9a"},
      {"1a", "\xc2\x9b"},
      {"9a", "£"},  // Pound sign, U+00A3
  };

  for (const Case& test_case : kCases) {
    punycode_ = test_case.encoded;
    plaintext_ = test_case.golden;
    EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
                PointsToTheNulAfter(plaintext_));
  }
}
249 
// Two-digit inputs whose second (and last) digit is something other than a
// are expected to be rejected.
TEST_F(DecodeRustPunycodeTest, RejectsDeltaWhereTheSecondAndLastDigitIsNotA) {
  // Loads `encoded` as the input and decodes it with the whole buffer on offer.
  const auto decode = [this](const char* encoded) {
    punycode_ = encoded;
    return DecodeRustPunycode(WithAmpleSpace());
  };

  EXPECT_THAT(decode("bb"), IsNull());
  EXPECT_THAT(decode("zz"), IsNull());
  EXPECT_THAT(decode("00"), IsNull());
  EXPECT_THAT(decode("99"), IsNull());
}
263 
// Three-digit deltas of the form <digit>ba: each encoded input is paired with
// the single two-byte character it is expected to decode to.  Only the
// exact-fit output range is exercised here.
TEST_F(DecodeRustPunycodeTest, AcceptsDeltasWithSecondByteBFollowedByA) {
  struct Case {
    const char* encoded;
    const char* golden;
  };
  const Case kCases[] = {
      {"bba", "¤"},  // U+00A4
      {"cba", "¥"},  // U+00A5
      {"zba", "¼"},  // U+00BC
      {"0ba", "½"},  // U+00BD
      {"1ba", "¾"},  // U+00BE
      {"9ba", "Æ"},  // U+00C6
  };

  for (const Case& test_case : kCases) {
    punycode_ = test_case.encoded;
    plaintext_ = test_case.golden;
    EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
                PointsToTheNulAfter(plaintext_));
  }
}
295 
296 // Tests beyond this point use characters allowed in identifiers, so you can
297 // prepend _RNvC1cu<decimal length><underscore if [0-9_] follows> to a test
298 // input and run it through another Rust demangler to verify that the
299 // corresponding golden output is correct.
300 
// A single two-byte character with no basic characters around it.
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAlone) {
  plaintext_ = "à";
  punycode_ = "0ca";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
311 
// The decoded two-byte character lands ahead of all the basic characters.
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharBeforeBasicChars) {
  plaintext_ = "à_la_mode";
  punycode_ = "_la_mode_yya";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
322 
// The decoded two-byte character lands between two runs of basic characters.
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAmidBasicChars) {
  plaintext_ = "verre_à_vin";
  punycode_ = "verre__vin_m4a";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
333 
// The decoded two-byte character lands after all the basic characters.
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAfterBasicChars) {
  plaintext_ = "beltà";
  punycode_ = "belt_3na";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
344 
// The same two-byte character four times in a row.
TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedTwoByteChar) {
  plaintext_ = "àààà";
  punycode_ = "0caaaa";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
355 
// Five distinct two-byte characters that appear in ascending code-point order.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsInOrder) {
  plaintext_ = "ãéïôù";
  punycode_ = "3camsuz";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
366 
// The same five two-byte characters as the in-order test, but shuffled so
// that they do not appear in ascending code-point order.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsOutOfOrder) {
  plaintext_ = "ùéôãï";
  punycode_ = "3caltsx";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
377 
// A single three-byte character with no basic characters around it.
TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharAlone) {
  plaintext_ = "中";
  punycode_ = "fiq";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
388 
// The same three-byte character five times in a row.
TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedThreeByteChar) {
  plaintext_ = "中中中中中";
  punycode_ = "fiqaaaa";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
399 
// Two distinct three-byte characters in ascending code-point order.
TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharsInOrder) {
  plaintext_ = "中文";
  punycode_ = "fiq228c";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
410 
// The same two three-byte characters as the in-order test, in the opposite
// order.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyThreeByteCharsOutOfOrder) {
  plaintext_ = "文中";
  punycode_ = "fiq128c";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
421 
// A single four-byte character with no basic characters around it.
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAlone) {
  punycode_ = "uy7h";
  // The delta "uy7h" is 127035, which added to the initial code point 128
  // gives U+1F0BB (PLAYING CARD JACK OF HEARTS), four bytes long in UTF-8.
  plaintext_ = "🂻";  // U+1F0BB

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
432 
// A four-byte character combined with a run of basic characters.  Note that,
// despite this test's name, the expected plaintext places the four-byte
// character after the basic characters "jack_".
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharBeforeBasicChars) {
  punycode_ = "jack__uh63d";
  // The delta "uh63d" inserts U+1F0BB (PLAYING CARD JACK OF HEARTS) at index 5.
  plaintext_ = "jack_🂻";  // ends with U+1F0BB

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
443 
// A four-byte character that lands between two runs of basic characters.
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAmidBasicChars) {
  punycode_ = "jack__of_hearts_ki37n";
  // The delta "ki37n" inserts U+1F0BB (PLAYING CARD JACK OF HEARTS) at index 5,
  // between "jack_" and "_of_hearts".
  plaintext_ = "jack_🂻_of_hearts";  // contains U+1F0BB

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
454 
// A four-byte character combined with a run of basic characters.  Note that,
// despite this test's name, the expected plaintext places the four-byte
// character ahead of the basic characters "_of_hearts".
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAfterBasicChars) {
  punycode_ = "_of_hearts_kz45i";
  // The delta "kz45i" inserts U+1F0BB (PLAYING CARD JACK OF HEARTS) at index 0.
  plaintext_ = "🂻_of_hearts";  // begins with U+1F0BB

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
465 
// The same four-byte character five times in a row: "uy7h" yields the first
// U+1F0BB and each following "a" (a zero delta) appends another copy.
TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedFourByteChar) {
  punycode_ = "uy7haaaa";
  plaintext_ = "🂻🂻🂻🂻🂻";  // U+1F0BB, five times

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
476 
// Five distinct four-byte characters in ascending code-point order.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsInOrder) {
  punycode_ = "8x7hcjmf";
  // Playing cards: U+1F0A6, U+1F0A7, U+1F0AA, U+1F0AD, U+1F0AE (the six,
  // seven, ten, queen, and king of spades), each appended after the last.
  plaintext_ = "🂦🂧🂪🂭🂮";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
487 
// Five distinct four-byte characters that do not appear in ascending
// code-point order, so later insertions land ahead of earlier ones.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsOutOfOrder) {
  punycode_ = "8x7hcild";
  // Playing cards: U+1F0AE, U+1F0A6, U+1F0AD, U+1F0AA, U+1F0A7 (the king, six,
  // queen, ten, and seven of spades).
  plaintext_ = "🂮🂦🂭🂪🂧";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
498 
// Two-byte, three-byte, and four-byte characters together in one identifier.
TEST_F(DecodeRustPunycodeTest, AcceptsAMixtureOfByteLengths) {
  punycode_ = "3caltsx2079ivf8aiuy7cja3a6ak";
  // Five two-byte letters, two three-byte ideographs, then the playing cards
  // U+1F0AE, U+1F0A6, U+1F0AD, U+1F0AA, U+1F0A7.
  plaintext_ = "ùéôãï中文🂮🂦🂭🂪🂧";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
509 
// A ten-digit delta sequence is expected to be rejected rather than decoded.
TEST_F(DecodeRustPunycodeTest, RejectsOverlargeDeltas) {
  punycode_.assign("123456789a");

  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
515 
516 // Finally, we test on a few prose and poetry snippets as a defense in depth.
517 // If our artificial short test inputs did not exercise a bug that is tickled by
518 // patterns typical of real human writing, maybe real human writing will catch
519 // that.
520 //
521 // These test inputs are extracted from texts old enough to be out of copyright
522 // that probe a variety of ranges of code-point space.  All are longer than 32
523 // code points, so they exercise the carrying of seminibbles from one uint64_t
524 // to the next higher one in BoundedUtf8LengthSequence.
525 
526 // The first three lines of the Old English epic _Beowulf_, mostly ASCII with a
527 // few archaic two-byte letters interspersed.
TEST_F(DecodeRustPunycodeTest, Beowulf) {
  // Expected text: ASCII words joined by underscores, with the two-byte
  // letters æ, þ, and ð scattered among them.
  plaintext_ = "hwæt_we_gardena_in_geardagum_"
               "þeodcyninga_þrym_gefrunon_"
               "hu_ða_æþelingas_ellen_fremedon";
  // Encoded form: the ASCII characters, a final underscore, then the deltas.
  punycode_ = "hwt_we_gardena_in_geardagum_"
              "eodcyninga_rym_gefrunon_"
              "hu_a_elingas_ellen_fremedon_hxg9c70do9alau";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
542 
543 // The whole of 過故人莊 by the 8th-century Chinese poet 孟浩然
544 // (Meng Haoran), exercising three-byte-character processing.
TEST_F(DecodeRustPunycodeTest, MengHaoran) {
  // Expected text: eight five-character lines, written two to a source line.
  plaintext_ = "故人具雞黍" "邀我至田家"
               "綠樹村邊合" "青山郭外斜"
               "開軒面場圃" "把酒話桑麻"
               "待到重陽日" "還來就菊花";
  // Encoded form: no basic characters, so the input is deltas only.
  punycode_ = "gmq4ss0cfvao1e2wg8mcw8b0wkl9a7tt90a8riuvbk7t8kbv9a66ogofvzlf6"
              "3d01ybn1u28dyqi5q2cxyyxnk5d2gx1ks9ddvfm17bk6gbsd6wftrav60u4ta";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
559 
560 // A poem of the 8th-century Japanese poet 山上憶良 (Yamanoue no Okura).
561 // Japanese mixes two-byte and three-byte characters: a good workout for codecs.
TEST_F(DecodeRustPunycodeTest, YamanoueNoOkura) {
  // Expected text: the poem, one phrase per source line, joined with no
  // separators.
  plaintext_ = "瓜食めば"
               "子ども思ほゆ"
               "栗食めば"
               "まして偲はゆ"
               "何処より"
               "来りしものそ"
               "眼交に"
               "もとな懸りて"
               "安眠し寝さぬ";
  // Encoded form: no basic characters, so the input is deltas only.
  punycode_ = "48jdaa3a6ccpepjrsmlb0q4bwcdtid8fg6c0cai9822utqeruk3om0u4f2wbp0"
              "em23do0op23cc2ff70mb6tae8aq759gja";
  const auto decodes_to_golden = PointsToTheNulAfter(plaintext_);

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), decodes_to_golden);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), decodes_to_golden);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
581 
582 // The first two lines of the Phoenician-language inscription on the sarcophagus
583 // of Eshmunazar II of Sidon, 6th century BCE.  Phoenician and many other
584 // archaic scripts are allocated in the Supplemental Multilingual Plane (U+10000
585 // through U+1FFFF) and thus exercise four-byte-character processing.
TEST_F(DecodeRustPunycodeTest, EshmunazarSarcophagus) {
  punycode_ = "wj9caaabaabbaaohcacxvhdc7bgxbccbdcjeacddcedcdlddbdbddcdbdcknfcee"
              "ifel8del2a7inq9fhcpxikms7a4a9ac9ataaa0g";
  // Ninety Phoenician code points (U+10900 through U+10917), each four bytes
  // long in UTF-8.  The second line contains the numeral 14, written as the
  // sign for ten (U+10917) followed by four signs for one (U+10916).
  plaintext_ = "𐤁𐤉𐤓𐤇𐤁𐤋𐤁𐤔𐤍𐤕𐤏𐤎𐤓"
               "𐤅𐤀𐤓𐤁𐤏𐤗𐤖𐤖𐤖𐤖𐤋𐤌𐤋𐤊𐤉𐤌𐤋𐤊"
               "𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊𐤑𐤃𐤍𐤌"
               "𐤁𐤍𐤌𐤋𐤊𐤕𐤁𐤍𐤕𐤌𐤋𐤊𐤑𐤃𐤍𐤌"
               "𐤃𐤁𐤓𐤌𐤋𐤊𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊"
               "𐤑𐤃𐤍𐤌𐤋𐤀𐤌𐤓𐤍𐤂𐤆𐤋𐤕";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
602 
603 }  // namespace
604 }  // namespace debugging_internal
605 ABSL_NAMESPACE_END
606 }  // namespace absl
607