1 // Copyright 2024 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/debugging/internal/decode_rust_punycode.h"
16
17 #include <cstddef>
18 #include <cstring>
19 #include <string>
20
21 #include "gmock/gmock.h"
22 #include "gtest/gtest.h"
23 #include "absl/base/config.h"
24
25 namespace absl {
26 ABSL_NAMESPACE_BEGIN
27 namespace debugging_internal {
28 namespace {
29
30 using ::testing::AllOf;
31 using ::testing::Eq;
32 using ::testing::IsNull;
33 using ::testing::Pointee;
34 using ::testing::ResultOf;
35 using ::testing::StrEq;
36
37 class DecodeRustPunycodeTest : public ::testing::Test {
38 protected:
FillBufferWithNonzeroBytes()39 void FillBufferWithNonzeroBytes() {
40 // The choice of nonzero value to fill with is arbitrary. The point is just
41 // to fail tests if DecodeRustPunycode forgets to write the final NUL
42 // character.
43 std::memset(buffer_storage_, 0xab, sizeof(buffer_storage_));
44 }
45
WithAmpleSpace()46 DecodeRustPunycodeOptions WithAmpleSpace() {
47 FillBufferWithNonzeroBytes();
48
49 DecodeRustPunycodeOptions options;
50 options.punycode_begin = punycode_.data();
51 options.punycode_end = punycode_.data() + punycode_.size();
52 options.out_begin = buffer_storage_;
53 options.out_end = buffer_storage_ + sizeof(buffer_storage_);
54 return options;
55 }
56
WithJustEnoughSpace()57 DecodeRustPunycodeOptions WithJustEnoughSpace() {
58 FillBufferWithNonzeroBytes();
59
60 const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size() - 1;
61 DecodeRustPunycodeOptions options;
62 options.punycode_begin = punycode_.data();
63 options.punycode_end = punycode_.data() + punycode_.size();
64 options.out_begin = buffer_storage_ + begin_offset;
65 options.out_end = buffer_storage_ + sizeof(buffer_storage_);
66 return options;
67 }
68
WithOneByteTooFew()69 DecodeRustPunycodeOptions WithOneByteTooFew() {
70 FillBufferWithNonzeroBytes();
71
72 const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size();
73 DecodeRustPunycodeOptions options;
74 options.punycode_begin = punycode_.data();
75 options.punycode_end = punycode_.data() + punycode_.size();
76 options.out_begin = buffer_storage_ + begin_offset;
77 options.out_end = buffer_storage_ + sizeof(buffer_storage_);
78 return options;
79 }
80
81 // Matches a correct return value of DecodeRustPunycode when `golden` is the
82 // expected plaintext output.
PointsToTheNulAfter(const std::string & golden)83 auto PointsToTheNulAfter(const std::string& golden) {
84 const size_t golden_size = golden.size();
85 return AllOf(
86 Pointee(Eq('\0')),
87 ResultOf("preceding string body",
88 [golden_size](const char* p) { return p - golden_size; },
89 StrEq(golden)));
90 }
91
92 std::string punycode_;
93 std::string plaintext_;
94 char buffer_storage_[1024];
95 };
96
// An empty encoding decodes to an empty string; the output still needs one
// byte for the terminating NUL, so a zero-byte output range must be rejected.
TEST_F(DecodeRustPunycodeTest, MapsEmptyToEmpty) {
  plaintext_.clear();
  punycode_.clear();

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
107
// Input made only of basic characters plus a final '_' decodes to those
// characters with the delimiter removed.
TEST_F(DecodeRustPunycodeTest,
       StripsTheTrailingDelimiterFromAPureRunOfBasicChars) {
  plaintext_ = "foo";
  punycode_ = "foo_";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
119
// With several underscores present, only the final one is the delimiter; the
// earlier one is kept as ordinary text.
TEST_F(DecodeRustPunycodeTest, TreatsTheLastUnderscoreAsTheDelimiter) {
  plaintext_ = "foo_bar";
  punycode_ = "foo_bar_";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
130
// A leading underscore is fine as long as a later underscore serves as the
// delimiter; the leading one is then part of the decoded text.
TEST_F(DecodeRustPunycodeTest, AcceptsALeadingUnderscoreIfNotTheDelimiter) {
  plaintext_ = "_foo";
  punycode_ = "_foo_";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
141
// When the only underscore is the very first byte it would have to be the
// delimiter, and the decoder must refuse such input.
TEST_F(DecodeRustPunycodeTest, RejectsALeadingUnderscoreDelimiter) {
  punycode_.assign("_foo");

  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
147
// A NUL byte inside the basic-character run must cause rejection.  The length
// is given explicitly so that the NUL and the bytes after it are part of the
// input rather than ending the literal early.
TEST_F(DecodeRustPunycodeTest, RejectsEmbeddedNul) {
  punycode_.assign("foo\0bar_", 8);

  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
153
// ASCII bytes that cannot appear in an identifier (a control character, '-',
// ';', and DEL) must be rejected when they occur among the basic characters.
TEST_F(DecodeRustPunycodeTest, RejectsAsciiCharsOtherThanIdentifierChars) {
  const char* const kBadInputs[] = {
      "foo\007_",
      "foo-_",
      "foo;_",
      "foo\177_",
  };

  for (const char* bad_input : kBadInputs) {
    SCOPED_TRACE(bad_input);
    punycode_ = bad_input;
    EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
  }
}
167
// Bytes with the high bit set are never valid in the encoded input, whether
// or not a delimiter follows them.
TEST_F(DecodeRustPunycodeTest, RejectsRawNonAsciiChars) {
  const char* const kBadInputs[] = {
      "\x80",
      "\x80_",
      "\xff",
      "\xff_",
  };

  for (const char* bad_input : kBadInputs) {
    SCOPED_TRACE(bad_input);
    punycode_ = bad_input;
    EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
  }
}
181
// The digit 'a' encodes 0, so a lone "a" yields the smallest non-ASCII code
// point, U+0080, by itself.  (U+0080 PAD is not an identifier character, but
// DecodeRustPunycode does not check whether non-ASCII characters could belong
// to an identifier.)
TEST_F(DecodeRustPunycodeTest, RecognizesU0080) {
  plaintext_ = "\xc2\x80";
  punycode_ = "a";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
195
// The bias is 72 for the first code point, which makes every digit other than
// a/A nonfinal in one of the first two positions of a delta sequence.  A
// one-byte input other than "a" is therefore a truncated delta and must be
// rejected.
TEST_F(DecodeRustPunycodeTest, OneByteDeltaSequencesMustBeA) {
  const char* const kTruncatedDeltas[] = {"b", "z", "0", "9"};

  for (const char* truncated : kTruncatedDeltas) {
    SCOPED_TRACE(truncated);
    punycode_ = truncated;
    EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
  }
}
211
// The two-digit delta "ba" decodes to the single code point U+0081.
TEST_F(DecodeRustPunycodeTest, AcceptsDeltaSequenceBA) {
  plaintext_ = "\xc2\x81";
  punycode_ = "ba";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
222
// Two-digit deltas whose second digit is 'a' each decode to one code point
// in the range U+0082..U+00A3, selected by the first digit.  Each case is
// decoded into an output range of exactly the required size.
TEST_F(DecodeRustPunycodeTest, AcceptsOtherDeltaSequencesWithSecondByteA) {
  struct Case {
    const char* encoded;
    const char* decoded;
  };
  const Case kCases[] = {
      {"ca", "\xc2\x82"},
      {"za", "\xc2\x99"},
      {"0a", "\xc2\x9a"},
      {"1a", "\xc2\x9b"},
      {"9a", "£"},  // Pound sign, U+00A3
  };

  for (const Case& c : kCases) {
    SCOPED_TRACE(c.encoded);
    punycode_ = c.encoded;
    plaintext_ = c.decoded;
    EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
                PointsToTheNulAfter(plaintext_));
  }
}
249
// A two-digit input whose second digit is anything but 'a' ends in the
// middle of a delta sequence and must be rejected.
TEST_F(DecodeRustPunycodeTest, RejectsDeltaWhereTheSecondAndLastDigitIsNotA) {
  const char* const kTruncatedDeltas[] = {"bb", "zz", "00", "99"};

  for (const char* truncated : kTruncatedDeltas) {
    SCOPED_TRACE(truncated);
    punycode_ = truncated;
    EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
  }
}
263
// Three-digit deltas of the form "?ba" each decode to one code point in the
// range U+00A4..U+00C6, selected by the first digit.  Each case is decoded
// into an output range of exactly the required size.
TEST_F(DecodeRustPunycodeTest, AcceptsDeltasWithSecondByteBFollowedByA) {
  struct Case {
    const char* encoded;
    const char* decoded;
  };
  const Case kCases[] = {
      {"bba", "¤"},  // U+00A4
      {"cba", "¥"},  // U+00A5
      {"zba", "¼"},  // U+00BC
      {"0ba", "½"},  // U+00BD
      {"1ba", "¾"},  // U+00BE
      {"9ba", "Æ"},  // U+00C6
  };

  for (const Case& c : kCases) {
    SCOPED_TRACE(c.encoded);
    punycode_ = c.encoded;
    plaintext_ = c.decoded;
    EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
                PointsToTheNulAfter(plaintext_));
  }
}
295
296 // Tests beyond this point use characters allowed in identifiers, so you can
297 // prepend _RNvC1cu<decimal length><underscore if [0-9_] follows> to a test
298 // input and run it through another Rust demangler to verify that the
299 // corresponding golden output is correct.
300
// A single two-byte (in UTF-8) character with no basic characters around it.
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAlone) {
  plaintext_ = "à";
  punycode_ = "0ca";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
311
// A two-byte character inserted ahead of all the basic characters.
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharBeforeBasicChars) {
  plaintext_ = "à_la_mode";
  punycode_ = "_la_mode_yya";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
322
// A two-byte character inserted in the middle of the basic characters.
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAmidBasicChars) {
  plaintext_ = "verre_à_vin";
  punycode_ = "verre__vin_m4a";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
333
// A two-byte character appended after all the basic characters.
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAfterBasicChars) {
  plaintext_ = "beltà";
  punycode_ = "belt_3na";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
344
// The same two-byte character several times in a row: after the first
// occurrence, each further 'a' digit in the input adds one more copy.
TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedTwoByteChar) {
  plaintext_ = "àààà";
  punycode_ = "0caaaa";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
355
// Several distinct two-byte characters whose code points increase from left
// to right in the decoded text.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsInOrder) {
  plaintext_ = "ãéïôù";
  punycode_ = "3camsuz";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
366
// The same five two-byte characters as the in-order test, but appearing in
// the decoded text in an order that does not follow their code points.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsOutOfOrder) {
  plaintext_ = "ùéôãï";
  punycode_ = "3caltsx";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
377
// A single three-byte (in UTF-8) character with no basic characters.
TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharAlone) {
  plaintext_ = "中";
  punycode_ = "fiq";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
388
// The same three-byte character five times in a row: "fiq" yields the first
// copy and each of the four trailing 'a' digits adds another.
TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedThreeByteChar) {
  plaintext_ = "中中中中中";
  punycode_ = "fiqaaaa";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
399
// Two different three-byte characters, the lower code point first.
TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharsInOrder) {
  plaintext_ = "中文";
  punycode_ = "fiq228c";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
410
// The same two three-byte characters, with the higher code point first in
// the decoded text.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyThreeByteCharsOutOfOrder) {
  plaintext_ = "文中";
  punycode_ = "fiq128c";

  const auto golden_tail = PointsToTheNulAfter(plaintext_);
  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), golden_tail);
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), golden_tail);
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
421
// A single four-byte (in UTF-8) character with no basic characters.
//
// "uy7h" is one delta of 127035, i.e. code point 128 + 127035 = U+1F0BB
// (PLAYING CARD JACK OF HEARTS).  The golden text is spelled with hex escapes
// so that it survives tools that drop characters outside the Basic
// Multilingual Plane; an empty golden string would make this test meaningless.
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAlone) {
  punycode_ = "uy7h";
  plaintext_ = "\xf0\x9f\x82\xbb";  // U+1F0BB

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
432
// A four-byte character combined with a run of basic characters.
//
// The basic characters are "jack_" (everything before the last underscore)
// and the delta "uh63d" inserts U+1F0BB (PLAYING CARD JACK OF HEARTS) at
// index 5, directly after them.  The golden text must therefore contain that
// character; it is written with hex escapes so that it survives tools that
// drop characters outside the Basic Multilingual Plane.
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharBeforeBasicChars) {
  punycode_ = "jack__uh63d";
  plaintext_ = "jack_" "\xf0\x9f\x82\xbb";  // "jack_" + U+1F0BB

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
443
// A four-byte character inserted in the middle of the basic characters.
//
// The basic characters are "jack__of_hearts" and the delta "ki37n" inserts
// U+1F0BB (PLAYING CARD JACK OF HEARTS) at index 5, between the two adjacent
// underscores.  The golden text must therefore contain that character; it is
// written with hex escapes so that it survives tools that drop characters
// outside the Basic Multilingual Plane.
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAmidBasicChars) {
  punycode_ = "jack__of_hearts_ki37n";
  plaintext_ = "jack_" "\xf0\x9f\x82\xbb" "_of_hearts";  // U+1F0BB in the middle

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
454
// A four-byte character combined with a run of basic characters.
//
// The basic characters are "_of_hearts" and the delta "kz45i" inserts U+1F0BB
// (PLAYING CARD JACK OF HEARTS) at index 0, directly ahead of them.  The
// golden text must therefore contain that character; it is written with hex
// escapes so that it survives tools that drop characters outside the Basic
// Multilingual Plane.
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAfterBasicChars) {
  punycode_ = "_of_hearts_kz45i";
  plaintext_ = "\xf0\x9f\x82\xbb" "_of_hearts";  // U+1F0BB + "_of_hearts"

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
465
// The same four-byte character five times in a row.
//
// "uy7h" yields U+1F0BB (PLAYING CARD JACK OF HEARTS) and each of the four
// trailing 'a' digits is a zero delta that appends another copy.  The golden
// text is written with hex escapes so that it survives tools that drop
// characters outside the Basic Multilingual Plane; an empty golden string
// would make this test meaningless.
TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedFourByteChar) {
  punycode_ = "uy7haaaa";
  plaintext_ = "\xf0\x9f\x82\xbb"   // U+1F0BB
               "\xf0\x9f\x82\xbb"   // U+1F0BB
               "\xf0\x9f\x82\xbb"   // U+1F0BB
               "\xf0\x9f\x82\xbb"   // U+1F0BB
               "\xf0\x9f\x82\xbb";  // U+1F0BB

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
476
// Five distinct four-byte characters whose code points increase from left to
// right in the decoded text.
//
// "8x7h" yields U+1F0A6 and the deltas "c", "j", "m", "f" then append
// U+1F0A7, U+1F0AA, U+1F0AD and U+1F0AE in that order (all playing-card
// symbols).  The golden text is written with hex escapes so that it survives
// tools that drop characters outside the Basic Multilingual Plane; an empty
// golden string would make this test meaningless.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsInOrder) {
  punycode_ = "8x7hcjmf";
  plaintext_ = "\xf0\x9f\x82\xa6"   // U+1F0A6
               "\xf0\x9f\x82\xa7"   // U+1F0A7
               "\xf0\x9f\x82\xaa"   // U+1F0AA
               "\xf0\x9f\x82\xad"   // U+1F0AD
               "\xf0\x9f\x82\xae";  // U+1F0AE

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
487
// Five distinct four-byte characters that appear in the decoded text in an
// order that does not follow their code points.
//
// The code points are inserted in increasing order (U+1F0A6, A7, AA, AD, AE)
// at positions 0, 1, 1, 1, 0, which leaves them arranged as
// U+1F0AE, U+1F0A6, U+1F0AD, U+1F0AA, U+1F0A7.  The golden text is written
// with hex escapes so that it survives tools that drop characters outside the
// Basic Multilingual Plane; an empty golden string would make this test
// meaningless.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsOutOfOrder) {
  punycode_ = "8x7hcild";
  plaintext_ = "\xf0\x9f\x82\xae"   // U+1F0AE
               "\xf0\x9f\x82\xa6"   // U+1F0A6
               "\xf0\x9f\x82\xad"   // U+1F0AD
               "\xf0\x9f\x82\xaa"   // U+1F0AA
               "\xf0\x9f\x82\xa7";  // U+1F0A7

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
498
// Two-, three- and four-byte characters together in one identifier.
//
// The input begins with the same digits as the out-of-order two-byte test
// ("3caltsx"), continues with the two three-byte characters, and ends with
// five four-byte playing-card symbols that land after them in the order
// U+1F0AE, U+1F0A6, U+1F0AD, U+1F0AA, U+1F0A7.  The four-byte part of the
// golden text is written with hex escapes so that it survives tools that drop
// characters outside the Basic Multilingual Plane; without it the golden text
// would not match what the input encodes.
TEST_F(DecodeRustPunycodeTest, AcceptsAMixtureOfByteLengths) {
  punycode_ = "3caltsx2079ivf8aiuy7cja3a6ak";
  plaintext_ = "ùéôãï中文"
               "\xf0\x9f\x82\xae"   // U+1F0AE
               "\xf0\x9f\x82\xa6"   // U+1F0A6
               "\xf0\x9f\x82\xad"   // U+1F0AD
               "\xf0\x9f\x82\xaa"   // U+1F0AA
               "\xf0\x9f\x82\xa7";  // U+1F0A7

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
509
// A long run of high-valued digits forms a delta that the decoder must
// refuse rather than accept.
TEST_F(DecodeRustPunycodeTest, RejectsOverlargeDeltas) {
  punycode_.assign("123456789a");

  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
515
516 // Finally, we test on a few prose and poetry snippets as a defense in depth.
517 // If our artificial short test inputs did not exercise a bug that is tickled by
518 // patterns typical of real human writing, maybe real human writing will catch
519 // that.
520 //
521 // These test inputs are extracted from texts old enough to be out of copyright
522 // that probe a variety of ranges of code-point space. All are longer than 32
523 // code points, so they exercise the carrying of seminibbles from one uint64_t
524 // to the next higher one in BoundedUtf8LengthSequence.
525
526 // The first three lines of the Old English epic _Beowulf_, mostly ASCII with a
527 // few archaic two-byte letters interspersed.
TEST_F(DecodeRustPunycodeTest,Beowulf)528 TEST_F(DecodeRustPunycodeTest, Beowulf) {
529 punycode_ = "hwt_we_gardena_in_geardagum_"
530 "eodcyninga_rym_gefrunon_"
531 "hu_a_elingas_ellen_fremedon_hxg9c70do9alau";
532 plaintext_ = "hwæt_we_gardena_in_geardagum_"
533 "þeodcyninga_þrym_gefrunon_"
534 "hu_ða_æþelingas_ellen_fremedon";
535
536 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
537 PointsToTheNulAfter(plaintext_));
538 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
539 PointsToTheNulAfter(plaintext_));
540 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
541 }
542
543 // The whole of 過故人莊 by the 8th-century Chinese poet 孟浩然
544 // (Meng Haoran), exercising three-byte-character processing.
TEST_F(DecodeRustPunycodeTest,MengHaoran)545 TEST_F(DecodeRustPunycodeTest, MengHaoran) {
546 punycode_ = "gmq4ss0cfvao1e2wg8mcw8b0wkl9a7tt90a8riuvbk7t8kbv9a66ogofvzlf6"
547 "3d01ybn1u28dyqi5q2cxyyxnk5d2gx1ks9ddvfm17bk6gbsd6wftrav60u4ta";
548 plaintext_ = "故人具雞黍" "邀我至田家"
549 "綠樹村邊合" "青山郭外斜"
550 "開軒面場圃" "把酒話桑麻"
551 "待到重陽日" "還來就菊花";
552
553 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
554 PointsToTheNulAfter(plaintext_));
555 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
556 PointsToTheNulAfter(plaintext_));
557 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
558 }
559
560 // A poem of the 8th-century Japanese poet 山上憶良 (Yamanoue no Okura).
561 // Japanese mixes two-byte and three-byte characters: a good workout for codecs.
TEST_F(DecodeRustPunycodeTest,YamanoueNoOkura)562 TEST_F(DecodeRustPunycodeTest, YamanoueNoOkura) {
563 punycode_ = "48jdaa3a6ccpepjrsmlb0q4bwcdtid8fg6c0cai9822utqeruk3om0u4f2wbp0"
564 "em23do0op23cc2ff70mb6tae8aq759gja";
565 plaintext_ = "瓜食めば"
566 "子ども思ほゆ"
567 "栗食めば"
568 "まして偲はゆ"
569 "何処より"
570 "来りしものそ"
571 "眼交に"
572 "もとな懸りて"
573 "安眠し寝さぬ";
574
575 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
576 PointsToTheNulAfter(plaintext_));
577 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
578 PointsToTheNulAfter(plaintext_));
579 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
580 }
581
582 // The first two lines of the Phoenician-language inscription on the sarcophagus
583 // of Eshmunazar II of Sidon, 6th century BCE. Phoenician and many other
584 // archaic scripts are allocated in the Supplemental Multilingual Plane (U+10000
585 // through U+1FFFF) and thus exercise four-byte-character processing.
TEST_F(DecodeRustPunycodeTest,EshmunazarSarcophagus)586 TEST_F(DecodeRustPunycodeTest, EshmunazarSarcophagus) {
587 punycode_ = "wj9caaabaabbaaohcacxvhdc7bgxbccbdcjeacddcedcdlddbdbddcdbdcknfcee"
588 "ifel8del2a7inq9fhcpxikms7a4a9ac9ataaa0g";
589 plaintext_ = ""
590 ""
591 ""
592 ""
593 ""
594 "";
595
596 ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
597 PointsToTheNulAfter(plaintext_));
598 ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
599 PointsToTheNulAfter(plaintext_));
600 EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
601 }
602
603 } // namespace
604 } // namespace debugging_internal
605 ABSL_NAMESPACE_END
606 } // namespace absl
607