xref: /aosp_15_r20/external/llvm/unittests/Support/UnicodeTest.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
//===- unittests/Support/UnicodeTest.cpp - Unicode.h tests ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker 
10*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Unicode.h"
11*9880d681SAndroid Build Coastguard Worker #include "gtest/gtest.h"
12*9880d681SAndroid Build Coastguard Worker 
13*9880d681SAndroid Build Coastguard Worker namespace llvm {
14*9880d681SAndroid Build Coastguard Worker namespace sys {
15*9880d681SAndroid Build Coastguard Worker namespace unicode {
16*9880d681SAndroid Build Coastguard Worker namespace {
17*9880d681SAndroid Build Coastguard Worker 
TEST(Unicode,columnWidthUTF8)18*9880d681SAndroid Build Coastguard Worker TEST(Unicode, columnWidthUTF8) {
19*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(0, columnWidthUTF8(""));
20*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(1, columnWidthUTF8(" "));
21*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(1, columnWidthUTF8("a"));
22*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(1, columnWidthUTF8("~"));
23*9880d681SAndroid Build Coastguard Worker 
24*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(6, columnWidthUTF8("abcdef"));
25*9880d681SAndroid Build Coastguard Worker 
26*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-1, columnWidthUTF8("\x01"));
27*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-1, columnWidthUTF8("aaaaaaaaaa\x01"));
28*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-1, columnWidthUTF8("\342\200\213")); // 200B ZERO WIDTH SPACE
29*9880d681SAndroid Build Coastguard Worker 
30*9880d681SAndroid Build Coastguard Worker   // 00AD SOFT HYPHEN is displayed on most terminals as a space or a dash. Some
31*9880d681SAndroid Build Coastguard Worker   // text editors display it only when a line is broken at it, some use it as a
32*9880d681SAndroid Build Coastguard Worker   // line-break hint, but don't display. We choose terminal-oriented
33*9880d681SAndroid Build Coastguard Worker   // interpretation.
34*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(1, columnWidthUTF8("\302\255"));
35*9880d681SAndroid Build Coastguard Worker 
36*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(0, columnWidthUTF8("\314\200"));     // 0300 COMBINING GRAVE ACCENT
37*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(1, columnWidthUTF8("\340\270\201")); // 0E01 THAI CHARACTER KO KAI
38*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(2, columnWidthUTF8("\344\270\200")); // CJK UNIFIED IDEOGRAPH-4E00
39*9880d681SAndroid Build Coastguard Worker 
40*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(4, columnWidthUTF8("\344\270\200\344\270\200"));
41*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(3, columnWidthUTF8("q\344\270\200"));
42*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(3, columnWidthUTF8("\314\200\340\270\201\344\270\200"));
43*9880d681SAndroid Build Coastguard Worker 
44*9880d681SAndroid Build Coastguard Worker   // Invalid UTF-8 strings, columnWidthUTF8 should error out.
45*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-2, columnWidthUTF8("\344"));
46*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-2, columnWidthUTF8("\344\270"));
47*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-2, columnWidthUTF8("\344\270\033"));
48*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-2, columnWidthUTF8("\344\270\300"));
49*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-2, columnWidthUTF8("\377\366\355"));
50*9880d681SAndroid Build Coastguard Worker 
51*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-2, columnWidthUTF8("qwer\344"));
52*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-2, columnWidthUTF8("qwer\344\270"));
53*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-2, columnWidthUTF8("qwer\344\270\033"));
54*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-2, columnWidthUTF8("qwer\344\270\300"));
55*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-2, columnWidthUTF8("qwer\377\366\355"));
56*9880d681SAndroid Build Coastguard Worker 
57*9880d681SAndroid Build Coastguard Worker   // UTF-8 sequences longer than 4 bytes correspond to unallocated Unicode
58*9880d681SAndroid Build Coastguard Worker   // characters.
59*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-2, columnWidthUTF8("\370\200\200\200\200"));     // U+200000
60*9880d681SAndroid Build Coastguard Worker   EXPECT_EQ(-2, columnWidthUTF8("\374\200\200\200\200\200")); // U+4000000
61*9880d681SAndroid Build Coastguard Worker }
62*9880d681SAndroid Build Coastguard Worker 
// Checks isPrintable() at the edges of several code point ranges: the C0 and
// C1 control blocks, printable ASCII, the Latin-1 characters around SOFT
// HYPHEN, a reserved gap in the Greek block, a format character, and the
// boundaries of planes 1/2 and of the Unicode code space.
TEST(Unicode, isPrintable) {
  EXPECT_FALSE(isPrintable(0)); // <control-0000>..<control-001F>
  EXPECT_FALSE(isPrintable(0x01));
  EXPECT_FALSE(isPrintable(0x1F));
  // First, middle and last of the printable ASCII characters checked here.
  EXPECT_TRUE(isPrintable(' '));
  EXPECT_TRUE(isPrintable('A'));
  EXPECT_TRUE(isPrintable('~'));
  EXPECT_FALSE(isPrintable(0x7F)); // <control-007F>..<control-009F>
  EXPECT_FALSE(isPrintable(0x90));
  EXPECT_FALSE(isPrintable(0x9F));

  // SOFT HYPHEN and both of its neighbours are expected to be printable.
  EXPECT_TRUE(isPrintable(0xAC));
  EXPECT_TRUE(isPrintable(0xAD)); // SOFT HYPHEN is displayed on most terminals
                                  // as either a space or a dash.
  EXPECT_TRUE(isPrintable(0xAE));

  // Last assigned character before a reserved gap, then the gap itself.
  EXPECT_TRUE(isPrintable(0x0377));  // GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
  EXPECT_FALSE(isPrintable(0x0378)); // <reserved-0378>..<reserved-0379>

  EXPECT_FALSE(isPrintable(0x0600)); // ARABIC NUMBER SIGN

  // Plane boundary: the last code point of plane 1 and the first of plane 2.
  EXPECT_FALSE(isPrintable(0x1FFFF)); // <reserved-1F774>..<noncharacter-1FFFF>
  EXPECT_TRUE(isPrintable(0x20000));  // CJK UNIFIED IDEOGRAPH-20000

  // Highest code point of the Unicode code space.
  EXPECT_FALSE(isPrintable(0x10FFFF)); // noncharacter
}
89*9880d681SAndroid Build Coastguard Worker 
90*9880d681SAndroid Build Coastguard Worker } // namespace
91*9880d681SAndroid Build Coastguard Worker } // namespace unicode
92*9880d681SAndroid Build Coastguard Worker } // namespace sys
93*9880d681SAndroid Build Coastguard Worker } // namespace llvm
94