xref: /aosp_15_r20/external/one-true-awk/testdir/T.utf (revision 9a7741de182b2776d7b30d6355f2585c0780a51b)
#!/bin/sh

# T.utf: table-driven tests of awk's handling of UTF-8 text.
#
# The awk program below is a small test driver.  It reads the test table
# from the here-document on its standard input, turns every data line into
# a shell command that runs the awk under test, and compares the actual
# output with the expected output using diff.

echo T.utf: tests of utf functions

# awk binary under test; defaults to ../a.out unless $awk is already set.
awk=${awk-../a.out}

# Hand the chosen binary to the driver with -v so that the generated test
# commands run the same awk as the driver itself, instead of a hard-coded
# path that silently ignores an overridden $awk.
$awk -v awk="$awk" '
BEGIN {
	FS = "\t"	# test table columns are tab-separated
}
NF == 0 || $1 ~ /^#/ {	# skip blank lines and comment lines between tests
	next
}
$1 ~ /try/ {	# new test
	nt++
	# strip the leading "try <name> " so only the program text remains
	sub(/try [a-zA-Z_0-9]+ /, "")
	prog = $0
	printf("try %3d %s\n", nt, prog)
	# command line for the awk under test: tab as FS, program single-quoted
	prog = sprintf("%s -F\"\\t\" '"'"'%s'"'"'", awk, prog)
	# print "prog is", prog
	nt2 = 0
	while (getline > 0) {
		if (NF == 0)	# blank line terminates a sequence
			break
		# every field except the last is input; rejoin them with tabs
		input = $1
		for (i = 2; i < NF; i++)	# input data
			input = input "\t" $i
		# actual output of the program under test goes to foo1
		test = sprintf("./echo '"'"'%s'"'"' | %s >foo1; ",
			input, prog)
		# the last field is the expected output, written to foo2;
		# a literal "" in the table means the expected output is empty
		if ($NF == "\"\"")
			output = ">foo2;"
		else
			output = sprintf("./echo '"'"'%s'"'"' >foo2; ", $NF)
		# turn the two-character sequences \t and \n into a real tab and newline
		gsub(/\\t/, "\t", output)
		gsub(/\\n/, "\n", output)
		# report a failure, numbered test.case, when the two files differ
		run = sprintf("diff foo1 foo2 || echo test %d.%d failed",
			nt, ++nt2)
		# print  "input is", input
		# print  "test is", test
		# print  "output is", output
		# print  "run is", run
		system(test output run)
	}
	tt += nt2	# running total of individual cases executed
}
END { print tt, "tests" }
' <<\!!!!
49*9a7741deSElliott Hughes# General format:
50*9a7741deSElliott Hughes# try program as rest of line
51*9a7741deSElliott Hughes# $1	$2	$3	output1  (\t for tab, \n for newline)
52*9a7741deSElliott Hughes# $1	$2	$3	output2  ("" for null)
53*9a7741deSElliott Hughes# ... terminated by blank line
54*9a7741deSElliott Hughes
55*9a7741deSElliott Hughes# try another program...
56*9a7741deSElliott Hughes
57*9a7741deSElliott Hughestry length { print length($1) }
58*9a7741deSElliott Hughes	0
59*9a7741deSElliott Hughesa	1
60*9a7741deSElliott Hughesの今がその時だ	7
61*9a7741deSElliott HughesСейчас	6
62*9a7741deSElliott Hughes现在是时候了	6
63*9a7741deSElliott Hughes给所有的好男	6
64*9a7741deSElliott Hughes来参加聚会。	6
65*9a7741deSElliott Hughes��	1
66*9a7741deSElliott Hughes�� finger	8
67*9a7741deSElliott HughesΤωρα	4
68*9a7741deSElliott Hughesγια	3
69*9a7741deSElliott Hughesνα	2
70*9a7741deSElliott Hughesעכשיו	5
71*9a7741deSElliott Hughesלכל	3
72*9a7741deSElliott Hughesלבוא	4
73*9a7741deSElliott Hughesの今がその時だ	7
74*9a7741deSElliott Hughes지금이	3
75*9a7741deSElliott Hughes모든	2
76*9a7741deSElliott Hughes파티에	3
77*9a7741deSElliott HughesСейчас	6
78*9a7741deSElliott Hughesдля	3
79*9a7741deSElliott Hughesприйти	6
80*9a7741deSElliott Hughes
81*9a7741deSElliott Hughestry index { print index($1, $2) }
82*9a7741deSElliott Hughesabc	a	1
83*9a7741deSElliott Hughesabc	b	2
84*9a7741deSElliott Hughesabc	x	0
85*9a7741deSElliott Hughes现在是时候了	""	0
86*9a7741deSElliott Hughes现在是时候了	了	6
87*9a7741deSElliott Hughes现在是时候了	在是	2
88*9a7741deSElliott Hughes现在是时候了	x	0
89*9a7741deSElliott Hughesx在是时候了	x	2
90*9a7741deSElliott Hughes�� fingerすべての善人のためにすべての善人のために	f	3
91*9a7741deSElliott Hughes�� finger��	r��	8
92*9a7741deSElliott Hughes
93*9a7741deSElliott Hughestry substr { print substr($0, 2, 3) }
94*9a7741deSElliott Hughesabcdef	bcd
95*9a7741deSElliott HughesΤωρα ειναι η	ωρα
96*9a7741deSElliott HughesΤω	ω
97*9a7741deSElliott Hughes지금 이절호의	금 이
98*9a7741deSElliott Hughesxпyрийти	пyр
99*9a7741deSElliott Hughes
100*9a7741deSElliott Hughestry rematch { print $1 ~ $2 }
101*9a7741deSElliott Hughesabc	a	1
102*9a7741deSElliott Hughesabc	x	0
103*9a7741deSElliott Hughesすべての善人のために	の	1
104*9a7741deSElliott Hughesすべての善人のために	の.*の	1
105*9a7741deSElliott Hughesすべての善人のために	の.*て	0
106*9a7741deSElliott HughesΤωρα	ω+	1
107*9a7741deSElliott Hughes
108*9a7741deSElliott Hughes# replace first occurrence of $2 by $3 in $1
109*9a7741deSElliott Hughestry sub { n = sub($2, $3, $1); print n, $1 }
110*9a7741deSElliott Hughesabcdef	bc	XYZ	1 aXYZdef
111*9a7741deSElliott Hughesabcdef	xy	XYZ	0 abcdef
112*9a7741deSElliott Hughesの今がその時だ	の	NO	1 NO今がその時だ
113*9a7741deSElliott Hughes�� finger	��.*g	FING	1 FINGer
114*9a7741deSElliott HughesСейчас	.	x	1 xейчас
115*9a7741deSElliott Hughes
116*9a7741deSElliott Hughes# replace all occurrences of $2 by $3 in $1
117*9a7741deSElliott Hughestry gsub { n = gsub($2, $3, $1); print n, $1 }
118*9a7741deSElliott Hughesabcdef	bc	XYZ	1 aXYZdef
119*9a7741deSElliott Hughesabcdef	xy	XYZ	0 abcdef
120*9a7741deSElliott Hughesの今がその時だ	の	NO	2 NO今がそNO時だ
121*9a7741deSElliott Hughes�� finger	��.*g	FING	1 FINGer
122*9a7741deSElliott HughesСейчас	.	x	6 xxxxxx
123*9a7741deSElliott Hughes
124*9a7741deSElliott Hughestry match { print match($1, $2), RSTART, RLENGTH }
125*9a7741deSElliott Hughesabc	[^a]	2 2 1
126*9a7741deSElliott Hughesabc	[^ab]	3 3 1
127*9a7741deSElliott Hughesすべての善人のために	[^す]	2 2 1
128*9a7741deSElliott Hughesすべての善人のために	[^ぁ-ゖ]	5 5 1
129*9a7741deSElliott Hughesabc	a	1 1 1
130*9a7741deSElliott Hughesabc	x	0 0 -1
131*9a7741deSElliott Hughesすべての善人のために	の	4 4 1
132*9a7741deSElliott Hughesすべての善人のために	の.*の	4 4 4
133*9a7741deSElliott Hughesすべての善人のために	の.*て	0 0 -1
134*9a7741deSElliott HughesΤωρα	ω+	2 2 1
135*9a7741deSElliott HughesΤωρα	x+	0 0 -1
136*9a7741deSElliott HughesΤωρα	ω.	2 2 2
137*9a7741deSElliott Hughesすべての善人のために	[の]	4 4 1
138*9a7741deSElliott Hughesすべての善人のために	[ぁ-え]	0 0 -1
139*9a7741deSElliott Hughesすべての善人のために	[^ぁ-え]	1 1 1
140*9a7741deSElliott HughesΤωρα ειναι η	[α-ω]	2 2 1
141*9a7741deSElliott HughesΤωρα ειναι η	[α-ω]+	2 2 3
142*9a7741deSElliott HughesxxxΤωρα ειναι η	[Α-Ω]	4 4 1
143*9a7741deSElliott Hughesγια όλους τους καλούς ά	α.*α	3 3 15
144*9a7741deSElliott Hughesνα έρθει στο πά	[^ν]	2 2 1
145*9a7741deSElliott Hughes
146*9a7741deSElliott Hughes# FS="" should split into unicode chars
147*9a7741deSElliott Hughestry emptyFS BEGIN {FS=""} {print NF}
148*9a7741deSElliott Hughesすべての善人のために	10
149*9a7741deSElliott Hughesの今がその時だ	7
150*9a7741deSElliott HughesСейчас	6
151*9a7741deSElliott Hughes现在是时候了	6
152*9a7741deSElliott Hughes给所有的好男	6
153*9a7741deSElliott Hughes来参加聚会。	6
154*9a7741deSElliott Hughes��	1
155*9a7741deSElliott Hughes�� finger	8
156*9a7741deSElliott Hughes
157*9a7741deSElliott Hughes# printf(%N.Ns) for utf8 strings
158*9a7741deSElliott Hughestry printfs1 {printf("[%5.2s][%-5.2s]\n"), $1, $1}
159*9a7741deSElliott Hughesabcd	[   ab][ab   ]
160*9a7741deSElliott Hughes现在abc	[   现在][现在   ]
161*9a7741deSElliott Hughes现ωabc	[   现ω][现ω   ]
162*9a7741deSElliott Hughesωabc	[   ωa][ωa   ]
163*9a7741deSElliott HughesСейчас	[   Се][Се   ]
164*9a7741deSElliott HughesСейxyz	[   Се][Се   ]
165*9a7741deSElliott Hughes��	[    ��][��    ]
166*9a7741deSElliott Hughes
167*9a7741deSElliott Hughes# printf(%Ns) for utf8 strings (field width only, no precision)
168*9a7741deSElliott Hughestry printfs2 {printf("[%5s][%-5s]\n"), $1, $1}
169*9a7741deSElliott Hughesabcd	[ abcd][abcd ]
170*9a7741deSElliott Hughes现在ab	[ 现在ab][现在ab ]
171*9a7741deSElliott Hughesa现在ab	[a现在ab][a现在ab]
172*9a7741deSElliott Hughesa现在abc	[a现在abc][a现在abc]
173*9a7741deSElliott Hughes现ωab	[ 现ωab][现ωab ]
174*9a7741deSElliott Hughesωabc	[ ωabc][ωabc ]
175*9a7741deSElliott HughesСейчас	[Сейчас][Сейчас]
176*9a7741deSElliott Hughes��	[    ��][��    ]
177*9a7741deSElliott Hughes
178*9a7741deSElliott Hughes# printf(%.Ns) for utf8 strings (precision only, no field width)
179*9a7741deSElliott Hughestry printfs3 {printf("[%.2s][%-.2s]\n"), $1, $1}
180*9a7741deSElliott Hughesabcd	[ab][ab]
181*9a7741deSElliott Hughes现在abc	[现在][现在]
182*9a7741deSElliott Hughes现ωabc	[现ω][现ω]
183*9a7741deSElliott Hughesω	[ω][ω]
184*9a7741deSElliott Hughes��	[��][��]
185*9a7741deSElliott Hughes
186*9a7741deSElliott Hughes# printf(%c) for utf
187*9a7741deSElliott Hughestry printfc {printf("%c %c\n", $1, substr($1,2,1))}
188*9a7741deSElliott Hughesすべての善人のために	す べ
189*9a7741deSElliott Hughesの今がその時だ	の 今
190*9a7741deSElliott HughesСейчас	С е
191*9a7741deSElliott Hughes现在是时候了	现 在
192*9a7741deSElliott Hughes����	�� ��
193*9a7741deSElliott Hughes
194*9a7741deSElliott Hughes!!!!
195