#!/bin/sh
# T.utf: data-driven regression tests for UTF-8 handling in awk
# (length, index, substr, regex match, sub/gsub, match, empty FS,
# printf %s and %c).
#
# Run from the test directory.  The environment variable "awk" selects
# the interpreter under test (default ../a.out).  A ./echo helper must
# exist in the current directory; scratch files foo1 and foo2 are
# written there.

echo T.utf: tests of utf functions

awk=${awk-../a.out}

# The driver reads the test table from the here-document below.  The
# interpreter path is handed to the driver with -v so that every test
# program runs under the same awk as the driver, instead of a
# hard-coded ../a.out.
$awk -v awk="$awk" '
BEGIN {
	FS = "\t"
}
# Blank lines and comment lines between test groups are ignored.
NF == 0 || $1 ~ /^#/ {
	next
}
# A "try NAME PROGRAM" line starts a new group; the rows that follow,
# up to the next blank line, are its test cases.
$1 ~ /try/ {	# new test
	nt++
	sub(/try [a-zA-Z_0-9]+ /, "")	# strip the keyword and the test name
	prog = $0
	printf("try %3d %s\n", nt, prog)
	# Command line that runs the program under test with tab as FS.
	prog = sprintf("%s -F\"\\t\" '"'"'%s'"'"'", awk, prog)
	# print "prog is", prog
	nt2 = 0
	while (getline > 0) {
		if (NF == 0)	# blank line terminates a sequence
			break
		# Fields 1..NF-1 are the input record; field NF is the
		# expected output.
		input = $1
		for (i = 2; i < NF; i++)	# input data
			input = input "\t" $i
		test = sprintf("./echo '"'"'%s'"'"' | %s >foo1; ",
			input, prog)
		if ($NF == "\"\"")	# "" in the table means empty output
			output = ">foo2;"
		else
			output = sprintf("./echo '"'"'%s'"'"' >foo2; ", $NF)
		# Expand the \t and \n escapes used in expected output.
		gsub(/\\t/, "\t", output)
		gsub(/\\n/, "\n", output)
		run = sprintf("diff foo1 foo2 || echo test %d.%d failed",
			nt, ++nt2)
		# print  "input is", input
		# print  "test is", test
		# print  "output is", output
		# print  "run is", run
		system(test output run)
	}
	tt += nt2
}
END { print tt, "tests" }
' <<\!!!!
# General format:
# try program as rest of line
# $1	$2	$3	output1  (\t for tab, \n for newline,
# $1	$2	$3	output2  ("" for null)
# ... terminated by blank line

# try another program...

try length { print length($1) }
	0
a	1
の今がその時だ	7
Сейчас	6
现在是时候了	6
给所有的好男	6
来参加聚会。	6
😀	1
🖕 finger	8
Τωρα	4
για	3
να	2
עכשיו	5
לכל	3
לבוא	4
の今がその時だ	7
지금이	3
모든	2
파티에	3
Сейчас	6
для	3
прийти	6

try index { print index($1, $2) }
abc	a	1
abc	b	2
abc	x	0
现在是时候了	""	0
现在是时候了	了	6
现在是时候了	在是	2
现在是时候了	x	0
现x在是时候了	x	2
🖕 fingerすべての善人のためにすべての善人のために	f	3
🖕 finger	r	8

try substr { print substr($0, 2, 3) }
abcdef	bcd
Τωρα ειναι η	ωρα
Τω	ω
지금 이절호의	금 이
xпyрийти	пyр

try rematch { print $1 ~ $2 }
abc	a	1
abc	x	0
すべての善人のために	の	1
すべての善人のために	の.*の	1
すべての善人のために	の.*て	0
Τωρα	ω+	1

# replace first occurrence of $2 by $3 in $1
try sub { n = sub($2, $3, $1); print n, $1 }
abcdef	bc	XYZ	1 aXYZdef
abcdef	xy	XYZ	0 abcdef
の今がその時だ	の	NO	1 NO今がその時だ
🖕 finger	.*g	FING	1 FINGer
Сейчас	.	x	1 xейчас

# replace all occurrences of $2 by $3 in $1
try gsub { n = gsub($2, $3, $1); print n, $1 }
abcdef	bc	XYZ	1 aXYZdef
abcdef	xy	XYZ	0 abcdef
の今がその時だ	の	NO	2 NO今がそNO時だ
🖕 finger	.*g	FING	1 FINGer
Сейчас	.	x	6 xxxxxx

try match { print match($1, $2), RSTART, RLENGTH }
abc	[^a]	2 2 1
abc	[^ab]	3 3 1
すべての善人のために	[^す]	2 2 1
すべての善人のために	[^ぁ-ゖ]	5 5 1
abc	a	1 1 1
abc	x	0 0 -1
すべての善人のために	の	4 4 1
すべての善人のために	の.*の	4 4 4
すべての善人のために	の.*て	0 0 -1
Τωρα	ω+	2 2 1
Τωρα	x+	0 0 -1
Τωρα	ω.	2 2 2
すべての善人のために	[の]	4 4 1
すべての善人のために	[ぁ-え]	0 0 -1
すべての善人のために	[^ぁ-え]	1 1 1
Τωρα ειναι η	[α-ω]	2 2 1
Τωρα ειναι η	[α-ω]+	2 2 3
xxxΤωρα ειναι η	[Α-Ω]	4 4 1
για όλους τους καλούς ά	α.*α	3 3 15
να έρθει στο πά	[^ν]	2 2 1

# FS="" should split into unicode chars
try emptyFS BEGIN {FS=""} {print NF}
すべての善人のために	10
の今がその時だ	7
Сейчас	6
现在是时候了	6
给所有的好男	6
来参加聚会。	6
😀	1
🖕 finger	8

# printf(%N.Ns) for utf8 strings
try printfs1 {printf("[%5.2s][%-5.2s]\n"), $1, $1}
abcd	[   ab][ab   ]
现在abc	[   现在][现在   ]
现ωabc	[   现ω][现ω   ]
ωabc	[   ωa][ωa   ]
Сейчас	[   Се][Се   ]
Сейxyz	[   Се][Се   ]
😀	[    😀][😀    ]

# printf(%Ns) for utf8 strings
try printfs2 {printf("[%5s][%-5s]\n"), $1, $1}
abcd	[ abcd][abcd ]
现在ab	[ 现在ab][现在ab ]
a现在ab	[a现在ab][a现在ab]
a现在abc	[a现在abc][a现在abc]
现ωab	[ 现ωab][现ωab ]
ωabc	[ ωabc][ωabc ]
Сейчас	[Сейчас][Сейчас]
😀	[    😀][😀    ]

# printf(%.Ns) for utf8 strings
try printfs3 {printf("[%.2s][%-.2s]\n"), $1, $1}
abcd	[ab][ab]
现在abc	[现在][现在]
现ωabc	[现ω][现ω]
ω	[ω][ω]
😀	[😀][😀]

# printf(%c) for utf
try printfc {printf("%c %c\n", $1, substr($1,2,1))}
すべての善人のために	す べ
の今がその時だ	の 今
Сейчас	С е
现在是时候了	现 在


!!!!