/* xref: /aosp_15_r20/external/cronet/third_party/apache-portable-runtime/src/test/internal/testucs.c (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b) */
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #include "apr.h"
18 #include "arch/win32/apr_arch_utf8.h"
19 #include <wchar.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <assert.h>
24 
25 struct testval {
26     unsigned char n[8];
27     apr_size_t nl;
28     wchar_t w[4];
29     apr_size_t wl;
30 };
31 
#ifdef FOR_REFERENCE
/* For reference; a table of invalid utf-8 encoded ucs-2/ucs-4 sequences.
 * The table consists of start, end pairs for all invalid ranges.
 * NO_UCS2_PAIRS will pass the reserved D800-DFFF values, halting at FFFF
 * FULL_UCS4_MAPPER represents all 31 bit values to 7FFF FFFF
 *
 * We already tested these, because we ensure there is a 1:1 mapping across
 * the entire range of byte values in each position of 1 to 6 byte sequences.
 *
 * Each entry fills only the leading members of struct testval (the byte
 * sequence and its length); the wide members are left zero-initialized.
 */
struct testval malformed[] = {
    {{0x80,}, 1,},      /* 10000000  64 invalid leading continuation values */
    {{0xBF,}, 1,},      /* 10111111  64 invalid leading continuation values */
    {{0xC0,0x80}, 2,},                         /* overshort mapping of 0000 */
    {{0xC1,0xBF}, 2,},                         /* overshort mapping of 007F */
    {{0xE0,0x80,0x80,}, 3,},                   /* overshort mapping of 0000 */
    {{0xE0,0x9F,0xBF,}, 3,},                   /* overshort mapping of 07FF */
#ifndef NO_UCS2_PAIRS
    {{0xED,0xA0,0x80,}, 3,},    /* unexpected mapping of UCS-2 literal D800 */
    {{0xED,0xBF,0xBF,}, 3,},    /* unexpected mapping of UCS-2 literal DFFF */
#endif
    {{0xF0,0x80,0x80,0x80,}, 4,},              /* overshort mapping of 0000 */
    {{0xF0,0x8F,0xBF,0xBF,}, 4,},              /* overshort mapping of FFFF */
#ifdef NO_UCS2_PAIRS
    {{0xF0,0x90,0x80,0x80,}, 4,},      /* invalid too large value 0001 0000 */
    {{0xF4,0x8F,0xBF,0xBF,}, 4,},      /* invalid too large value 0010 FFFF */
#endif
#ifndef FULL_UCS4_MAPPER
    {{0xF4,0x90,0x80,0x80,}, 4,},      /* invalid too large value 0011 0000 */
    {{0xF7,0xBF,0xBF,0xBF,}, 4,},      /* invalid too large value 001F FFFF */
#endif
    {{0xF8,0x80,0x80,0x80,0x80,}, 5,},    /* overshort mapping of 0000 0000 */
    {{0xF8,0x87,0xBF,0xBF,0xBF,}, 5,},    /* overshort mapping of 001F FFFF */
#ifndef FULL_UCS4_MAPPER
    {{0xF8,0x88,0x80,0x80,0x80,}, 5,}, /* invalid too large value 0020 0000 */
    {{0xFB,0xBF,0xBF,0xBF,0xBF,}, 5,}, /* invalid too large value 03FF FFFF */
#endif
    {{0xFC,0x80,0x80,0x80,0x80,0x80,}, 6,},  /* overshort mapping 0000 0000 */
    {{0xFC,0x83,0xBF,0xBF,0xBF,0xBF,}, 6,},  /* overshort mapping 03FF FFFF */
#ifndef FULL_UCS4_MAPPER
    {{0xFC,0x84,0x80,0x80,0x80,0x80,}, 6,},  /* invalid too large 0400 0000 */
    {{0xFD,0xBF,0xBF,0xBF,0xBF,0xBF,}, 6,},  /* invalid too large 7FFF FFFF */
#endif
    {{0xFE,}, 1,},    /* 11111110  invalid "too large" value, no 7 byte seq */
    {{0xFF,}, 1,},    /* 11111111  invalid "too large" value, no 8 byte seq */
};
#endif /* FOR_REFERENCE */
78 
displaynw(struct testval * f,struct testval * l)79 void displaynw(struct testval *f, struct testval *l)
80 {
81     char x[80], *t = x;
82     int i;
83     for (i = 0; i < f->nl; ++i)
84         t += sprintf(t, "%02X ", f->n[i]);
85     *(t++) = '-';
86     for (i = 0; i < l->nl; ++i)
87         t += sprintf(t, " %02X", l->n[i]);
88     *(t++) = ' ';
89     *(t++) = '=';
90     *(t++) = ' ';
91     for (i = 0; i < f->wl; ++i)
92         t += sprintf(t, "%04X ", f->w[i]);
93     *(t++) = '-';
94     for (i = 0; i < l->wl; ++i)
95         t += sprintf(t, " %04X", l->w[i]);
96     *t = '\0';
97     puts(x);
98 }
99 
100 /*
101  *  Test every possible byte value.
102  *  If the test passes or fails at this byte value we are done.
103  *  Otherwise iterate test_nrange again, appending another byte.
104  */
test_nrange(struct testval * p)105 void test_nrange(struct testval *p)
106 {
107     struct testval f, l, s;
108     apr_status_t rc;
109     int success = 0;
110 
111     memcpy (&s, p, sizeof(s));
112     ++s.nl;
113 
114     do {
115         apr_size_t nl = s.nl, wl = sizeof(s.w) / 2;
116         rc = apr_conv_utf8_to_ucs2(s.n, &nl, s.w, &wl);
117         s.wl = (sizeof(s.w) / 2) - wl;
118         if (!nl && rc == APR_SUCCESS) {
119             if (!success) {
120                 memcpy(&f, &s, sizeof(s));
121                 success = -1;
122             }
123             else {
124                 if (s.wl != l.wl
125                  || memcmp(s.w, l.w, (s.wl - 1) * 2) != 0
126                  || s.w[s.wl - 1] != l.w[l.wl - 1] + 1) {
127                     displaynw(&f, &l);
128                     memcpy(&f, &s, sizeof(s));
129                 }
130             }
131             memcpy(&l, &s, sizeof(s));
132         }
133         else {
134             if (success) {
135                 displaynw(&f, &l);
136                 success = 0;
137             }
138             if (rc == APR_INCOMPLETE) {
139                 test_nrange(&s);
140             }
141         }
142     } while (++s.n[s.nl - 1]);
143 
144     if (success) {
145         displaynw(&f, &l);
146         success = 0;
147     }
148 }
149 
150 /*
151  *  Test every possible word value.
152  *  Once we are finished, retest every possible word value.
153  *  if the test fails on the following null word, iterate test_nrange
154  *  again, appending another word.
155  *  This assures the output order of the two tests are in sync.
156  */
test_wrange(struct testval * p)157 void test_wrange(struct testval *p)
158 {
159     struct testval f, l, s;
160     apr_status_t rc;
161     int success = 0;
162 
163     memcpy (&s, p, sizeof(s));
164     ++s.wl;
165 
166     do {
167         apr_size_t nl = sizeof(s.n), wl = s.wl;
168         rc = apr_conv_ucs2_to_utf8(s.w, &wl, s.n, &nl);
169         s.nl = sizeof(s.n) - nl;
170         if (!wl && rc == APR_SUCCESS) {
171             if (!success) {
172                 memcpy(&f, &s, sizeof(s));
173                 success = -1;
174             }
175             else {
176                 if (s.nl != l.nl
177                  || memcmp(s.n, l.n, s.nl - 1) != 0
178                  || s.n[s.nl - 1] != l.n[l.nl - 1] + 1) {
179                     displaynw(&f, &l);
180                     memcpy(&f, &s, sizeof(s));
181                 }
182             }
183             memcpy(&l, &s, sizeof(s));
184         }
185         else {
186             if (success) {
187                 displaynw(&f, &l);
188                 success = 0;
189             }
190         }
191     } while (++s.w[s.wl - 1]);
192 
193     if (success) {
194         displaynw(&f, &l);
195         success = 0;
196     }
197 
198     do {
199         apr_size_t wl = s.wl, nl = sizeof(s.n);
200         rc = apr_conv_ucs2_to_utf8(s.w, &wl, s.n, &nl);
201         s.nl = sizeof(s.n) - s.nl;
202         if (rc == APR_INCOMPLETE) {
203             test_wrange(&s);
204         }
205     } while (++s.w[s.wl - 1]);
206 }
207 
208 /*
209  *  Test every possible byte value.
210  *  If the test passes or fails at this byte value we are done.
211  *  Otherwise iterate test_nrange again, appending another byte.
212  */
test_ranges()213 void test_ranges()
214 {
215     struct testval ntest, wtest;
216     apr_status_t nrc, wrc;
217     apr_size_t inlen;
218     unsigned long matches = 0;
219 
220     memset(&ntest, 0, sizeof(ntest));
221     ++ntest.nl;
222 
223     memset(&wtest, 0, sizeof(wtest));
224     ++wtest.wl;
225 
226     do {
227         do {
228             inlen = ntest.nl;
229             ntest.wl = sizeof(ntest.w) / 2;
230             nrc = apr_conv_utf8_to_ucs2(ntest.n, &inlen, ntest.w, &ntest.wl);
231             if (nrc == APR_SUCCESS) {
232                 ntest.wl = (sizeof(ntest.w) / 2) - ntest.wl;
233                 break;
234             }
235             if (nrc == APR_INCOMPLETE) {
236                 ++ntest.nl;
237                 if (ntest.nl > 6) {
238                     printf ("\n\nUnexpected utf8 sequence of >6 bytes;\n");
239                     exit(255);
240                 }
241                 continue;
242             }
243             else {
244                 while (!(++ntest.n[ntest.nl - 1])) {
245                     if (!(--ntest.nl))
246                         break;
247                 }
248             }
249         } while (ntest.nl);
250 
251         do {
252             inlen = wtest.wl;
253             wtest.nl = sizeof(wtest.n);
254             wrc = apr_conv_ucs2_to_utf8(wtest.w, &inlen, wtest.n, &wtest.nl);
255             if (wrc == APR_SUCCESS) {
256                 wtest.nl = sizeof(wtest.n) - wtest.nl;
257                 break;
258             }
259             else {
260                 if (!(++wtest.w[wtest.wl - 1])) {
261                     if (wtest.wl == 1)
262                         ++wtest.wl;
263                     else
264                         ++wtest.w[0];
265 
266                     /* On the second pass, ensure lead word is incomplete */
267                     do {
268                         inlen = 1;
269                         wtest.nl = sizeof(wtest.n);
270                         if (apr_conv_ucs2_to_utf8(wtest.w, &inlen, wtest.n, &wtest.nl)
271                                 == APR_INCOMPLETE)
272                             break;
273                         if (!(++wtest.w[0])) {
274                             wtest.wl = 0;
275                             break;
276                         }
277                     } while (1);
278                 }
279             }
280         } while (wtest.wl);
281 
282         if (!ntest.nl && !wtest.wl)
283             break;
284 
285         /* Identical? */
286         if ((wtest.nl != ntest.nl)
287          || (memcmp(wtest.n, ntest.n, ntest.nl) != 0)
288          || (wtest.wl != ntest.wl)
289          || (memcmp(ntest.w, wtest.w, wtest.wl * 2) != 0)) {
290             printf ("\n\nMismatch of w/n conversion at;\n");
291             displaynw(&ntest, &wtest);
292             exit(255);
293         }
294         ++matches;
295 
296         while (!(++ntest.n[ntest.nl - 1])) {
297             if (!(--ntest.nl))
298                 break;
299         }
300 
301         if (!(++wtest.w[wtest.wl - 1])) {
302             if (wtest.wl == 1)
303                 ++wtest.wl;
304             else
305                 ++wtest.w[0];
306 
307             /* On the second pass, ensure lead word is incomplete */
308             do {
309                 inlen = 1;
310                 wtest.nl = sizeof(wtest.n);
311                 if (apr_conv_ucs2_to_utf8(wtest.w, &inlen, wtest.n, &wtest.nl)
312                         == APR_INCOMPLETE)
313                     break;
314                 if (!(++wtest.w[0])) {
315                     wtest.wl = 0;
316                     break;
317                 }
318             } while (1);
319         }
320     } while (wtest.wl || ntest.nl);
321 
322     printf ("\n\nutf8 and ucs2 sequences of %lu transformations matched OK.\n",
323             matches);
324 }
325 
326 /*
327  *  Syntax: testucs [w|n]
328  *
329  *  If no arg or arg is not recognized, run equality sequence test.
330  */
main(int argc,char ** argv)331 int main(int argc, char **argv)
332 {
333     struct testval s;
334     memset (&s, 0, sizeof(s));
335 
336     if (argc >= 2 && apr_tolower(*argv[1]) != 'w') {
337         printf ("\n\nTesting Narrow Char Ranges\n");
338         test_nrange(&s);
339     }
340     else if (argc >= 2 && apr_tolower(*argv[1]) != 'n') {
341         printf ("\n\nTesting Wide Char Ranges\n");
342         test_wrange(&s);
343     }
344     else {
345         test_ranges();
346     }
347     return 0;
348 }
349