1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "apr.h"
18 #include "arch/win32/apr_arch_utf8.h"
19 #include <wchar.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <assert.h>
24
25 struct testval {
26 unsigned char n[8];
27 apr_size_t nl;
28 wchar_t w[4];
29 apr_size_t wl;
30 };
31
#ifdef FOR_REFERENCE
/* For reference; a table of invalid utf-8 encoded ucs-2/ucs-4 sequences.
 * The table consists of start, end pairs for all invalid ranges.
 * NO_UCS2_PAIRS will pass the reserved D800-DFFF values, halting at FFFF
 * FULL_UCS4_MAPPER represents all 31 bit values to 7FFF FFFF
 *
 * We already tested these, because we ensure there is a 1:1 mapping across
 * the entire range of byte values in each position of 1 to 6 byte sequences.
 *
 * Each entry initializes only the narrow side of a struct testval (the
 * bytes and their count); the wide side is left zero.  The table is only
 * compiled when FOR_REFERENCE is defined.
 */
struct testval malformed[] = {
    {{0x80,}, 1,},      /* 10000000  64 invalid leading continuation values */
    {{0xBF,}, 1,},      /* 10111111  64 invalid leading continuation values */
    {{0xC0,0x80}, 2,},                 /* overshort mapping of 0000 */
    {{0xC1,0xBF}, 2,},                 /* overshort mapping of 007F */
    {{0xE0,0x80,0x80,}, 3,},           /* overshort mapping of 0000 */
    {{0xE0,0x9F,0xBF,}, 3,},           /* overshort mapping of 07FF */
#ifndef NO_UCS2_PAIRS
    {{0xED,0xA0,0x80,}, 3,},    /* unexpected mapping of UCS-2 literal D800 */
    {{0xED,0xBF,0xBF,}, 3,},    /* unexpected mapping of UCS-2 literal DFFF */
#endif
    {{0xF0,0x80,0x80,0x80,}, 4,},      /* overshort mapping of 0000 */
    {{0xF0,0x8F,0xBF,0xBF,}, 4,},      /* overshort mapping of FFFF */
#ifdef NO_UCS2_PAIRS
    {{0xF0,0x90,0x80,0x80,}, 4,},      /* invalid too large value 0001 0000 */
    {{0xF4,0x8F,0xBF,0xBF,}, 4,},      /* invalid too large value 0010 FFFF */
#endif
#ifndef FULL_UCS4_MAPPER
    {{0xF4,0x90,0x80,0x80,}, 4,},      /* invalid too large value 0011 0000 */
    {{0xF7,0xBF,0xBF,0xBF,}, 4,},      /* invalid too large value 001F FFFF */
#endif
    {{0xF8,0x80,0x80,0x80,0x80,}, 5,}, /* overshort mapping of 0000 0000 */
    {{0xF8,0x87,0xBF,0xBF,0xBF,}, 5,}, /* overshort mapping of 001F FFFF */
#ifndef FULL_UCS4_MAPPER
    {{0xF8,0x88,0x80,0x80,0x80,}, 5,}, /* invalid too large value 0020 0000 */
    {{0xFB,0xBF,0xBF,0xBF,0xBF,}, 5,}, /* invalid too large value 03FF FFFF */
#endif
    {{0xFC,0x80,0x80,0x80,0x80,0x80,}, 6,},  /* overshort mapping 0000 0000 */
    {{0xFC,0x83,0xBF,0xBF,0xBF,0xBF,}, 6,},  /* overshort mapping 03FF FFFF */
#ifndef FULL_UCS4_MAPPER
    {{0xFC,0x84,0x80,0x80,0x80,0x80,}, 6,},  /* invalid too large value 0400 0000 */
    {{0xFD,0xBF,0xBF,0xBF,0xBF,0xBF,}, 6,},  /* invalid too large value 7FFF FFFF */
#endif
    {{0xFE,}, 1,},      /* 11111110  invalid "too large" value, no 7 byte seq */
    {{0xFF,}, 1,},      /* 11111111  invalid "too large" value, no 8 byte seq */
};
#endif /* FOR_REFERENCE */
78
displaynw(struct testval * f,struct testval * l)79 void displaynw(struct testval *f, struct testval *l)
80 {
81 char x[80], *t = x;
82 int i;
83 for (i = 0; i < f->nl; ++i)
84 t += sprintf(t, "%02X ", f->n[i]);
85 *(t++) = '-';
86 for (i = 0; i < l->nl; ++i)
87 t += sprintf(t, " %02X", l->n[i]);
88 *(t++) = ' ';
89 *(t++) = '=';
90 *(t++) = ' ';
91 for (i = 0; i < f->wl; ++i)
92 t += sprintf(t, "%04X ", f->w[i]);
93 *(t++) = '-';
94 for (i = 0; i < l->wl; ++i)
95 t += sprintf(t, " %04X", l->w[i]);
96 *t = '\0';
97 puts(x);
98 }
99
100 /*
101 * Test every possible byte value.
102 * If the test passes or fails at this byte value we are done.
103 * Otherwise iterate test_nrange again, appending another byte.
104 */
test_nrange(struct testval * p)105 void test_nrange(struct testval *p)
106 {
107 struct testval f, l, s;
108 apr_status_t rc;
109 int success = 0;
110
111 memcpy (&s, p, sizeof(s));
112 ++s.nl;
113
114 do {
115 apr_size_t nl = s.nl, wl = sizeof(s.w) / 2;
116 rc = apr_conv_utf8_to_ucs2(s.n, &nl, s.w, &wl);
117 s.wl = (sizeof(s.w) / 2) - wl;
118 if (!nl && rc == APR_SUCCESS) {
119 if (!success) {
120 memcpy(&f, &s, sizeof(s));
121 success = -1;
122 }
123 else {
124 if (s.wl != l.wl
125 || memcmp(s.w, l.w, (s.wl - 1) * 2) != 0
126 || s.w[s.wl - 1] != l.w[l.wl - 1] + 1) {
127 displaynw(&f, &l);
128 memcpy(&f, &s, sizeof(s));
129 }
130 }
131 memcpy(&l, &s, sizeof(s));
132 }
133 else {
134 if (success) {
135 displaynw(&f, &l);
136 success = 0;
137 }
138 if (rc == APR_INCOMPLETE) {
139 test_nrange(&s);
140 }
141 }
142 } while (++s.n[s.nl - 1]);
143
144 if (success) {
145 displaynw(&f, &l);
146 success = 0;
147 }
148 }
149
150 /*
151 * Test every possible word value.
152 * Once we are finished, retest every possible word value.
153 * if the test fails on the following null word, iterate test_nrange
154 * again, appending another word.
155 * This assures the output order of the two tests are in sync.
156 */
test_wrange(struct testval * p)157 void test_wrange(struct testval *p)
158 {
159 struct testval f, l, s;
160 apr_status_t rc;
161 int success = 0;
162
163 memcpy (&s, p, sizeof(s));
164 ++s.wl;
165
166 do {
167 apr_size_t nl = sizeof(s.n), wl = s.wl;
168 rc = apr_conv_ucs2_to_utf8(s.w, &wl, s.n, &nl);
169 s.nl = sizeof(s.n) - nl;
170 if (!wl && rc == APR_SUCCESS) {
171 if (!success) {
172 memcpy(&f, &s, sizeof(s));
173 success = -1;
174 }
175 else {
176 if (s.nl != l.nl
177 || memcmp(s.n, l.n, s.nl - 1) != 0
178 || s.n[s.nl - 1] != l.n[l.nl - 1] + 1) {
179 displaynw(&f, &l);
180 memcpy(&f, &s, sizeof(s));
181 }
182 }
183 memcpy(&l, &s, sizeof(s));
184 }
185 else {
186 if (success) {
187 displaynw(&f, &l);
188 success = 0;
189 }
190 }
191 } while (++s.w[s.wl - 1]);
192
193 if (success) {
194 displaynw(&f, &l);
195 success = 0;
196 }
197
198 do {
199 apr_size_t wl = s.wl, nl = sizeof(s.n);
200 rc = apr_conv_ucs2_to_utf8(s.w, &wl, s.n, &nl);
201 s.nl = sizeof(s.n) - s.nl;
202 if (rc == APR_INCOMPLETE) {
203 test_wrange(&s);
204 }
205 } while (++s.w[s.wl - 1]);
206 }
207
208 /*
209 * Test every possible byte value.
210 * If the test passes or fails at this byte value we are done.
211 * Otherwise iterate test_nrange again, appending another byte.
212 */
test_ranges()213 void test_ranges()
214 {
215 struct testval ntest, wtest;
216 apr_status_t nrc, wrc;
217 apr_size_t inlen;
218 unsigned long matches = 0;
219
220 memset(&ntest, 0, sizeof(ntest));
221 ++ntest.nl;
222
223 memset(&wtest, 0, sizeof(wtest));
224 ++wtest.wl;
225
226 do {
227 do {
228 inlen = ntest.nl;
229 ntest.wl = sizeof(ntest.w) / 2;
230 nrc = apr_conv_utf8_to_ucs2(ntest.n, &inlen, ntest.w, &ntest.wl);
231 if (nrc == APR_SUCCESS) {
232 ntest.wl = (sizeof(ntest.w) / 2) - ntest.wl;
233 break;
234 }
235 if (nrc == APR_INCOMPLETE) {
236 ++ntest.nl;
237 if (ntest.nl > 6) {
238 printf ("\n\nUnexpected utf8 sequence of >6 bytes;\n");
239 exit(255);
240 }
241 continue;
242 }
243 else {
244 while (!(++ntest.n[ntest.nl - 1])) {
245 if (!(--ntest.nl))
246 break;
247 }
248 }
249 } while (ntest.nl);
250
251 do {
252 inlen = wtest.wl;
253 wtest.nl = sizeof(wtest.n);
254 wrc = apr_conv_ucs2_to_utf8(wtest.w, &inlen, wtest.n, &wtest.nl);
255 if (wrc == APR_SUCCESS) {
256 wtest.nl = sizeof(wtest.n) - wtest.nl;
257 break;
258 }
259 else {
260 if (!(++wtest.w[wtest.wl - 1])) {
261 if (wtest.wl == 1)
262 ++wtest.wl;
263 else
264 ++wtest.w[0];
265
266 /* On the second pass, ensure lead word is incomplete */
267 do {
268 inlen = 1;
269 wtest.nl = sizeof(wtest.n);
270 if (apr_conv_ucs2_to_utf8(wtest.w, &inlen, wtest.n, &wtest.nl)
271 == APR_INCOMPLETE)
272 break;
273 if (!(++wtest.w[0])) {
274 wtest.wl = 0;
275 break;
276 }
277 } while (1);
278 }
279 }
280 } while (wtest.wl);
281
282 if (!ntest.nl && !wtest.wl)
283 break;
284
285 /* Identical? */
286 if ((wtest.nl != ntest.nl)
287 || (memcmp(wtest.n, ntest.n, ntest.nl) != 0)
288 || (wtest.wl != ntest.wl)
289 || (memcmp(ntest.w, wtest.w, wtest.wl * 2) != 0)) {
290 printf ("\n\nMismatch of w/n conversion at;\n");
291 displaynw(&ntest, &wtest);
292 exit(255);
293 }
294 ++matches;
295
296 while (!(++ntest.n[ntest.nl - 1])) {
297 if (!(--ntest.nl))
298 break;
299 }
300
301 if (!(++wtest.w[wtest.wl - 1])) {
302 if (wtest.wl == 1)
303 ++wtest.wl;
304 else
305 ++wtest.w[0];
306
307 /* On the second pass, ensure lead word is incomplete */
308 do {
309 inlen = 1;
310 wtest.nl = sizeof(wtest.n);
311 if (apr_conv_ucs2_to_utf8(wtest.w, &inlen, wtest.n, &wtest.nl)
312 == APR_INCOMPLETE)
313 break;
314 if (!(++wtest.w[0])) {
315 wtest.wl = 0;
316 break;
317 }
318 } while (1);
319 }
320 } while (wtest.wl || ntest.nl);
321
322 printf ("\n\nutf8 and ucs2 sequences of %lu transformations matched OK.\n",
323 matches);
324 }
325
326 /*
327 * Syntax: testucs [w|n]
328 *
329 * If no arg or arg is not recognized, run equality sequence test.
330 */
main(int argc,char ** argv)331 int main(int argc, char **argv)
332 {
333 struct testval s;
334 memset (&s, 0, sizeof(s));
335
336 if (argc >= 2 && apr_tolower(*argv[1]) != 'w') {
337 printf ("\n\nTesting Narrow Char Ranges\n");
338 test_nrange(&s);
339 }
340 else if (argc >= 2 && apr_tolower(*argv[1]) != 'n') {
341 printf ("\n\nTesting Wide Char Ranges\n");
342 test_wrange(&s);
343 }
344 else {
345 test_ranges();
346 }
347 return 0;
348 }
349