1 /* demo_utf8towc() against libc mbrtowc()
2 *
3 * Copyright 2017 Rob Landley <[email protected]>
4
5 USE_DEMO_UTF8TOWC(NEWTOY(demo_utf8towc, 0, TOYFLAG_USR|TOYFLAG_BIN))
6
7 config DEMO_UTF8TOWC
8 bool "demo_utf8towc"
9 default n
10 help
11 usage: demo_utf8towc
12
13 Print differences between toybox's utf8 conversion routines vs libc du jour.
14 */
15
16 #include "toys.h"
17
/* Decode 4-byte patterns with both libc mbrtowc() and utf8towc(), printing
 * one line to stdout for every pattern where the returned length or the
 * decoded value disagree. When both agree on a decoded length, the remaining
 * patterns that share the same leading bytes are skipped. Once the scan is
 * done, the largest wcwidth() result seen is written to file descriptor 2.
 */
void demo_utf8towc_main(void)
{
  mbstate_t state;
  unsigned bytes, seq = 1, toy_wc;
  wchar_t libc_wc;
  char *buf = (void *)&bytes;
  int libc_len, toy_len, widest = 0;

  memset(&state, 0, sizeof(state));

  // Although there are 0x10ffff unicode points, test all 4 byte combinations.
  // seq is unsigned and only moves forward, so it reaches 0 (ending the loop)
  // by wrapping around.
  while (seq) {
    unsigned stride;
    int width;

    libc_wc = 0;
    toy_wc = 0;

    // Both decoders read the candidate through buf, which aliases bytes.
    // NOTE(review): SWAP_BE32 presumably stores seq most significant byte
    // first regardless of host endianness -- confirm against its definition.
    bytes = SWAP_BE32(seq);

    libc_len = mbrtowc(&libc_wc, buf, 4, &state);
    // A negative result means libc rejected or could not finish the
    // sequence: wipe the conversion state so the next call starts clean.
    if (libc_len<0) memset(&state, 0, sizeof(state));
    toy_len = utf8towc(&toy_wc, buf, 4);

    width = wcwidth(toy_wc);
    if (width>widest) widest = width;

    // Disagreement: report it, then move on to the very next pattern.
    if (libc_len != toy_len || libc_wc != toy_wc) {
      printf("%x %d %x %d %x\n", seq, libc_len, libc_wc, toy_len, toy_wc);
      seq++;
      continue;
    }

    // Agreement, but nothing was consumed: also step by one.
    if (toy_len<1) {
      seq++;
      continue;
    }

    // Agreement on a sequence of toy_len bytes: the trailing (4-toy_len)
    // bytes were not part of it, so clear them and bump the consumed prefix.
    stride = 1<<(8*(4-toy_len));
    seq &= ~(stride-1);
    seq += stride;
  }

  dprintf(2, "maxlen=%d\n", widest);
}
50