xref: /aosp_15_r20/external/toybox/toys/example/demo_utf8towc.c (revision cf5a6c84e2b8763fc1a7db14496fd4742913b199)
1 /* demo_utf8towc() against libc mbrtowc()
2  *
3  * Copyright 2017 Rob Landley <[email protected]>
4 
5 USE_DEMO_UTF8TOWC(NEWTOY(demo_utf8towc, 0, TOYFLAG_USR|TOYFLAG_BIN))
6 
7 config DEMO_UTF8TOWC
8   bool "demo_utf8towc"
9   default n
10   help
11     usage: demo_utf8towc
12 
13     Print differences between toybox's utf8 conversion routines vs libc du jour.
14 */
15 
16 #include "toys.h"
17 
// Feed every nonzero 32 bit value, as a 4 byte buffer, to both libc's
// mbrtowc() and toybox's utf8towc(). Each disagreement (in returned length
// or decoded value) is printed to stdout as
//   "value libc_len libc_wc toybox_len toybox_wc"
// and the largest wcwidth() of any toybox-decoded value goes to fd 2 at the end.
void demo_utf8towc_main(void)
{
  unsigned bytes, seq = 1, step, ours;
  int libc_len, our_len, width, widest = 0;
  wchar_t theirs;
  char *buf = (void *)&bytes;
  mbstate_t state;

  memset(&state, 0, sizeof(state));
  // Although there are 0x10ffff unicode points, test all 4 byte combinations.
  // seq wraps around to 0 after the final combination, which ends the loop.
  while (seq) {
    theirs = ours = 0;

    // NOTE(review): SWAP_BE32() presumably stores seq high byte first so buf
    // reads it in sequence order -- confirm against the macro's definition.
    bytes = SWAP_BE32(seq);
    libc_len = mbrtowc(&theirs, buf, 4, &state);
    // Negative means mbrtowc() did not decode a character: start the next
    // attempt from a freshly zeroed conversion state.
    if (libc_len<0) memset(&state, 0, sizeof(state));
    our_len = utf8towc(&ours, buf, 4);

    // Track the widest column count reported for anything toybox decoded.
    width = wcwidth(ours);
    if (width>widest) widest = width;

    if (libc_len != our_len || theirs != ours) {
      // Mismatch: report it, then move on to the very next value.
      printf("%x %d %x %d %x\n", seq, libc_len, theirs, our_len, ours);
      seq++;
    } else if (our_len<1) seq++;
    else {
      // Both agree our_len bytes were consumed, so the trailing 4-our_len
      // bytes didn't matter: jump past every value sharing this prefix.
      step = 1<<(8*(4-our_len));
      seq = (seq & ~(step-1)) + step;
    }
  }
  dprintf(2, "maxlen=%d\n", widest);
}
50