/*
 * Copyright (C) 2001 Edmund Grimley Evans <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
18*600f14f4SXin Li
19*600f14f4SXin Li #ifdef HAVE_CONFIG_H
20*600f14f4SXin Li # include <config.h>
21*600f14f4SXin Li #endif
22*600f14f4SXin Li
23*600f14f4SXin Li #include <assert.h>
24*600f14f4SXin Li #include <string.h>
25*600f14f4SXin Li
26*600f14f4SXin Li #include "charset.h"
27*600f14f4SXin Li
/*
 * Checks expected of every charset exercised by this file: decoding with a
 * null source or zero length, decoding a NUL byte and plain ASCII bytes,
 * and encoding NUL and an ASCII character into a small buffer.
 */
void test_any(struct charset *charset)
{
  char out[2];
  int decoded;

  assert(charset);

  /* Decoder: null source and/or zero length must report 0. */

  assert(charset_mbtowc(charset, NULL, NULL, 0) == 0);
  assert(charset_mbtowc(charset, NULL, NULL, 1) == 0);
  /* With a length of 0 the (bogus) source pointer must not matter. */
  assert(charset_mbtowc(charset, NULL, (char *)(-1), 0) == 0);

  /* Decoder: no destination for the decoded character. */
  assert(charset_mbtowc(charset, NULL, "a", 0) == 0);
  assert(charset_mbtowc(charset, NULL, "", 1) == 0);
  assert(charset_mbtowc(charset, NULL, "b", 1) == 1);
  assert(charset_mbtowc(charset, NULL, "", 2) == 0);
  assert(charset_mbtowc(charset, NULL, "c", 2) == 1);

  /* Decoder: same inputs, now checking what lands in the destination. */
  decoded = 'x';
  /* Zero length: the destination is expected to be left untouched. */
  assert(charset_mbtowc(charset, &decoded, "a", 0) == 0 && decoded == 'x');
  assert(charset_mbtowc(charset, &decoded, "", 1) == 0 && decoded == 0);
  assert(charset_mbtowc(charset, &decoded, "b", 1) == 1 && decoded == 'b');
  assert(charset_mbtowc(charset, &decoded, "", 2) == 0 && decoded == 0);
  assert(charset_mbtowc(charset, &decoded, "c", 2) == 1 && decoded == 'c');

  /* Encoder: a null output buffer must report 0. */

  assert(charset_wctomb(charset, NULL, 0) == 0);

  /* Fill with '.' so that any write past the first byte is detectable. */
  memset(out, '.', sizeof(out));
  assert(charset_wctomb(charset, out, 0) == 1 &&
         out[0] == '\0' && out[1] == '.');
  assert(charset_wctomb(charset, out, 'x') == 1 &&
         out[0] == 'x' && out[1] == '.');
}
64*600f14f4SXin Li
/*
 * Exercises the "UTF-8" charset: runs the generic checks from test_any(),
 * then checks the decoder against shortest-form boundaries, truncated
 * input and malformed continuation/lead bytes, and the encoder against
 * the first and last code value of each sequence length (1 to 6 bytes).
 */
void test_utf8(void)
{
  struct charset *charset;
  int wc;
  char s[8];

  charset = charset_find("UTF-8");
  test_any(charset);

  /* Decoder */
  wc = 0;
  assert(charset_mbtowc(charset, &wc, "\177", 1) == 1 && wc == 127);
  /* A lone continuation byte is not a valid start of a sequence. */
  assert(charset_mbtowc(charset, &wc, "\200", 2) == -1);
  /* C1 BF would be a two-byte (overlong) form of 0x7f. */
  assert(charset_mbtowc(charset, &wc, "\301\277", 9) == -1);
  /* Two-byte sequence, but only one byte made available. */
  assert(charset_mbtowc(charset, &wc, "\302\200", 1) == -1);
  assert(charset_mbtowc(charset, &wc, "\302\200", 2) == 2 && wc == 128);
  assert(charset_mbtowc(charset, &wc, "\302\200", 3) == 2 && wc == 128);
  /*
   * For each longer length: the sequence just below the smallest value
   * that needs that length is rejected, the smallest value is accepted.
   */
  assert(charset_mbtowc(charset, &wc, "\340\237\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\240\200", 9) == 3 &&
         wc == 1 << 11);
  assert(charset_mbtowc(charset, &wc, "\360\217\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\200\200", 9) == 4 &&
         wc == 1 << 16);
  assert(charset_mbtowc(charset, &wc, "\370\207\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\370\210\200\200\200", 9) == 5 &&
         wc == 1 << 21);
  assert(charset_mbtowc(charset, &wc, "\374\203\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\374\204\200\200\200\200", 9) == 6 &&
         wc == 1 << 26);
  /* Largest six-byte value. */
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\277", 9) == 6 &&
         wc == 0x7fffffff);

  /*
   * Malformed continuation bytes: each position in turn is replaced by a
   * byte whose top two bits are not 10 (either 00/01 or 11).
   */
  assert(charset_mbtowc(charset, &wc, "\302\000", 2) == -1);
  assert(charset_mbtowc(charset, &wc, "\302\300", 2) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\040\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\340\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\240\000", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\240\300", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\020\200\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\320\200\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\000\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\300\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\200\000", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\200\300", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\077\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\377\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\077\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\377\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\077\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\377\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\077", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\377", 9) == -1);

  /* 0xfe and 0xff are rejected as lead bytes. */
  assert(charset_mbtowc(charset, &wc, "\376\277\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\377\277\277\277\277\277", 9) == -1);

  /*
   * Encoder: s is pre-filled with '.' so each check also verifies that
   * bytes beyond the encoded sequence are left alone.
   */
  safe_strncpy(s, ".......", sizeof(s));
  /* A value with bit 31 set cannot be encoded; s must stay untouched. */
  assert(charset_wctomb(charset, s, 1u << 31) == -1 &&
         !strcmp(s, "......."));
  assert(charset_wctomb(charset, s, 127) == 1 &&
         !strcmp(s, "\177......"));
  assert(charset_wctomb(charset, s, 128) == 2 &&
         !strcmp(s, "\302\200....."));
  assert(charset_wctomb(charset, s, 0x7ff) == 2 &&
         !strcmp(s, "\337\277....."));
  assert(charset_wctomb(charset, s, 0x800) == 3 &&
         !strcmp(s, "\340\240\200...."));
  assert(charset_wctomb(charset, s, 0xffff) == 3 &&
         !strcmp(s, "\357\277\277...."));
  assert(charset_wctomb(charset, s, 0x10000) == 4 &&
         !strcmp(s, "\360\220\200\200..."));
  assert(charset_wctomb(charset, s, 0x1fffff) == 4 &&
         !strcmp(s, "\367\277\277\277..."));
  assert(charset_wctomb(charset, s, 0x200000) == 5 &&
         !strcmp(s, "\370\210\200\200\200.."));
  assert(charset_wctomb(charset, s, 0x3ffffff) == 5 &&
         !strcmp(s, "\373\277\277\277\277.."));
  assert(charset_wctomb(charset, s, 0x4000000) == 6 &&
         !strcmp(s, "\374\204\200\200\200\200."));
  assert(charset_wctomb(charset, s, 0x7fffffff) == 6 &&
         !strcmp(s, "\375\277\277\277\277\277."));
}
148*600f14f4SXin Li
/*
 * Exercises the "us-ascii" charset: runs the generic checks from
 * test_any(), then verifies that 127 is the last value accepted by both
 * the decoder and the encoder.
 */
void test_ascii(void)
{
  struct charset *charset;
  int wc;
  char s[3];

  charset = charset_find("us-ascii");
  test_any(charset);

  /* Decoder: 0x7f decodes, 0x80 is rejected. */
  wc = 0;
  assert(charset_mbtowc(charset, &wc, "\177", 2) == 1 && wc == 127);
  assert(charset_mbtowc(charset, &wc, "\200", 2) == -1);

  /*
   * Encoder: s starts as ".." so a successful one-byte write leaves the
   * second '.' in place.
   */
  safe_strncpy(s, "..", sizeof(s));
  assert(charset_wctomb(charset, s, 256) == -1 && !strcmp(s, ".."));
  assert(charset_wctomb(charset, s, 255) == -1);
  assert(charset_wctomb(charset, s, 128) == -1);
  assert(charset_wctomb(charset, s, 127) == 1 && !strcmp(s, "\177."));
}
170*600f14f4SXin Li
/*
 * Exercises the "iso-8859-1" charset: runs the generic checks from
 * test_any(), then verifies that bytes decode one at a time to their own
 * value and that the encoder accepts values up to 255 only.
 */
void test_iso1(void)
{
  struct charset *charset;
  int wc;
  char s[3];

  charset = charset_find("iso-8859-1");
  test_any(charset);

  /* Decoder: only the first byte (0xc2) is consumed. */
  wc = 0;
  assert(charset_mbtowc(charset, &wc, "\302\200", 9) == 1 && wc == 0xc2);

  /*
   * Encoder: s starts as ".." so a successful one-byte write leaves the
   * second '.' in place.
   */
  safe_strncpy(s, "..", sizeof(s));
  assert(charset_wctomb(charset, s, 256) == -1 && !strcmp(s, ".."));
  assert(charset_wctomb(charset, s, 255) == 1 && !strcmp(s, "\377."));
  assert(charset_wctomb(charset, s, 128) == 1 && !strcmp(s, "\200."));
}
190*600f14f4SXin Li
/*
 * Exercises the "iso-8859-2" charset: runs the generic checks from
 * test_any(), then spot-checks byte/code-value pairs where the mapping is
 * and is not the identity.
 */
void test_iso2(void)
{
  struct charset *charset;
  int wc;
  char s[3];

  charset = charset_find("iso-8859-2");
  test_any(charset);

  /* Decoder: 0xc2 maps to itself, 0xff maps to 0x2d9. */
  wc = 0;
  assert(charset_mbtowc(charset, &wc, "\302\200", 9) == 1 && wc == 0xc2);
  assert(charset_mbtowc(charset, &wc, "\377", 2) == 1 && wc == 0x2d9);

  /*
   * Encoder: s starts as ".." so a failed call must leave it unchanged
   * and a successful one-byte write leaves the second '.' in place.
   */
  safe_strncpy(s, "..", sizeof(s));
  assert(charset_wctomb(charset, s, 256) == -1 && !strcmp(s, ".."));
  /* 255 is expected to have no encoding here (byte 0xff decodes to 0x2d9). */
  assert(charset_wctomb(charset, s, 255) == -1 && !strcmp(s, ".."));
  /* 258 lies above 255 yet is expected to encode, as the byte 0xc3. */
  assert(charset_wctomb(charset, s, 258) == 1 && !strcmp(s, "\303."));
  assert(charset_wctomb(charset, s, 128) == 1 && !strcmp(s, "\200."));
}
212*600f14f4SXin Li
/*
 * Exercises charset_convert() between UTF-8, ISO-8859-1 and ISO-8859-2:
 * exact round trips, the return codes and replacement characters asserted
 * for invalid input ("#") and unrepresentable characters ("?"), null
 * output arguments, and a full 256-byte ISO-8859-2 -> UTF-8 -> ISO-8859-2
 * round trip.
 *
 * NOTE(review): the buffers handed back through q and r are never released
 * here; if charset_convert() allocates them, each call leaks - confirm
 * ownership against charset.h.
 * NOTE(review): the calls live inside assert(), so building with NDEBUG
 * removes them and leaves q, r, n and n2 unset.
 */
void test_convert(void)
{
  const char *p;
  char *q, *r;
  char s[256];
  size_t n, n2;
  int i;

  /*
   * p begins with a NUL byte, so the comparison has to be length-based:
   * strcmp() would stop at the first byte and check nothing else.  n == 10
   * is verified first, so reading 10 bytes of q is in bounds.
   */
  p = "\000x\302\200\375\277\277\277\277\277";
  assert(charset_convert("UTF-8", "UTF-8", p, 10, &q, &n) == 0 &&
         n == 10 && !memcmp(p, q, 10));
  /* Invalid UTF-8 input: each bad byte is expected to come out as '#'. */
  assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, &q, &n) == 2 &&
         n == 4 && !strcmp(q, "x##y"));
  /* Either output argument may be null. */
  assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, 0, &n) == 2 &&
         n == 4);
  assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, &q, 0) == 2 &&
         !strcmp(q, "x##y"));
  /* Valid input with no target encoding is expected to come out as '?'. */
  assert(charset_convert("UTF-8", "iso-8859-1",
                         "\302\200\304\200x", 5, &q, &n) == 1 &&
         n == 3 && !strcmp(q, "\200?x"));
  /* Embedded NUL bytes are converted like any other byte. */
  assert(charset_convert("iso-8859-1", "UTF-8",
                         "\000\200\377", 3, &q, &n) == 0 &&
         n == 5 && !memcmp(q, "\000\302\200\303\277", 5));
  assert(charset_convert("iso-8859-1", "iso-8859-1",
                         "\000\200\377", 3, &q, &n) == 0 &&
         n == 3 && !memcmp(q, "\000\200\377", 3));

  /* Byte 0xc0 in ISO-8859-2 <-> the UTF-8 sequence c5 94, both directions. */
  assert(charset_convert("iso-8859-2", "utf-8", "\300", 1, &q, &n) == 0 &&
         n == 2 && !strcmp(q, "\305\224"));
  assert(charset_convert("utf-8", "iso-8859-2", "\305\224", 2, &q, &n) == 0 &&
         n == 1 && !strcmp(q, "\300"));

  /* Every byte value once, in order. */
  for (i = 0; i < 256; i++)
    s[i] = (char)i;

  /* All 256 bytes must survive a round trip through UTF-8. */
  assert(charset_convert("iso-8859-2", "utf-8", s, 256, &q, &n) == 0);
  assert(charset_convert("utf-8", "iso-8859-2", q, n, &r, &n2) == 0);
  assert(n2 == 256 && !memcmp(r, s, n2));
}
252*600f14f4SXin Li
/*
 * Runs every test in this file.  Failures abort through assert(), so
 * reaching the return statement means all checks passed.
 */
int main(void)
{
  /* Per-charset decoder/encoder checks. */
  test_utf8();
  test_ascii();
  test_iso1();
  test_iso2();

  /* Whole-buffer conversion between charsets. */
  test_convert();

  return 0;
}
264