xref: /aosp_15_r20/external/flac/src/share/utf8/charset.c (revision 600f14f40d737144c998e2ec7a483122d3776fbc)
1*600f14f4SXin Li /*
2*600f14f4SXin Li  * Copyright (C) 2001 Edmund Grimley Evans <[email protected]>
3*600f14f4SXin Li  *
4*600f14f4SXin Li  * This program is free software; you can redistribute it and/or modify
5*600f14f4SXin Li  * it under the terms of the GNU General Public License as published by
6*600f14f4SXin Li  * the Free Software Foundation; either version 2 of the License, or
7*600f14f4SXin Li  * (at your option) any later version.
8*600f14f4SXin Li  *
9*600f14f4SXin Li  * This program is distributed in the hope that it will be useful,
10*600f14f4SXin Li  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11*600f14f4SXin Li  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12*600f14f4SXin Li  * GNU General Public License for more details.
13*600f14f4SXin Li  *
14*600f14f4SXin Li  * You should have received a copy of the GNU General Public License along
15*600f14f4SXin Li  * with this program; if not, write to the Free Software Foundation, Inc.,
16*600f14f4SXin Li  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17*600f14f4SXin Li  */
18*600f14f4SXin Li 
/*
 * See the corresponding header file for a description of the functions
 * that this file provides.
 *
 * This was first written for Ogg Vorbis but could be of general use.
 *
 * The only deliberate assumption about data sizes is that a short has
 * at least 16 bits, but this code has only been tested on systems with
 * 8-bit char, 16-bit short and 32-bit int.
 */
29*600f14f4SXin Li 
30*600f14f4SXin Li #ifdef HAVE_CONFIG_H
31*600f14f4SXin Li #  include <config.h>
32*600f14f4SXin Li #endif
33*600f14f4SXin Li 
34*600f14f4SXin Li #if !defined _WIN32 && !defined HAVE_ICONV /* should be && defined USE_CHARSET_CONVERT */
35*600f14f4SXin Li 
36*600f14f4SXin Li #include <stdlib.h>
37*600f14f4SXin Li 
38*600f14f4SXin Li #include "share/alloc.h"
39*600f14f4SXin Li #include "charset.h"
40*600f14f4SXin Li 
41*600f14f4SXin Li #include "charmaps.h"
42*600f14f4SXin Li 
/*
 * This is like the standard strcasecmp, but it does not depend
 * on the locale. Locale-dependent functions can be dangerous:
 * we once had a bug involving strcasecmp("iso", "ISO") in a
 * Turkish locale!
 *
 * (I'm not really sure what the official standard says
 * about the sign of strcasecmp("Z", "["), but usually
 * we're only interested in whether it's zero.)
 */
53*600f14f4SXin Li 
/*
 * Compare two NUL-terminated strings, treating the ASCII letters
 * 'a'..'z' as equal to 'A'..'Z'. No other bytes are folded.
 *
 * Returns 0 when the strings match under that folding. Otherwise the
 * result is the difference between the first pair of bytes that differ,
 * both folded to upper case and taken as unsigned values, so the sign
 * does not depend on which case the arguments happen to be written in.
 */
static int ascii_strcasecmp(const char *s1, const char *s2)
{
  for (;; s1++, s2++) {
    int c1 = (unsigned char)*s1;
    int c2 = (unsigned char)*s2;

    if ('a' <= c1 && c1 <= 'z')
      c1 += 'A' - 'a';
    if ('a' <= c2 && c2 <= 'z')
      c2 += 'A' - 'a';

    /* Stop on the first folded mismatch, or when both strings end. */
    if (c1 != c2 || !c1)
      return c1 - c2;
  }
}
74*600f14f4SXin Li 
/*
 * UTF-8 equivalents of the C library's wctomb() and mbtowc().
 */
78*600f14f4SXin Li 
/*
 * Decode one UTF-8 sequence from S, looking at no more than N bytes.
 *
 * Sequences of up to six bytes are accepted. Overlong encodings,
 * stray continuation bytes, truncated sequences and the lead bytes
 * 0xfe/0xff are rejected.
 *
 * Returns the number of bytes consumed, 0 if S is null, N is zero or the
 * byte at S is NUL, and -1 for invalid input. The decoded value is
 * stored in *PWC when PWC is non-null (also for the NUL case).
 */
int utf8_mbtowc(int *pwc, const char *s, size_t n)
{
  const unsigned char *bytes = (const unsigned char *)s;
  unsigned char lead;
  int len, value, pos;

  if (!s || !n)
    return 0;

  lead = bytes[0];

  /* Single-byte (ASCII) case. */
  if (lead < 0x80) {
    if (pwc)
      *pwc = lead;
    return lead ? 1 : 0;
  }

  /* Continuation bytes, the overlong leads 0xc0/0xc1, and 0xfe/0xff. */
  if (lead < 0xc2 || lead >= 0xfe)
    return -1;

  if (lead < 0xe0)
    len = 2;
  else if (lead < 0xf0)
    len = 3;
  else if (lead < 0xf8)
    len = 4;
  else if (lead < 0xfc)
    len = 5;
  else
    len = 6;

  if (n < (size_t)len)
    return -1;

  /* The lead byte contributes its low (7 - len) bits. */
  value = lead & ((1 << (7 - len)) - 1);
  for (pos = 1; pos < len; pos++) {
    unsigned char cont = bytes[pos];

    if ((cont & 0xc0) != 0x80)
      return -1;
    value = (value << 6) | (cont & 0x3f);
  }

  /*
   * Reject values that would have fitted in a shorter sequence. For
   * len == 2 this can never trigger, because lead >= 0xc2 already
   * guarantees a value of at least 0x80.
   */
  if (value < (1 << (5 * len - 4)))
    return -1;

  if (pwc)
    *pwc = value;
  return len;
}
129*600f14f4SXin Li 
/*
 * Encode WC1 as UTF-8 into S, using between one and six bytes.
 *
 * Returns the number of bytes written, 0 if S is null, and -1 when WC1
 * is negative (it does not fit in 31 bits). No terminator is written.
 */
int utf8_wctomb(char *s, int wc1)
{
  uint32_t code = wc1;
  unsigned char lead;
  int len, pos;

  if (!s)
    return 0;

  if (code < (1u << 7)) {
    s[0] = code;
    return 1;
  }

  /* Pick the sequence length and the marker bits of its lead byte. */
  if (code < (1u << 11)) {
    len = 2;
    lead = 0xc0;
  }
  else if (code < (1u << 16)) {
    len = 3;
    lead = 0xe0;
  }
  else if (code < (1u << 21)) {
    len = 4;
    lead = 0xf0;
  }
  else if (code < (1u << 26)) {
    len = 5;
    lead = 0xf8;
  }
  else if (code < (1u << 31)) {
    len = 6;
    lead = 0xfc;
  }
  else
    return -1;

  /* Fill the continuation bytes from the end, six bits at a time. */
  for (pos = len - 1; pos > 0; pos--) {
    s[pos] = 0x80 | (code & 0x3f);
    code >>= 6;
  }
  /* Whatever is left fits in the payload bits of the lead byte. */
  s[0] = lead | code;

  return len;
}
178*600f14f4SXin Li 
/*
 * The charset "object" and methods.
 */
182*600f14f4SXin Li 
/*
 * A character set: a pair of single-character conversion callbacks
 * plus the data they operate on. Both callbacks are handed MAP as
 * their first argument.
 */
struct charset {
  /* Largest number of bytes one encoded character can occupy. */
  int max;
  /* Decode one character from at most N bytes at S into *PWC. */
  int (*mbtowc)(void *map, int *pwc, const char *s, size_t n);
  /* Encode WC into the buffer at S. */
  int (*wctomb)(void *map, char *s, int wc);
  /* Callback-specific data; null for the table-free charsets. */
  void *map;
};
189*600f14f4SXin Li 
charset_mbtowc(struct charset * charset,int * pwc,const char * s,size_t n)190*600f14f4SXin Li int charset_mbtowc(struct charset *charset, int *pwc, const char *s, size_t n)
191*600f14f4SXin Li {
192*600f14f4SXin Li   return (*charset->mbtowc)(charset->map, pwc, s, n);
193*600f14f4SXin Li }
194*600f14f4SXin Li 
charset_wctomb(struct charset * charset,char * s,int wc)195*600f14f4SXin Li int charset_wctomb(struct charset *charset, char *s, int wc)
196*600f14f4SXin Li {
197*600f14f4SXin Li   return (*charset->wctomb)(charset->map, s, wc);
198*600f14f4SXin Li }
199*600f14f4SXin Li 
charset_max(struct charset * charset)200*600f14f4SXin Li int charset_max(struct charset *charset)
201*600f14f4SXin Li {
202*600f14f4SXin Li   return charset->max;
203*600f14f4SXin Li }
204*600f14f4SXin Li 
/*
 * Implementation of UTF-8.
 */
208*600f14f4SXin Li 
/* Charset decoder callback for UTF-8; MAP is not used. */
static int mbtowc_utf8(void *map, int *pwc, const char *s, size_t n)
{
  int consumed = utf8_mbtowc(pwc, s, n);

  (void)map;
  return consumed;
}
214*600f14f4SXin Li 
/* Charset encoder callback for UTF-8; MAP is not used. */
static int wctomb_utf8(void *map, char *s, int wc)
{
  int written = utf8_wctomb(s, wc);

  (void)map;
  return written;
}
220*600f14f4SXin Li 
/*
 * Implementation of US-ASCII.
 * Probably on most architectures this compiles to less than 256 bytes
 * of code, so we can save space by not having a table for this one.
 */
226*600f14f4SXin Li 
/*
 * Charset decoder callback for US-ASCII; MAP is not used.
 *
 * Returns 1 for a non-NUL byte below 0x80, 0 for NUL (or when S is null
 * or N is zero), and -1 for a byte with the top bit set. On success the
 * byte value is stored in *PWC when PWC is non-null.
 */
static int mbtowc_ascii(void *map, int *pwc, const char *s, size_t n)
{
  unsigned char byte;

  (void)map;
  if (!s || !n)
    return 0;

  byte = (unsigned char)*s;
  if (byte > 0x7f)
    return -1;

  if (pwc)
    *pwc = byte;
  return byte != 0;
}
241*600f14f4SXin Li 
/*
 * Charset encoder callback for US-ASCII; MAP is not used.
 *
 * Writes WC as a single byte and returns 1, returns 0 when S is null,
 * and returns -1 when WC lies outside 0..0x7f.
 */
static int wctomb_ascii(void *map, char *s, int wc)
{
  (void)map;

  if (!s)
    return 0;
  if (wc < 0 || wc > 0x7f)
    return -1;

  s[0] = (char)wc;
  return 1;
}
252*600f14f4SXin Li 
/*
 * Implementation of ISO-8859-1.
 * Probably on most architectures this compiles to less than 256 bytes
 * of code, so we can save space by not having a table for this one.
 */
258*600f14f4SXin Li 
/*
 * Charset decoder callback for ISO-8859-1; MAP is not used.
 *
 * Every byte value is a valid character, so this never reports an
 * error. Returns 1 for a non-NUL byte and 0 for NUL (or when S is null
 * or N is zero). The byte value is stored in *PWC when PWC is non-null.
 */
static int mbtowc_iso1(void *map, int *pwc, const char *s, size_t n)
{
  int wc;

  (void)map;
  if (!n || !s)
    return 0;

  /* A value taken from one unsigned byte is always within 0..0xff. */
  wc = (unsigned char)*s;
  if (pwc)
    *pwc = wc;
  return wc ? 1 : 0;
}
273*600f14f4SXin Li 
/*
 * Charset encoder callback for ISO-8859-1; MAP is not used.
 *
 * Writes WC as a single byte and returns 1, returns 0 when S is null,
 * and returns -1 when WC lies outside 0..0xff.
 */
static int wctomb_iso1(void *map, char *s, int wc)
{
  (void)map;

  if (!s)
    return 0;
  if (wc < 0 || wc > 0xff)
    return -1;

  s[0] = (char)wc;
  return 1;
}
284*600f14f4SXin Li 
/*
 * Implementation of any 8-bit charset.
 */
288*600f14f4SXin Li 
struct inverse_map;

/*
 * Conversion data for a table-driven 8-bit charset.
 */
struct map {
  /* Byte value -> 16-bit character code; 0xffff marks an undefined byte. */
  const uint16_t *from;
  /* Reverse lookup table; starts out null and is built on first use. */
  struct inverse_map *to;
};
293*600f14f4SXin Li 
mbtowc_8bit(void * map1,int * pwc,const char * s,size_t n)294*600f14f4SXin Li static int mbtowc_8bit(void *map1, int *pwc, const char *s, size_t n)
295*600f14f4SXin Li {
296*600f14f4SXin Li   struct map *map = map1;
297*600f14f4SXin Li   uint16_t wc;
298*600f14f4SXin Li 
299*600f14f4SXin Li   if (!n || !s)
300*600f14f4SXin Li     return 0;
301*600f14f4SXin Li   wc = map->from[(uint8_t)*s];
302*600f14f4SXin Li   if (wc == 0xffff)
303*600f14f4SXin Li     return -1;
304*600f14f4SXin Li   if (pwc)
305*600f14f4SXin Li     *pwc = (int)wc;
306*600f14f4SXin Li   return wc ? 1 : 0;
307*600f14f4SXin Li }
308*600f14f4SXin Li 
/*
 * For the inverse map we use a hash table, which has the advantages
 * of small constant memory requirement and simple memory allocation,
 * but the disadvantage of slow conversion in the worst case.
 * If you need real-time performance while letting a potentially
 * malicious user define their own map, then the method used in
 * linux/drivers/char/consolemap.c would be more appropriate.
 */
317*600f14f4SXin Li 
/*
 * Chained hash table over the 256 entries of a forward table. All
 * links are entry indices; a `next` value of 0 ends a chain.
 */
struct inverse_map {
  /* first[h]: index of the first entry whose code hashes to h. */
  uint8_t first[256];
  /* next[i]: index of the entry that follows entry i in its chain. */
  uint8_t next[256];
};
322*600f14f4SXin Li 
/*
 * Bucket number for a character code: simply its low eight bits,
 * which is good enough for this application.
 * Swap in one of the constant hashes below to exercise the
 * collision handling when testing.
 */
#define HASH(code) ((code) & 0xff)
/* #define HASH(code) 0 */
/* #define HASH(code) 99 */
330*600f14f4SXin Li 
make_inverse_map(const uint16_t * from)331*600f14f4SXin Li static struct inverse_map *make_inverse_map(const uint16_t *from)
332*600f14f4SXin Li {
333*600f14f4SXin Li   struct inverse_map *to;
334*600f14f4SXin Li   char used[256];
335*600f14f4SXin Li   int i, j, k;
336*600f14f4SXin Li 
337*600f14f4SXin Li   to = malloc(sizeof(struct inverse_map));
338*600f14f4SXin Li   if (!to)
339*600f14f4SXin Li     return 0;
340*600f14f4SXin Li   for (i = 0; i < 256; i++)
341*600f14f4SXin Li     to->first[i] = to->next[i] = used[i] = 0;
342*600f14f4SXin Li   for (i = 255; i >= 0; i--)
343*600f14f4SXin Li     if (from[i] != 0xffff) {
344*600f14f4SXin Li       k = HASH(from[i]);
345*600f14f4SXin Li       to->next[i] = to->first[k];
346*600f14f4SXin Li       to->first[k] = i;
347*600f14f4SXin Li       used[k] = 1;
348*600f14f4SXin Li     }
349*600f14f4SXin Li 
350*600f14f4SXin Li   /* Point the empty buckets at an empty list. */
351*600f14f4SXin Li   for (i = 0; i < 256; i++)
352*600f14f4SXin Li     if (!to->next[i])
353*600f14f4SXin Li       break;
354*600f14f4SXin Li   if (i < 256)
355*600f14f4SXin Li     for (j = 0; j < 256; j++)
356*600f14f4SXin Li       if (!used[j])
357*600f14f4SXin Li 	to->first[j] = i;
358*600f14f4SXin Li 
359*600f14f4SXin Li   return to;
360*600f14f4SXin Li }
361*600f14f4SXin Li 
wctomb_8bit(void * map1,char * s,int wc1)362*600f14f4SXin Li static int wctomb_8bit(void *map1, char *s, int wc1)
363*600f14f4SXin Li {
364*600f14f4SXin Li   struct map *map = map1;
365*600f14f4SXin Li   uint16_t wc = wc1;
366*600f14f4SXin Li   int i;
367*600f14f4SXin Li 
368*600f14f4SXin Li   if (!s)
369*600f14f4SXin Li     return 0;
370*600f14f4SXin Li 
371*600f14f4SXin Li   if (wc1 & ~0xffff)
372*600f14f4SXin Li     return -1;
373*600f14f4SXin Li 
374*600f14f4SXin Li   if (1) /* Change 1 to 0 to test the case where malloc fails. */
375*600f14f4SXin Li     if (!map->to)
376*600f14f4SXin Li       map->to = make_inverse_map(map->from);
377*600f14f4SXin Li 
378*600f14f4SXin Li   if (map->to) {
379*600f14f4SXin Li     /* Use the inverse map. */
380*600f14f4SXin Li     i = map->to->first[HASH(wc)];
381*600f14f4SXin Li     for (;;) {
382*600f14f4SXin Li       if (map->from[i] == wc) {
383*600f14f4SXin Li 	*s = i;
384*600f14f4SXin Li 	return 1;
385*600f14f4SXin Li       }
386*600f14f4SXin Li       if (!(i = map->to->next[i]))
387*600f14f4SXin Li 	break;
388*600f14f4SXin Li     }
389*600f14f4SXin Li   }
390*600f14f4SXin Li   else {
391*600f14f4SXin Li     /* We don't have an inverse map, so do a linear search. */
392*600f14f4SXin Li     for (i = 0; i < 256; i++)
393*600f14f4SXin Li       if (map->from[i] == wc) {
394*600f14f4SXin Li 	*s = i;
395*600f14f4SXin Li 	return 1;
396*600f14f4SXin Li       }
397*600f14f4SXin Li   }
398*600f14f4SXin Li 
399*600f14f4SXin Li   return -1;
400*600f14f4SXin Li }
401*600f14f4SXin Li 
/*
 * The "constructor" charset_find().
 */
405*600f14f4SXin Li 
406*600f14f4SXin Li struct charset charset_utf8 = {
407*600f14f4SXin Li   6,
408*600f14f4SXin Li   &mbtowc_utf8,
409*600f14f4SXin Li   &wctomb_utf8,
410*600f14f4SXin Li   0
411*600f14f4SXin Li };
412*600f14f4SXin Li 
413*600f14f4SXin Li struct charset charset_iso1 = {
414*600f14f4SXin Li   1,
415*600f14f4SXin Li   &mbtowc_iso1,
416*600f14f4SXin Li   &wctomb_iso1,
417*600f14f4SXin Li   0
418*600f14f4SXin Li };
419*600f14f4SXin Li 
420*600f14f4SXin Li struct charset charset_ascii = {
421*600f14f4SXin Li   1,
422*600f14f4SXin Li   &mbtowc_ascii,
423*600f14f4SXin Li   &wctomb_ascii,
424*600f14f4SXin Li   0
425*600f14f4SXin Li };
426*600f14f4SXin Li 
charset_find(const char * code)427*600f14f4SXin Li struct charset *charset_find(const char *code)
428*600f14f4SXin Li {
429*600f14f4SXin Li   int i;
430*600f14f4SXin Li 
431*600f14f4SXin Li   /* Find good (MIME) name. */
432*600f14f4SXin Li   for (i = 0; names[i].bad; i++)
433*600f14f4SXin Li     if (!ascii_strcasecmp(code, names[i].bad)) {
434*600f14f4SXin Li       code = names[i].good;
435*600f14f4SXin Li       break;
436*600f14f4SXin Li     }
437*600f14f4SXin Li 
438*600f14f4SXin Li   /* Recognise some charsets for which we avoid using a table. */
439*600f14f4SXin Li   if (!ascii_strcasecmp(code, "UTF-8"))
440*600f14f4SXin Li     return &charset_utf8;
441*600f14f4SXin Li   if (!ascii_strcasecmp(code, "US-ASCII"))
442*600f14f4SXin Li     return &charset_ascii;
443*600f14f4SXin Li   if (!ascii_strcasecmp(code, "ISO-8859-1"))
444*600f14f4SXin Li     return &charset_iso1;
445*600f14f4SXin Li 
446*600f14f4SXin Li   /* Look for a mapping for a simple 8-bit encoding. */
447*600f14f4SXin Li   for (i = 0; maps[i].name; i++)
448*600f14f4SXin Li     if (!ascii_strcasecmp(code, maps[i].name)) {
449*600f14f4SXin Li       if (!maps[i].charset) {
450*600f14f4SXin Li 	maps[i].charset = malloc(sizeof(struct charset));
451*600f14f4SXin Li 	if (maps[i].charset) {
452*600f14f4SXin Li 	  struct map *map = malloc(sizeof(struct map));
453*600f14f4SXin Li 	  if (!map) {
454*600f14f4SXin Li 	    free(maps[i].charset);
455*600f14f4SXin Li 	    maps[i].charset = 0;
456*600f14f4SXin Li 	  }
457*600f14f4SXin Li 	  else {
458*600f14f4SXin Li 	    maps[i].charset->max = 1;
459*600f14f4SXin Li 	    maps[i].charset->mbtowc = &mbtowc_8bit;
460*600f14f4SXin Li 	    maps[i].charset->wctomb = &wctomb_8bit;
461*600f14f4SXin Li 	    maps[i].charset->map = map;
462*600f14f4SXin Li 	    map->from = maps[i].map;
463*600f14f4SXin Li 	    map->to = 0; /* inverse mapping is created when required */
464*600f14f4SXin Li 	  }
465*600f14f4SXin Li 	}
466*600f14f4SXin Li       }
467*600f14f4SXin Li       return maps[i].charset;
468*600f14f4SXin Li     }
469*600f14f4SXin Li 
470*600f14f4SXin Li   return 0;
471*600f14f4SXin Li }
472*600f14f4SXin Li 
/*
 * Function to convert a buffer from one encoding to another.
 * Invalid bytes are replaced by '#', and characters that are
 * not available in the target encoding are replaced by '?'.
 * Each of TO and TOLEN may be zero, if the result is not needed.
 * The output buffer is null-terminated, so it is all right to
 * use charset_convert(fromcode, tocode, s, strlen(s), &t, 0).
 */
481*600f14f4SXin Li 
charset_convert(const char * fromcode,const char * tocode,const char * from,size_t fromlen,char ** to,size_t * tolen)482*600f14f4SXin Li int charset_convert(const char *fromcode, const char *tocode,
483*600f14f4SXin Li 		    const char *from, size_t fromlen,
484*600f14f4SXin Li 		    char **to, size_t *tolen)
485*600f14f4SXin Li {
486*600f14f4SXin Li   int ret = 0;
487*600f14f4SXin Li   struct charset *charset1, *charset2;
488*600f14f4SXin Li   char *tobuf, *p;
489*600f14f4SXin Li   int i, j, wc;
490*600f14f4SXin Li 
491*600f14f4SXin Li   charset1 = charset_find(fromcode);
492*600f14f4SXin Li   charset2 = charset_find(tocode);
493*600f14f4SXin Li   if (!charset1 || !charset2 )
494*600f14f4SXin Li     return -1;
495*600f14f4SXin Li 
496*600f14f4SXin Li   tobuf = safe_malloc_mul2add_(fromlen, /*times*/charset2->max, /*+*/1);
497*600f14f4SXin Li   if (!tobuf)
498*600f14f4SXin Li     return -2;
499*600f14f4SXin Li 
500*600f14f4SXin Li   for (p = tobuf; fromlen; from += i, fromlen -= i, p += j) {
501*600f14f4SXin Li     i = charset_mbtowc(charset1, &wc, from, fromlen);
502*600f14f4SXin Li     if (!i)
503*600f14f4SXin Li       i = 1;
504*600f14f4SXin Li     else if (i == -1) {
505*600f14f4SXin Li       i  = 1;
506*600f14f4SXin Li       wc = '#';
507*600f14f4SXin Li       ret = 2;
508*600f14f4SXin Li     }
509*600f14f4SXin Li     j = charset_wctomb(charset2, p, wc);
510*600f14f4SXin Li     if (j == -1) {
511*600f14f4SXin Li       if (!ret)
512*600f14f4SXin Li 	ret = 1;
513*600f14f4SXin Li       j = charset_wctomb(charset2, p, '?');
514*600f14f4SXin Li       if (j == -1)
515*600f14f4SXin Li 	j = 0;
516*600f14f4SXin Li     }
517*600f14f4SXin Li   }
518*600f14f4SXin Li 
519*600f14f4SXin Li   if (tolen)
520*600f14f4SXin Li     *tolen = p - tobuf;
521*600f14f4SXin Li   *p++ = '\0';
522*600f14f4SXin Li   if (to) {
523*600f14f4SXin Li     char *tobuf_saved = tobuf;
524*600f14f4SXin Li     *to = realloc(tobuf, p - tobuf);
525*600f14f4SXin Li     if (*to == NULL)
526*600f14f4SXin Li       *to = tobuf_saved;
527*600f14f4SXin Li   }
528*600f14f4SXin Li   else
529*600f14f4SXin Li     free(tobuf);
530*600f14f4SXin Li 
531*600f14f4SXin Li   return ret;
532*600f14f4SXin Li }
533*600f14f4SXin Li 
534*600f14f4SXin Li #endif /* USE_CHARSET_ICONV */
535