xref: /aosp_15_r20/external/flac/src/share/utf8/charset.h (revision 600f14f40d737144c998e2ec7a483122d3776fbc)
/*
 * Copyright (C) 2001 Edmund Grimley Evans <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
18*600f14f4SXin Li 
19*600f14f4SXin Li #include <stdlib.h>
20*600f14f4SXin Li 
/*
 * Locale-independent counterparts of the C library's mbtowc() and
 * wctomb(): they always work in UTF-8, whatever the current locale,
 * and they represent a character as an int instead of a wchar_t.
 *
 * utf8_mbtowc() is the mbtowc() counterpart and utf8_wctomb() is the
 * wctomb() counterpart.
 *
 * NOTE(review): by analogy with mbtowc()/wctomb(), PWC presumably
 * receives the decoded character, S is the multibyte input (or output
 * buffer for utf8_wctomb()), N bounds the number of input bytes that
 * may be examined, and WC is the character to encode.  The exact
 * return values and the treatment of a null S or PWC are not stated
 * in this header; confirm against the implementation.
 */

int utf8_mbtowc(int *pwc, const char *s, size_t n);
int utf8_wctomb(char *s, int wc);
29*600f14f4SXin Li 
/*
 * Object-oriented version of mbtowc() and wctomb().
 *
 * The caller first obtains a pointer to a struct charset from
 * charset_find(), then calls the charset_mbtowc() and charset_wctomb()
 * methods on it.  charset_max() gives the maximum length of a
 * multibyte character in that encoding.
 *
 * This API is only appropriate for stateless encodings like UTF-8
 * or ISO-8859-3; the author has no intention of implementing anything
 * other than UTF-8 and 8-bit encodings.
 *
 * MINOR BUG: If memory runs out, charset_find() may return a null
 * pointer, and there is no way to distinguish this case from an
 * unknown encoding.
 */

/* Only declared here, not defined: callers handle it through pointers. */
struct charset;

/*
 * Look up the encoding named CODE.  A null pointer is returned for an
 * unknown encoding and possibly also when memory is exhausted (see the
 * MINOR BUG note above).
 */
struct charset *charset_find(const char *code);

/*
 * Per-charset equivalents of mbtowc() and wctomb(), operating in the
 * encoding described by CHARSET and using int instead of wchar_t.
 * NOTE(review): return-value conventions are presumably those of
 * mbtowc()/wctomb(); confirm against the implementation.
 */
int charset_mbtowc(struct charset *charset, int *pwc, const char *s, size_t n);
int charset_wctomb(struct charset *charset, char *s, int wc);

/* Maximum length of a multibyte character in the encoding CHARSET. */
int charset_max(struct charset *charset);
51*600f14f4SXin Li 
/*
 * Convert a buffer from one encoding to another.
 *
 * FROMCODE and TOCODE name the source and target encodings; FROM and
 * FROMLEN give the input buffer and its length in bytes.
 *
 * Invalid bytes in the input are replaced by '#', and characters that
 * are not available in the target encoding are replaced by '?'.
 *
 * Each of TO and TOLEN may be a null pointer if that result is not
 * wanted.  The input or output may contain null bytes, but the output
 * buffer is also null-terminated, so it is all right to use
 * charset_convert(fromcode, tocode, s, strlen(s), &t, 0).
 *
 * NOTE(review): the buffer stored in *TO is presumably allocated by
 * this function and owned by the caller afterwards; this header does
 * not say who frees it, so confirm against the implementation.
 *
 * Return value:
 *
 *  -2 : memory allocation failed
 *  -1 : unknown encoding
 *   0 : data was converted exactly
 *   1 : valid data was converted approximately (using '?')
 *   2 : input was invalid (but still converted, using '#')
 */

int charset_convert(const char *fromcode, const char *tocode,
		    const char *from, size_t fromlen,
		    char **to, size_t *tolen);
73