Lines Matching +full:out +full:- +full:null

5  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
62 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
75 { "EUC-JP", XML_CHAR_ENCODING_EUC_JP },
79 { "ISO-10646-UCS-2", XML_CHAR_ENCODING_UCS2 },
80 { "ISO-10646-UCS-4", XML_CHAR_ENCODING_UCS4LE },
81 { "ISO-2022-JP", XML_CHAR_ENCODING_2022_JP },
82 { "ISO-8859-1", XML_CHAR_ENCODING_8859_1 },
83 { "ISO-8859-10", XML_CHAR_ENCODING_8859_10 },
84 { "ISO-8859-11", XML_CHAR_ENCODING_8859_11 },
85 { "ISO-8859-13", XML_CHAR_ENCODING_8859_13 },
86 { "ISO-8859-14", XML_CHAR_ENCODING_8859_14 },
87 { "ISO-8859-15", XML_CHAR_ENCODING_8859_15 },
88 { "ISO-8859-16", XML_CHAR_ENCODING_8859_16 },
89 { "ISO-8859-2", XML_CHAR_ENCODING_8859_2 },
90 { "ISO-8859-3", XML_CHAR_ENCODING_8859_3 },
91 { "ISO-8859-4", XML_CHAR_ENCODING_8859_4 },
92 { "ISO-8859-5", XML_CHAR_ENCODING_8859_5 },
93 { "ISO-8859-6", XML_CHAR_ENCODING_8859_6 },
94 { "ISO-8859-7", XML_CHAR_ENCODING_8859_7 },
95 { "ISO-8859-8", XML_CHAR_ENCODING_8859_8 },
96 { "ISO-8859-9", XML_CHAR_ENCODING_8859_9 },
97 { "ISO-LATIN-1", XML_CHAR_ENCODING_8859_1 },
98 { "ISO-LATIN-2", XML_CHAR_ENCODING_8859_2 },
100 { "UCS-2", XML_CHAR_ENCODING_UCS2 },
101 { "UCS-4", XML_CHAR_ENCODING_UCS4LE },
104 { "US-ASCII", XML_CHAR_ENCODING_ASCII },
105 { "UTF-16", XML_CHAR_ENCODING_UTF16 },
106 { "UTF-16BE", XML_CHAR_ENCODING_UTF16BE },
107 { "UTF-16LE", XML_CHAR_ENCODING_UTF16LE },
108 { "UTF-8", XML_CHAR_ENCODING_UTF8 },
114 asciiToAscii(unsigned char* out, int *outlen,
117 UTF8ToUTF8(unsigned char* out, int *outlen,
120 latin1ToUTF8(unsigned char* out, int *outlen,
123 UTF16LEToUTF8(unsigned char* out, int *outlen,
126 UTF16BEToUTF8(unsigned char* out, int *outlen,
146 #define UTF8ToLatin1 NULL
147 #define UTF8ToUTF16 NULL
148 #define UTF8ToUTF16LE NULL
149 #define UTF8ToUTF16BE NULL
155 UTF8ToHtmlWrapper(unsigned char *out, int *outlen,
158 #define UTF8ToHtmlWrapper NULL
173 ISO8859xToUTF8(unsigned char* out, int *outlen,
176 UTF8ToISO8859x(unsigned char *out, int *outlen,
186 NULL, XML_HANDLER_STATIC }
191 { (char *) name, NULL, NULL EMPTY_ICONV, NULL, NULL, NULL, \
196 #define MAKE_HANDLER(name, in, out) \ argument
199 (xmlCharEncodingOutputFunc) (void (*)(void)) out \
200 EMPTY_ICONV, NULL, NULL, NULL, XML_HANDLER_STATIC }
206 * https://www.iana.org/assignments/character-sets/character-sets.xhtml
209 MAKE_HANDLER(NULL, NULL, NULL), /* NONE */
210 MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8),
211 MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE),
212 MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE),
213 MAKE_HANDLER("UCS-4LE", NULL, NULL),
214 MAKE_HANDLER("UCS-4BE", NULL, NULL),
215 MAKE_HANDLER("IBM037", NULL, NULL),
216 MAKE_HANDLER("ISO-10646-UCS-4", NULL, NULL), /* UCS4_2143 */
217 MAKE_HANDLER("ISO-10646-UCS-4", NULL, NULL), /* UCS4_3412 */
218 MAKE_HANDLER("ISO-10646-UCS-2", NULL, NULL),
219 MAKE_HANDLER("ISO-8859-1", latin1ToUTF8, UTF8ToLatin1),
220 MAKE_ISO_HANDLER("ISO-8859-2", 2),
221 MAKE_ISO_HANDLER("ISO-8859-3", 3),
222 MAKE_ISO_HANDLER("ISO-8859-4", 4),
223 MAKE_ISO_HANDLER("ISO-8859-5", 5),
224 MAKE_ISO_HANDLER("ISO-8859-6", 6),
225 MAKE_ISO_HANDLER("ISO-8859-7", 7),
226 MAKE_ISO_HANDLER("ISO-8859-8", 8),
227 MAKE_ISO_HANDLER("ISO-8859-9", 9),
228 MAKE_HANDLER("ISO-2022-JP", NULL, NULL),
229 MAKE_HANDLER("Shift_JIS", NULL, NULL),
230 MAKE_HANDLER("EUC-JP", NULL, NULL),
231 MAKE_HANDLER("US-ASCII", asciiToAscii, asciiToAscii),
232 MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16),
233 MAKE_HANDLER("HTML", NULL, UTF8ToHtmlWrapper),
234 MAKE_ISO_HANDLER("ISO-8859-10", 10),
235 MAKE_ISO_HANDLER("ISO-8859-11", 11),
236 MAKE_ISO_HANDLER("ISO-8859-13", 13),
237 MAKE_ISO_HANDLER("ISO-8859-14", 14),
238 MAKE_ISO_HANDLER("ISO-8859-15", 15),
239 MAKE_ISO_HANDLER("ISO-8859-16", 16),
247 static xmlCharEncodingHandlerPtr *globalHandlers = NULL;
273 * according to the non-normative appendix F of the XML-1.0 recommendation.
280 if (in == NULL) in xmlDetectCharEncoding()
303 * attempt an "auto-recognition" of UTF-16LE and in xmlDetectCharEncoding()
304 * UTF-16BE encodings. in xmlDetectCharEncoding()
315 * Errata on XML-1.0 June 20 2001 in xmlDetectCharEncoding()
322 /* For UTF-16 we can recognize by the BOM */ in xmlDetectCharEncoding()
336 * thread-safe.
344 if (xmlCharEncodingAliases == NULL) in xmlCleanupEncodingAliases()
348 if (xmlCharEncodingAliases[i].name != NULL) in xmlCleanupEncodingAliases()
350 if (xmlCharEncodingAliases[i].alias != NULL) in xmlCleanupEncodingAliases()
356 xmlCharEncodingAliases = NULL; in xmlCleanupEncodingAliases()
361 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
363 * DEPRECATED: This function is not thread-safe.
367 * Returns NULL if not found, otherwise the original name
374 if (alias == NULL) in xmlGetEncodingAlias()
375 return(NULL); in xmlGetEncodingAlias()
377 if (xmlCharEncodingAliases == NULL) in xmlGetEncodingAlias()
378 return(NULL); in xmlGetEncodingAlias()
394 return(NULL); in xmlGetEncodingAlias()
399 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
400 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
403 * thread-safe.
408 * Returns 0 in case of success, -1 in case of error
416 if ((name == NULL) || (alias == NULL)) in xmlAddEncodingAlias()
417 return(-1); in xmlAddEncodingAlias()
434 if (tmp == NULL) in xmlAddEncodingAlias()
435 return(-1); in xmlAddEncodingAlias()
449 if (nameCopy == NULL) in xmlAddEncodingAlias()
450 return(-1); in xmlAddEncodingAlias()
460 if (nameCopy == NULL) in xmlAddEncodingAlias()
461 return(-1); in xmlAddEncodingAlias()
463 if (aliasCopy == NULL) { in xmlAddEncodingAlias()
465 return(-1); in xmlAddEncodingAlias()
475 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
478 * thread-safe.
482 * Returns 0 in case of success, -1 in case of error
488 if (alias == NULL) in xmlDelEncodingAlias()
489 return(-1); in xmlDelEncodingAlias()
491 if (xmlCharEncodingAliases == NULL) in xmlDelEncodingAlias()
492 return(-1); in xmlDelEncodingAlias()
500 xmlCharEncodingAliasesNb--; in xmlDelEncodingAlias()
502 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); in xmlDelEncodingAlias()
506 return(-1); in xmlDelEncodingAlias()
514 return(xmlStrcasecmp(BAD_CAST key, BAD_CAST entry->name)); in xmlCompareEncTableEntries()
522 if (name == NULL) in xmlParseCharEncodingInternal()
528 if (entry != NULL) in xmlParseCharEncodingInternal()
529 return(entry->enc); in xmlParseCharEncodingInternal()
536 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
562 * C.f. http://www.w3.org/TR/REC-xml#charencoding
571 return("UTF-16"); in xmlGetCharEncodingName()
573 return("UTF-16"); in xmlGetCharEncodingName()
575 return("ISO-10646-UCS-4"); in xmlGetCharEncodingName()
577 return("ISO-10646-UCS-4"); in xmlGetCharEncodingName()
583 return(NULL); in xmlGetCharEncodingName()
596 * @name: the encoding name, in UTF-8 format (ASCII actually)
601 * thread-safe.
605 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
615 char *up = NULL; in xmlNewCharEncodingHandler()
621 if (alias != NULL) in xmlNewCharEncodingHandler()
627 if (name == NULL) in xmlNewCharEncodingHandler()
628 return(NULL); in xmlNewCharEncodingHandler()
635 if (up == NULL) in xmlNewCharEncodingHandler()
636 return(NULL); in xmlNewCharEncodingHandler()
639 * allocate and fill-up a handler block. in xmlNewCharEncodingHandler()
643 if (handler == NULL) { in xmlNewCharEncodingHandler()
645 return(NULL); in xmlNewCharEncodingHandler()
648 handler->input = input; in xmlNewCharEncodingHandler()
649 handler->output = output; in xmlNewCharEncodingHandler()
650 handler->name = up; in xmlNewCharEncodingHandler()
651 handler->flags = XML_HANDLER_STATIC; in xmlNewCharEncodingHandler()
654 handler->iconv_in = NULL; in xmlNewCharEncodingHandler()
655 handler->iconv_out = NULL; in xmlNewCharEncodingHandler()
704 if (globalHandlers == NULL) return; in xmlCleanupCharEncodingHandlers()
709 nbCharEncodingHandler--; in xmlCleanupCharEncodingHandlers()
711 if (handler != NULL) { in xmlCleanupCharEncodingHandlers()
712 if (handler->name != NULL) in xmlCleanupCharEncodingHandlers()
713 xmlFree(handler->name); in xmlCleanupCharEncodingHandlers()
718 globalHandlers = NULL; in xmlCleanupCharEncodingHandlers()
727 * thread-safe.
733 if (handler == NULL) in xmlRegisterCharEncodingHandler()
735 if (globalHandlers == NULL) { in xmlRegisterCharEncodingHandler()
738 if (globalHandlers == NULL) in xmlRegisterCharEncodingHandler()
748 if (handler != NULL) { in xmlRegisterCharEncodingHandler()
749 if (handler->name != NULL) { in xmlRegisterCharEncodingHandler()
750 xmlFree(handler->name); in xmlRegisterCharEncodingHandler()
759 xmlCharEncConverter conv = { NULL, NULL, NULL, NULL, NULL }; in xmlInvokeConvImpl()
765 handler->input = in xmlInvokeConvImpl()
767 handler->output = in xmlInvokeConvImpl()
769 handler->ctxtDtor = conv.ctxtDtor; in xmlInvokeConvImpl()
770 handler->inputCtxt = conv.inputCtxt; in xmlInvokeConvImpl()
771 handler->outputCtxt = conv.outputCtxt; in xmlInvokeConvImpl()
784 * @out: pointer to resulting handler
786 * Search the non-default handlers for an exact match.
793 xmlCharEncodingHandler **out) { in xmlFindExtraHandler() argument
799 if (handler == NULL) in xmlFindExtraHandler()
803 handler->name = xmlMemStrdup(name); in xmlFindExtraHandler()
804 if (handler->name == NULL) { in xmlFindExtraHandler()
815 if (impl != NULL) { in xmlFindExtraHandler()
820 *out = handler; in xmlFindExtraHandler()
827 if (globalHandlers != NULL) { in xmlFindExtraHandler()
832 (const xmlChar *) h->name)) { in xmlFindExtraHandler()
833 if ((output ? h->output : h->input) != NULL) { in xmlFindExtraHandler()
834 *out = h; in xmlFindExtraHandler()
845 *out = handler; in xmlFindExtraHandler()
855 *out = handler; in xmlFindExtraHandler()
865 if (handler != NULL) { in xmlFindExtraHandler()
866 xmlFree(handler->name); in xmlFindExtraHandler()
876 * @out: pointer to result
881 * - Built-in handler (UTF-8, UTF-16, ISO-8859-1, ASCII)
882 * - User-registered global handler (deprecated)
883 * - iconv if enabled
884 * - ICU if enabled
888 * If the encoding is UTF-8, a NULL handler and no error code will
898 xmlCharEncodingHandler **out) { in xmlLookupCharEncodingHandler() argument
901 if (out == NULL) in xmlLookupCharEncodingHandler()
903 *out = NULL; in xmlLookupCharEncodingHandler()
908 /* Return NULL handler for UTF-8 */ in xmlLookupCharEncodingHandler()
914 if ((handler->input != NULL) || (handler->output != NULL)) { in xmlLookupCharEncodingHandler()
915 *out = (xmlCharEncodingHandler *) handler; in xmlLookupCharEncodingHandler()
919 if (handler->name != NULL) in xmlLookupCharEncodingHandler()
920 return(xmlFindExtraHandler(handler->name, handler->name, 0, in xmlLookupCharEncodingHandler()
921 NULL, NULL, out)); in xmlLookupCharEncodingHandler()
933 * Returns the handler or NULL if no handler was found or an error
950 * @out: pointer to result
955 * - Built-in handler (UTF-8, UTF-16, ISO-8859-1, ASCII)
956 * - Custom implementation if provided
957 * - User-registered global handler (deprecated)
958 * - iconv if enabled
959 * - ICU if enabled
963 * If the encoding is UTF-8, a NULL handler and no error code will
974 xmlCharEncodingHandler **out) { in xmlCreateCharEncodingHandler() argument
979 if (out == NULL) in xmlCreateCharEncodingHandler()
981 *out = NULL; in xmlCreateCharEncodingHandler()
983 if (name == NULL) in xmlCreateCharEncodingHandler()
988 if (nalias != NULL) in xmlCreateCharEncodingHandler()
993 /* Return NULL handler for UTF-8 */ in xmlCreateCharEncodingHandler()
999 if ((output ? handler->output : handler->input) != NULL) { in xmlCreateCharEncodingHandler()
1000 *out = (xmlCharEncodingHandler *) handler; in xmlCreateCharEncodingHandler()
1005 return(xmlFindExtraHandler(norig, name, output, impl, implCtxt, out)); in xmlCreateCharEncodingHandler()
1012 * @out: pointer to result
1017 * - Built-in handler (UTF-8, UTF-16, ISO-8859-1, ASCII)
1018 * - User-registered global handler (deprecated)
1019 * - iconv if enabled
1020 * - ICU if enabled
1024 * If the encoding is UTF-8, a NULL handler and no error code will
1034 xmlCharEncodingHandler **out) { in xmlOpenCharEncodingHandler() argument
1035 return(xmlCreateCharEncodingHandler(name, output, NULL, NULL, out)); in xmlOpenCharEncodingHandler()
1045 * If the encoding is UTF-8, this will return a no-op handler that
1048 * Returns the handler or NULL if no handler was found or an error
1056 * This handler shouldn't be used, but we must return a non-NULL in xmlFindCharEncodingHandler()
1059 if ((xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF-8") == 0) || in xmlFindCharEncodingHandler()
1082 * @out: a pointer to an array of bytes to store the result
1083 * @outlen: the length of @out
1094 xmlIconvConvert(unsigned char *out, int *outlen, in xmlIconvConvert() argument
1099 char *icv_out = (char *) out; in xmlIconvConvert()
1102 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { in xmlIconvConvert()
1103 if (outlen != NULL) *outlen = 0; in xmlIconvConvert()
1109 * Some versions take const, other versions take non-const input. in xmlIconvConvert()
1111 ret = iconv(ctxt->cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen); in xmlIconvConvert()
1112 *inlen -= icv_inlen; in xmlIconvConvert()
1113 *outlen -= icv_outlen; in xmlIconvConvert()
1114 if (ret == (size_t) -1) { in xmlIconvConvert()
1120 * EINVAL means a truncated multi-byte sequence at the end in xmlIconvConvert()
1134 if (ctxt->cd != (iconv_t) -1) in xmlIconvFree()
1135 iconv_close(ctxt->cd); in xmlIconvFree()
1143 xmlIconvCtxt *inputCtxt = NULL, *outputCtxt = NULL; in xmlCharEncIconv()
1149 if (inputCtxt == NULL) { in xmlCharEncIconv()
1153 inputCtxt->cd = (iconv_t) -1; in xmlCharEncIconv()
1155 icv_in = iconv_open("UTF-8", name); in xmlCharEncIconv()
1156 if (icv_in == (iconv_t) -1) { in xmlCharEncIconv()
1165 inputCtxt->cd = icv_in; in xmlCharEncIconv()
1168 if (outputCtxt == NULL) { in xmlCharEncIconv()
1172 outputCtxt->cd = (iconv_t) -1; in xmlCharEncIconv()
1174 icv_out = iconv_open(name, "UTF-8"); in xmlCharEncIconv()
1175 if (icv_out == (iconv_t) -1) { in xmlCharEncIconv()
1184 outputCtxt->cd = icv_out; in xmlCharEncIconv()
1186 conv->input = xmlIconvConvert; in xmlCharEncIconv()
1187 conv->output = xmlIconvConvert; in xmlCharEncIconv()
1188 conv->ctxtDtor = xmlIconvFree; in xmlCharEncIconv()
1189 conv->inputCtxt = inputCtxt; in xmlCharEncIconv()
1190 conv->outputCtxt = outputCtxt; in xmlCharEncIconv()
1193 if (handler != NULL) { in xmlCharEncIconv()
1194 handler->iconv_in = icv_in; in xmlCharEncIconv()
1195 handler->iconv_out = icv_out; in xmlCharEncIconv()
1201 if (inputCtxt != NULL) in xmlCharEncIconv()
1203 if (outputCtxt != NULL) in xmlCharEncIconv()
1221 UConverter *uconv; /* for conversion between an encoding and UTF-16 */
1222 UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
1232 * @out: a pointer to an array of bytes to store the result
1233 * @outlen: the length of @out
1244 xmlUconvConvert(unsigned char *out, int *outlen, in xmlUconvConvert() argument
1248 char *ucv_out = (char *) out; in xmlUconvConvert()
1253 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { in xmlUconvConvert()
1254 if (outlen != NULL) in xmlUconvConvert()
1269 if (cd->isInput) { in xmlUconvConvert()
1270 source = cd->uconv; in xmlUconvConvert()
1271 target = cd->utf8; in xmlUconvConvert()
1273 source = cd->utf8; in xmlUconvConvert()
1274 target = cd->uconv; in xmlUconvConvert()
1278 &ucv_in, ucv_in + *inlen, cd->pivot_buf, in xmlUconvConvert()
1279 &cd->pivot_source, &cd->pivot_target, in xmlUconvConvert()
1280 cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &err); in xmlUconvConvert()
1282 *inlen = ucv_in - (const char*) in; in xmlUconvConvert()
1283 *outlen = ucv_out - (char *) out; in xmlUconvConvert()
1319 openIcuConverter(const char* name, int isInput, xmlUconvCtxt **out) in openIcuConverter() argument
1324 *out = NULL; in openIcuConverter()
1327 if (conv == NULL) in openIcuConverter()
1330 conv->isInput = isInput; in openIcuConverter()
1331 conv->pivot_source = conv->pivot_buf; in openIcuConverter()
1332 conv->pivot_target = conv->pivot_buf; in openIcuConverter()
1335 conv->uconv = ucnv_open(name, &status); in openIcuConverter()
1341 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, in openIcuConverter()
1342 NULL, NULL, NULL, &status); in openIcuConverter()
1345 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, in openIcuConverter()
1346 NULL, NULL, NULL, &status); in openIcuConverter()
1352 conv->utf8 = ucnv_open("UTF-8", &status); in openIcuConverter()
1356 *out = conv; in openIcuConverter()
1360 if (conv->uconv) in openIcuConverter()
1361 ucnv_close(conv->uconv); in openIcuConverter()
1374 if (conv == NULL) in closeIcuConverter()
1376 ucnv_close(conv->uconv); in closeIcuConverter()
1377 ucnv_close(conv->utf8); in closeIcuConverter()
1389 xmlUconvCtxt *ucv_in = NULL; in xmlCharEncUconv()
1390 xmlUconvCtxt *ucv_out = NULL; in xmlCharEncUconv()
1400 conv->input = xmlUconvConvert; in xmlCharEncUconv()
1401 conv->output = xmlUconvConvert; in xmlCharEncUconv()
1402 conv->ctxtDtor = xmlUconvFree; in xmlCharEncUconv()
1403 conv->inputCtxt = ucv_in; in xmlCharEncUconv()
1404 conv->outputCtxt = ucv_out; in xmlCharEncUconv()
1409 if (ucv_in != NULL) in xmlCharEncUconv()
1411 if (ucv_out != NULL) in xmlCharEncUconv()
1419 * The real API used by libxml for on-the-fly conversion *
1454 * @out: a pointer to an array of bytes to store the result
1455 * @outlen: the length of @out
1466 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, in xmlEncInputChunk() argument
1470 if (handler->input != NULL) { in xmlEncInputChunk()
1472 (xmlCharEncConvFunc) (void (*)(void)) handler->input; in xmlEncInputChunk()
1474 ret = conv(out, outlen, in, inlen, handler->inputCtxt); in xmlEncInputChunk()
1490 * @out: a pointer to an array of bytes to store the result
1491 * @outlen: the length of @out
1502 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, in xmlEncOutputChunk() argument
1506 if (handler->output != NULL) { in xmlEncOutputChunk()
1508 (xmlCharEncConvFunc) (void (*)(void)) handler->output; in xmlEncOutputChunk()
1510 ret = conv(out, outlen, in, inlen, handler->outputCtxt); in xmlEncOutputChunk()
1526 * @out: an xmlBuffer for the output.
1534 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, in xmlCharEncFirstLine() argument
1536 return(xmlCharEncInFunc(handler, out, in)); in xmlCharEncFirstLine()
1547 * Generic front-end for the encoding handler on parser input
1554 xmlBufPtr out, in; in xmlCharEncInput() local
1561 out = input->buffer; in xmlCharEncInput()
1562 in = input->raw; in xmlCharEncInput()
1580 availOut = xmlBufAvail(out); in xmlCharEncInput()
1600 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out, in xmlCharEncInput()
1605 availIn -= c_in; in xmlCharEncInput()
1608 maxOut -= c_out; in xmlCharEncInput()
1609 xmlBufAddLen(out, c_out); in xmlCharEncInput()
1612 input->error = xmlEncConvertError(ret); in xmlCharEncInput()
1624 if (xmlBufGrow(out, 4096) < 0) { in xmlCharEncInput()
1625 input->error = XML_ERR_NO_MEMORY; in xmlCharEncInput()
1633 if (input->rawconsumed > ULONG_MAX - (unsigned long) totalIn) in xmlCharEncInput()
1634 input->rawconsumed = ULONG_MAX; in xmlCharEncInput()
1636 input->rawconsumed += totalIn; in xmlCharEncInput()
1645 * @out: an xmlBuffer for the output.
1648 * Generic front-end for the encoding handler input function
1653 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, in xmlCharEncInFunc() argument
1660 if (handler == NULL) in xmlCharEncInFunc()
1662 if (out == NULL) in xmlCharEncInFunc()
1664 if (in == NULL) in xmlCharEncInFunc()
1667 toconv = in->use; in xmlCharEncInFunc()
1670 written = out->size - out->use -1; /* count '\0' */ in xmlCharEncInFunc()
1672 xmlBufferGrow(out, out->size + toconv * 2); in xmlCharEncInFunc()
1673 written = out->size - out->use - 1; in xmlCharEncInFunc()
1675 ret = xmlEncInputChunk(handler, &out->content[out->use], &written, in xmlCharEncInFunc()
1676 in->content, &toconv); in xmlCharEncInFunc()
1678 out->use += written; in xmlCharEncInFunc()
1679 out->content[out->use] = 0; in xmlCharEncInFunc()
1690 * Generic front-end for the encoding handler on parser output
1692 * output in case of non-stateless encoding needing to initiate their
1709 xmlBufPtr out; in xmlCharEncOutput() local
1711 if ((output == NULL) || (output->encoder == NULL) || in xmlCharEncOutput()
1712 (output->buffer == NULL) || (output->conv == NULL)) in xmlCharEncOutput()
1714 out = output->conv; in xmlCharEncOutput()
1715 in = output->buffer; in xmlCharEncOutput()
1719 written = xmlBufAvail(out); in xmlCharEncOutput()
1728 xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, in xmlCharEncOutput()
1729 NULL, &c_in); in xmlCharEncOutput()
1730 xmlBufAddLen(out, c_out); in xmlCharEncOutput()
1741 if (xmlBufGrow(out, toconv * 4) < 0) { in xmlCharEncOutput()
1745 written = xmlBufAvail(out); in xmlCharEncOutput()
1752 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, in xmlCharEncOutput()
1755 xmlBufAddLen(out, c_out); in xmlCharEncOutput()
1780 xmlBufGrow(out, charrefLen * 4); in xmlCharEncOutput()
1781 c_out = xmlBufAvail(out); in xmlCharEncOutput()
1783 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, in xmlCharEncOutput()
1791 xmlBufAddLen(out, c_out); in xmlCharEncOutput()
1799 if (output->error == 0) in xmlCharEncOutput()
1800 output->error = xmlEncConvertError(ret); in xmlCharEncOutput()
1811 * @out: an xmlBuffer for the output.
1814 * Generic front-end for the encoding handler output function
1815 * a first call with @in == NULL has to be made first to initiate the
1816 * output in case of non-stateless encoding needing to initiate their
1824 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, in xmlCharEncOutFunc() argument
1831 if (handler == NULL) return(XML_ENC_ERR_INTERNAL); in xmlCharEncOutFunc()
1832 if (out == NULL) return(XML_ENC_ERR_INTERNAL); in xmlCharEncOutFunc()
1836 written = out->size - out->use; in xmlCharEncOutFunc()
1839 written--; /* Gennady: count '\0' */ in xmlCharEncOutFunc()
1842 * First specific handling of in = NULL, i.e. the initialization call in xmlCharEncOutFunc()
1844 if (in == NULL) { in xmlCharEncOutFunc()
1847 xmlEncOutputChunk(handler, &out->content[out->use], &written, in xmlCharEncOutFunc()
1848 NULL, &toconv); in xmlCharEncOutFunc()
1849 out->use += written; in xmlCharEncOutFunc()
1850 out->content[out->use] = 0; in xmlCharEncOutFunc()
1857 toconv = in->use; in xmlCharEncOutFunc()
1859 xmlBufferGrow(out, toconv * 4); in xmlCharEncOutFunc()
1860 written = out->size - out->use - 1; in xmlCharEncOutFunc()
1862 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written, in xmlCharEncOutFunc()
1863 in->content, &toconv); in xmlCharEncOutFunc()
1865 out->use += written; in xmlCharEncOutFunc()
1867 out->content[out->use] = 0; in xmlCharEncOutFunc()
1877 int len = in->use; in xmlCharEncOutFunc()
1878 const xmlChar *utf = (const xmlChar *) in->content; in xmlCharEncOutFunc()
1892 xmlBufferGrow(out, charrefLen * 4); in xmlCharEncOutFunc()
1893 written = out->size - out->use - 1; in xmlCharEncOutFunc()
1895 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written, in xmlCharEncOutFunc()
1900 out->use += written; in xmlCharEncOutFunc()
1902 out->content[out->use] = 0; in xmlCharEncOutFunc()
1919 if (handler == NULL) in xmlCharEncCloseFunc()
1922 if (handler->flags & XML_HANDLER_STATIC) in xmlCharEncCloseFunc()
1925 xmlFree(handler->name); in xmlCharEncCloseFunc()
1926 if (handler->ctxtDtor != NULL) { in xmlCharEncCloseFunc()
1927 handler->ctxtDtor(handler->inputCtxt); in xmlCharEncCloseFunc()
1928 handler->ctxtDtor(handler->outputCtxt); in xmlCharEncCloseFunc()
1944 * of constant cost if the input is UTF-8 but can be costly if run
1945 * on non-UTF-8 input.
1947 * Returns the index in bytes from the beginning of the entity or -1
1954 if (ctxt == NULL) in xmlByteConsumed()
1955 return(-1); in xmlByteConsumed()
1956 in = ctxt->input; in xmlByteConsumed()
1957 if (in == NULL) in xmlByteConsumed()
1958 return(-1); in xmlByteConsumed()
1960 if ((in->buf != NULL) && (in->buf->encoder != NULL)) { in xmlByteConsumed()
1962 xmlCharEncodingHandler * handler = in->buf->encoder; in xmlByteConsumed()
1969 if (in->end - in->cur > 0) { in xmlByteConsumed()
1971 const unsigned char *cur = (const unsigned char *)in->cur; in xmlByteConsumed()
1975 if (convbuf == NULL) in xmlByteConsumed()
1976 return(-1); in xmlByteConsumed()
1978 toconv = in->end - cur; in xmlByteConsumed()
1985 return(-1); in xmlByteConsumed()
1988 if (in->buf->rawconsumed < (unsigned long) unused) in xmlByteConsumed()
1989 return(-1); in xmlByteConsumed()
1990 return(in->buf->rawconsumed - unused); in xmlByteConsumed()
1993 return(in->consumed + (in->cur - in->base)); in xmlByteConsumed()
2003 asciiToAscii(unsigned char* out, int *poutlen, in asciiToAscii() argument
2010 if (in == NULL) { in asciiToAscii()
2034 *poutlen = in - instart; in asciiToAscii()
2035 *pinlen = in - instart; in asciiToAscii()
2040 *out++ = c; in asciiToAscii()
2047 latin1ToUTF8(unsigned char* out, int *outlen, in latin1ToUTF8() argument
2050 unsigned char* outstart = out; in latin1ToUTF8()
2056 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL)) in latin1ToUTF8()
2059 outend = out + *outlen; in latin1ToUTF8()
2066 if (out >= outend) in latin1ToUTF8()
2068 *out++ = c; in latin1ToUTF8()
2070 if (outend - out < 2) in latin1ToUTF8()
2072 *out++ = (c >> 6) | 0xC0; in latin1ToUTF8()
2073 *out++ = (c & 0x3F) | 0x80; in latin1ToUTF8()
2079 ret = out - outstart; in latin1ToUTF8()
2082 *outlen = out - outstart; in latin1ToUTF8()
2083 *inlen = in - instart; in latin1ToUTF8()
2089 * @out: a pointer to an array of bytes to store the result
2090 * @outlen: the length of @out
2094 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
2095 * block of chars out.
2104 isolat1ToUTF8(unsigned char* out, int *outlen, in isolat1ToUTF8() argument
2106 return(latin1ToUTF8(out, outlen, in, inlen, NULL)); in isolat1ToUTF8()
2110 UTF8ToUTF8(unsigned char* out, int *outlen, in UTF8ToUTF8() argument
2116 if (in == NULL) { in UTF8ToUTF8()
2130 memcpy(out, in, len); in UTF8ToUTF8()
2140 UTF8ToLatin1(unsigned char* out, int *outlen, in UTF8ToLatin1() argument
2144 const unsigned char* outstart = out; in UTF8ToLatin1()
2150 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) in UTF8ToLatin1()
2153 if (in == NULL) { in UTF8ToLatin1()
2160 outend = out + *outlen; in UTF8ToLatin1()
2162 if (out >= outend) in UTF8ToLatin1()
2168 *out++ = c; in UTF8ToLatin1()
2170 if (inend - in < 2) in UTF8ToLatin1()
2173 *out++ = (unsigned char) ((c << 6) | (*in & 0x3F)); in UTF8ToLatin1()
2182 ret = out - outstart; in UTF8ToLatin1()
2185 *outlen = out - outstart; in UTF8ToLatin1()
2186 *inlen = in - instart; in UTF8ToLatin1()
2192 * @out: a pointer to an array of bytes to store the result
2193 * @outlen: the length of @out
2194 * @in: a pointer to an array of UTF-8 chars
2197 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
2198 * block of chars out.
2207 UTF8Toisolat1(unsigned char* out, int *outlen, in UTF8Toisolat1() argument
2209 if ((out == NULL) || (outlen == NULL) || (in == NULL) || (inlen == NULL)) in UTF8Toisolat1()
2212 return(UTF8ToLatin1(out, outlen, in, inlen, NULL)); in UTF8Toisolat1()
2217 UTF16LEToUTF8(unsigned char *out, int *outlen, in UTF16LEToUTF8() argument
2222 unsigned char *outstart = out; in UTF16LEToUTF8()
2223 unsigned char *outend = out + *outlen; in UTF16LEToUTF8()
2231 if (out >= outend) in UTF16LEToUTF8()
2233 out[0] = c; in UTF16LEToUTF8()
2235 out += 1; in UTF16LEToUTF8()
2237 if (outend - out < 2) in UTF16LEToUTF8()
2239 out[0] = (c >> 6) | 0xC0; in UTF16LEToUTF8()
2240 out[1] = (c & 0x3F) | 0x80; in UTF16LEToUTF8()
2242 out += 2; in UTF16LEToUTF8()
2244 if (outend - out < 3) in UTF16LEToUTF8()
2246 out[0] = (c >> 12) | 0xE0; in UTF16LEToUTF8()
2247 out[1] = ((c >> 6) & 0x3F) | 0x80; in UTF16LEToUTF8()
2248 out[2] = (c & 0x3F) | 0x80; in UTF16LEToUTF8()
2250 out += 3; in UTF16LEToUTF8()
2257 if (inend - in < 4) in UTF16LEToUTF8()
2264 if (outend - out < 4) in UTF16LEToUTF8()
2266 c = (c << 10) + d - ((0xD800 << 10) + 0xDC00 - 0x10000); in UTF16LEToUTF8()
2267 out[0] = (c >> 18) | 0xF0; in UTF16LEToUTF8()
2268 out[1] = ((c >> 12) & 0x3F) | 0x80; in UTF16LEToUTF8()
2269 out[2] = ((c >> 6) & 0x3F) | 0x80; in UTF16LEToUTF8()
2270 out[3] = (c & 0x3F) | 0x80; in UTF16LEToUTF8()
2272 out += 4; in UTF16LEToUTF8()
2276 ret = out - outstart; in UTF16LEToUTF8()
2279 *outlen = out - outstart; in UTF16LEToUTF8()
2280 *inlen = in - instart; in UTF16LEToUTF8()
2286 UTF8ToUTF16LE(unsigned char *out, int *outlen, in UTF8ToUTF16LE() argument
2291 unsigned char *outstart = out; in UTF8ToUTF16LE()
2297 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) in UTF8ToUTF16LE()
2299 if (in == NULL) { in UTF8ToUTF16LE()
2305 outend = out + (*outlen & ~1); in UTF8ToUTF16LE()
2310 if (out >= outend) in UTF8ToUTF16LE()
2312 out[0] = c; in UTF8ToUTF16LE()
2313 out[1] = 0; in UTF8ToUTF16LE()
2315 out += 2; in UTF8ToUTF16LE()
2338 if (inend - in < len) in UTF8ToUTF16LE()
2357 if (out >= outend) in UTF8ToUTF16LE()
2359 out[0] = c & 0xFF; in UTF8ToUTF16LE()
2360 out[1] = c >> 8; in UTF8ToUTF16LE()
2361 out += 2; in UTF8ToUTF16LE()
2363 if (outend - out < 4) in UTF8ToUTF16LE()
2365 c -= 0x10000; in UTF8ToUTF16LE()
2368 out[0] = c & 0xFF; in UTF8ToUTF16LE()
2369 out[1] = c >> 8; in UTF8ToUTF16LE()
2370 out[2] = d & 0xFF; in UTF8ToUTF16LE()
2371 out[3] = d >> 8; in UTF8ToUTF16LE()
2372 out += 4; in UTF8ToUTF16LE()
2379 ret = out - outstart; in UTF8ToUTF16LE()
2382 *outlen = out - outstart; in UTF8ToUTF16LE()
2383 *inlen = in - instart; in UTF8ToUTF16LE()
2391 if (in == NULL) { in UTF8ToUTF16()
2393 * initialization, add the Byte Order Mark for UTF-16LE in UTF8ToUTF16()
2406 return (UTF8ToUTF16LE(outb, outlen, in, inlen, NULL)); in UTF8ToUTF16()
2411 UTF16BEToUTF8(unsigned char *out, int *outlen, in UTF16BEToUTF8() argument
2416 unsigned char *outstart = out; in UTF16BEToUTF8()
2417 unsigned char *outend = out + *outlen; in UTF16BEToUTF8()
2425 if (out >= outend) in UTF16BEToUTF8()
2427 out[0] = c; in UTF16BEToUTF8()
2429 out += 1; in UTF16BEToUTF8()
2431 if (outend - out < 2) in UTF16BEToUTF8()
2433 out[0] = (c >> 6) | 0xC0; in UTF16BEToUTF8()
2434 out[1] = (c & 0x3F) | 0x80; in UTF16BEToUTF8()
2436 out += 2; in UTF16BEToUTF8()
2438 if (outend - out < 3) in UTF16BEToUTF8()
2440 out[0] = (c >> 12) | 0xE0; in UTF16BEToUTF8()
2441 out[1] = ((c >> 6) & 0x3F) | 0x80; in UTF16BEToUTF8()
2442 out[2] = (c & 0x3F) | 0x80; in UTF16BEToUTF8()
2444 out += 3; in UTF16BEToUTF8()
2451 if (inend - in < 4) in UTF16BEToUTF8()
2458 if (outend - out < 4) in UTF16BEToUTF8()
2460 c = (c << 10) + d - ((0xD800 << 10) + 0xDC00 - 0x10000); in UTF16BEToUTF8()
2461 out[0] = (c >> 18) | 0xF0; in UTF16BEToUTF8()
2462 out[1] = ((c >> 12) & 0x3F) | 0x80; in UTF16BEToUTF8()
2463 out[2] = ((c >> 6) & 0x3F) | 0x80; in UTF16BEToUTF8()
2464 out[3] = (c & 0x3F) | 0x80; in UTF16BEToUTF8()
2466 out += 4; in UTF16BEToUTF8()
2470 ret = out - outstart; in UTF16BEToUTF8()
2473 *outlen = out - outstart; in UTF16BEToUTF8()
2474 *inlen = in - instart; in UTF16BEToUTF8()
2480 UTF8ToUTF16BE(unsigned char *out, int *outlen, in UTF8ToUTF16BE() argument
2485 unsigned char *outstart = out; in UTF8ToUTF16BE()
2490 /* UTF-16BE has no BOM */ in UTF8ToUTF16BE()
2491 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); in UTF8ToUTF16BE()
2492 if (in == NULL) { in UTF8ToUTF16BE()
2498 outend = out + (*outlen & ~1); in UTF8ToUTF16BE()
2503 if (out >= outend) in UTF8ToUTF16BE()
2505 out[0] = 0; in UTF8ToUTF16BE()
2506 out[1] = c; in UTF8ToUTF16BE()
2508 out += 2; in UTF8ToUTF16BE()
2531 if (inend - in < len) in UTF8ToUTF16BE()
2550 if (out >= outend) in UTF8ToUTF16BE()
2552 out[0] = c >> 8; in UTF8ToUTF16BE()
2553 out[1] = c & 0xFF; in UTF8ToUTF16BE()
2554 out += 2; in UTF8ToUTF16BE()
2556 if (outend - out < 4) in UTF8ToUTF16BE()
2558 c -= 0x10000; in UTF8ToUTF16BE()
2561 out[0] = c >> 8; in UTF8ToUTF16BE()
2562 out[1] = c & 0xFF; in UTF8ToUTF16BE()
2563 out[2] = d >> 8; in UTF8ToUTF16BE()
2564 out[3] = d & 0xFF; in UTF8ToUTF16BE()
2565 out += 4; in UTF8ToUTF16BE()
2572 ret = out - outstart; in UTF8ToUTF16BE()
2575 *outlen = out - outstart; in UTF8ToUTF16BE()
2576 *inlen = in - instart; in UTF8ToUTF16BE()
2583 UTF8ToHtmlWrapper(unsigned char *out, int *outlen, in UTF8ToHtmlWrapper() argument
2586 return(UTF8ToHtml(out, outlen, in, inlen)); in UTF8ToHtmlWrapper()
2594 UTF8ToISO8859x(unsigned char *out, int *outlen, in UTF8ToISO8859x() argument
2599 unsigned char *outstart = out; in UTF8ToISO8859x()
2603 if (in == NULL) { in UTF8ToISO8859x()
2613 outend = out + *outlen; in UTF8ToISO8859x()
2618 if (out >= outend) in UTF8ToISO8859x()
2624 if (inend - in < 2) in UTF8ToISO8859x()
2634 if (out >= outend) in UTF8ToISO8859x()
2641 if (inend - in < 3) in UTF8ToISO8859x()
2653 if (out >= outend) in UTF8ToISO8859x()
2662 *out++ = d; in UTF8ToISO8859x()
2665 ret = out - outstart; in UTF8ToISO8859x()
2668 *outlen = out - outstart; in UTF8ToISO8859x()
2669 *inlen = in - instart; in UTF8ToISO8859x()
2674 ISO8859xToUTF8(unsigned char* out, int *outlen, in ISO8859xToUTF8() argument
2679 unsigned char* outstart = out; in ISO8859xToUTF8()
2683 outend = out + *outlen; in ISO8859xToUTF8()
2690 if (out >= outend) in ISO8859xToUTF8()
2692 *out++ = c; in ISO8859xToUTF8()
2694 c = unicodetable[c - 0x80]; in ISO8859xToUTF8()
2701 if (outend - out < 2) in ISO8859xToUTF8()
2703 *out++ = ((c >> 6) & 0x1F) | 0xC0; in ISO8859xToUTF8()
2704 *out++ = (c & 0x3F) | 0x80; in ISO8859xToUTF8()
2706 if (outend - out < 3) in ISO8859xToUTF8()
2708 *out++ = ((c >> 12) & 0x0F) | 0xE0; in ISO8859xToUTF8()
2709 *out++ = ((c >> 6) & 0x3F) | 0x80; in ISO8859xToUTF8()
2710 *out++ = (c & 0x3F) | 0x80; in ISO8859xToUTF8()
2717 ret = out - outstart; in ISO8859xToUTF8()
2720 *outlen = out - outstart; in ISO8859xToUTF8()
2721 *inlen = in - instart; in ISO8859xToUTF8()