1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi * Perl-Compatible Regular Expressions *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi
5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax
6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language.
7*22dc650dSSadaf Ebrahimi
8*22dc650dSSadaf Ebrahimi Written by Philip Hazel
9*22dc650dSSadaf Ebrahimi Original API code Copyright (c) 1997-2012 University of Cambridge
10*22dc650dSSadaf Ebrahimi New API code Copyright (c) 2016 University of Cambridge
11*22dc650dSSadaf Ebrahimi
12*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
15*22dc650dSSadaf Ebrahimi
16*22dc650dSSadaf Ebrahimi * Redistributions of source code must retain the above copyright notice,
17*22dc650dSSadaf Ebrahimi this list of conditions and the following disclaimer.
18*22dc650dSSadaf Ebrahimi
19*22dc650dSSadaf Ebrahimi * Redistributions in binary form must reproduce the above copyright
20*22dc650dSSadaf Ebrahimi notice, this list of conditions and the following disclaimer in the
21*22dc650dSSadaf Ebrahimi documentation and/or other materials provided with the distribution.
22*22dc650dSSadaf Ebrahimi
23*22dc650dSSadaf Ebrahimi * Neither the name of the University of Cambridge nor the names of its
24*22dc650dSSadaf Ebrahimi contributors may be used to endorse or promote products derived from
25*22dc650dSSadaf Ebrahimi this software without specific prior written permission.
26*22dc650dSSadaf Ebrahimi
27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
38*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
39*22dc650dSSadaf Ebrahimi */
40*22dc650dSSadaf Ebrahimi
41*22dc650dSSadaf Ebrahimi
42*22dc650dSSadaf Ebrahimi /* This file contains a function that converts a Unicode character code point
43*22dc650dSSadaf Ebrahimi into a UTF string. The behaviour is different for each code unit width. */
44*22dc650dSSadaf Ebrahimi
45*22dc650dSSadaf Ebrahimi
46*22dc650dSSadaf Ebrahimi #ifdef HAVE_CONFIG_H
47*22dc650dSSadaf Ebrahimi #include "config.h"
48*22dc650dSSadaf Ebrahimi #endif
49*22dc650dSSadaf Ebrahimi
50*22dc650dSSadaf Ebrahimi #include "pcre2_internal.h"
51*22dc650dSSadaf Ebrahimi
52*22dc650dSSadaf Ebrahimi
53*22dc650dSSadaf Ebrahimi /* If SUPPORT_UNICODE is not defined, this function will never be called.
54*22dc650dSSadaf Ebrahimi Supply a dummy function because some compilers do not like empty source
55*22dc650dSSadaf Ebrahimi modules. */
56*22dc650dSSadaf Ebrahimi
57*22dc650dSSadaf Ebrahimi #ifndef SUPPORT_UNICODE
58*22dc650dSSadaf Ebrahimi unsigned int
PRIV(ord2utf)59*22dc650dSSadaf Ebrahimi PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
60*22dc650dSSadaf Ebrahimi {
61*22dc650dSSadaf Ebrahimi (void)(cvalue);
62*22dc650dSSadaf Ebrahimi (void)(buffer);
63*22dc650dSSadaf Ebrahimi return 0;
64*22dc650dSSadaf Ebrahimi }
65*22dc650dSSadaf Ebrahimi #else /* SUPPORT_UNICODE */
66*22dc650dSSadaf Ebrahimi
67*22dc650dSSadaf Ebrahimi
68*22dc650dSSadaf Ebrahimi /*************************************************
69*22dc650dSSadaf Ebrahimi * Convert code point to UTF *
70*22dc650dSSadaf Ebrahimi *************************************************/
71*22dc650dSSadaf Ebrahimi
72*22dc650dSSadaf Ebrahimi /*
73*22dc650dSSadaf Ebrahimi Arguments:
74*22dc650dSSadaf Ebrahimi cvalue the character value
75*22dc650dSSadaf Ebrahimi buffer pointer to buffer for result
76*22dc650dSSadaf Ebrahimi
77*22dc650dSSadaf Ebrahimi Returns: number of code units placed in the buffer
78*22dc650dSSadaf Ebrahimi */
79*22dc650dSSadaf Ebrahimi
80*22dc650dSSadaf Ebrahimi unsigned int
PRIV(ord2utf)81*22dc650dSSadaf Ebrahimi PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
82*22dc650dSSadaf Ebrahimi {
83*22dc650dSSadaf Ebrahimi /* Convert to UTF-8 */
84*22dc650dSSadaf Ebrahimi
85*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
86*22dc650dSSadaf Ebrahimi int i, j;
87*22dc650dSSadaf Ebrahimi for (i = 0; i < PRIV(utf8_table1_size); i++)
88*22dc650dSSadaf Ebrahimi if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
89*22dc650dSSadaf Ebrahimi buffer += i;
90*22dc650dSSadaf Ebrahimi for (j = i; j > 0; j--)
91*22dc650dSSadaf Ebrahimi {
92*22dc650dSSadaf Ebrahimi *buffer-- = 0x80 | (cvalue & 0x3f);
93*22dc650dSSadaf Ebrahimi cvalue >>= 6;
94*22dc650dSSadaf Ebrahimi }
95*22dc650dSSadaf Ebrahimi *buffer = PRIV(utf8_table2)[i] | cvalue;
96*22dc650dSSadaf Ebrahimi return i + 1;
97*22dc650dSSadaf Ebrahimi
98*22dc650dSSadaf Ebrahimi /* Convert to UTF-16 */
99*22dc650dSSadaf Ebrahimi
100*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
101*22dc650dSSadaf Ebrahimi if (cvalue <= 0xffff)
102*22dc650dSSadaf Ebrahimi {
103*22dc650dSSadaf Ebrahimi *buffer = (PCRE2_UCHAR)cvalue;
104*22dc650dSSadaf Ebrahimi return 1;
105*22dc650dSSadaf Ebrahimi }
106*22dc650dSSadaf Ebrahimi cvalue -= 0x10000;
107*22dc650dSSadaf Ebrahimi *buffer++ = 0xd800 | (cvalue >> 10);
108*22dc650dSSadaf Ebrahimi *buffer = 0xdc00 | (cvalue & 0x3ff);
109*22dc650dSSadaf Ebrahimi return 2;
110*22dc650dSSadaf Ebrahimi
111*22dc650dSSadaf Ebrahimi /* Convert to UTF-32 */
112*22dc650dSSadaf Ebrahimi
113*22dc650dSSadaf Ebrahimi #else
114*22dc650dSSadaf Ebrahimi *buffer = (PCRE2_UCHAR)cvalue;
115*22dc650dSSadaf Ebrahimi return 1;
116*22dc650dSSadaf Ebrahimi #endif
117*22dc650dSSadaf Ebrahimi }
118*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_UNICODE */
119*22dc650dSSadaf Ebrahimi
/* End of pcre2_ord2utf.c */
121