xref: /aosp_15_r20/external/pcre/src/pcre2_ord2utf.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi *      Perl-Compatible Regular Expressions       *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi 
5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax
6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language.
7*22dc650dSSadaf Ebrahimi 
8*22dc650dSSadaf Ebrahimi                        Written by Philip Hazel
9*22dc650dSSadaf Ebrahimi      Original API code Copyright (c) 1997-2012 University of Cambridge
10*22dc650dSSadaf Ebrahimi          New API code Copyright (c) 2016 University of Cambridge
11*22dc650dSSadaf Ebrahimi 
12*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
15*22dc650dSSadaf Ebrahimi 
16*22dc650dSSadaf Ebrahimi     * Redistributions of source code must retain the above copyright notice,
17*22dc650dSSadaf Ebrahimi       this list of conditions and the following disclaimer.
18*22dc650dSSadaf Ebrahimi 
19*22dc650dSSadaf Ebrahimi     * Redistributions in binary form must reproduce the above copyright
20*22dc650dSSadaf Ebrahimi       notice, this list of conditions and the following disclaimer in the
21*22dc650dSSadaf Ebrahimi       documentation and/or other materials provided with the distribution.
22*22dc650dSSadaf Ebrahimi 
23*22dc650dSSadaf Ebrahimi     * Neither the name of the University of Cambridge nor the names of its
24*22dc650dSSadaf Ebrahimi       contributors may be used to endorse or promote products derived from
25*22dc650dSSadaf Ebrahimi       this software without specific prior written permission.
26*22dc650dSSadaf Ebrahimi 
27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
38*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
39*22dc650dSSadaf Ebrahimi */
40*22dc650dSSadaf Ebrahimi 
41*22dc650dSSadaf Ebrahimi 
42*22dc650dSSadaf Ebrahimi /* This file contains a function that converts a Unicode character code point
43*22dc650dSSadaf Ebrahimi into a UTF string. The behaviour is different for each code unit width. */
44*22dc650dSSadaf Ebrahimi 
45*22dc650dSSadaf Ebrahimi 
46*22dc650dSSadaf Ebrahimi #ifdef HAVE_CONFIG_H
47*22dc650dSSadaf Ebrahimi #include "config.h"
48*22dc650dSSadaf Ebrahimi #endif
49*22dc650dSSadaf Ebrahimi 
50*22dc650dSSadaf Ebrahimi #include "pcre2_internal.h"
51*22dc650dSSadaf Ebrahimi 
52*22dc650dSSadaf Ebrahimi 
53*22dc650dSSadaf Ebrahimi /* If SUPPORT_UNICODE is not defined, this function will never be called.
54*22dc650dSSadaf Ebrahimi Supply a dummy function because some compilers do not like empty source
55*22dc650dSSadaf Ebrahimi modules. */
56*22dc650dSSadaf Ebrahimi 
57*22dc650dSSadaf Ebrahimi #ifndef SUPPORT_UNICODE
58*22dc650dSSadaf Ebrahimi unsigned int
PRIV(ord2utf)59*22dc650dSSadaf Ebrahimi PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
60*22dc650dSSadaf Ebrahimi {
61*22dc650dSSadaf Ebrahimi (void)(cvalue);
62*22dc650dSSadaf Ebrahimi (void)(buffer);
63*22dc650dSSadaf Ebrahimi return 0;
64*22dc650dSSadaf Ebrahimi }
65*22dc650dSSadaf Ebrahimi #else  /* SUPPORT_UNICODE */
66*22dc650dSSadaf Ebrahimi 
67*22dc650dSSadaf Ebrahimi 
68*22dc650dSSadaf Ebrahimi /*************************************************
69*22dc650dSSadaf Ebrahimi *          Convert code point to UTF             *
70*22dc650dSSadaf Ebrahimi *************************************************/
71*22dc650dSSadaf Ebrahimi 
72*22dc650dSSadaf Ebrahimi /*
73*22dc650dSSadaf Ebrahimi Arguments:
74*22dc650dSSadaf Ebrahimi   cvalue     the character value
75*22dc650dSSadaf Ebrahimi   buffer     pointer to buffer for result
76*22dc650dSSadaf Ebrahimi 
77*22dc650dSSadaf Ebrahimi Returns:     number of code units placed in the buffer
78*22dc650dSSadaf Ebrahimi */
79*22dc650dSSadaf Ebrahimi 
80*22dc650dSSadaf Ebrahimi unsigned int
PRIV(ord2utf)81*22dc650dSSadaf Ebrahimi PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
82*22dc650dSSadaf Ebrahimi {
83*22dc650dSSadaf Ebrahimi /* Convert to UTF-8 */
84*22dc650dSSadaf Ebrahimi 
85*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH == 8
86*22dc650dSSadaf Ebrahimi int i, j;
87*22dc650dSSadaf Ebrahimi for (i = 0; i < PRIV(utf8_table1_size); i++)
88*22dc650dSSadaf Ebrahimi   if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
89*22dc650dSSadaf Ebrahimi buffer += i;
90*22dc650dSSadaf Ebrahimi for (j = i; j > 0; j--)
91*22dc650dSSadaf Ebrahimi  {
92*22dc650dSSadaf Ebrahimi  *buffer-- = 0x80 | (cvalue & 0x3f);
93*22dc650dSSadaf Ebrahimi  cvalue >>= 6;
94*22dc650dSSadaf Ebrahimi  }
95*22dc650dSSadaf Ebrahimi *buffer = PRIV(utf8_table2)[i] | cvalue;
96*22dc650dSSadaf Ebrahimi return i + 1;
97*22dc650dSSadaf Ebrahimi 
98*22dc650dSSadaf Ebrahimi /* Convert to UTF-16 */
99*22dc650dSSadaf Ebrahimi 
100*22dc650dSSadaf Ebrahimi #elif PCRE2_CODE_UNIT_WIDTH == 16
101*22dc650dSSadaf Ebrahimi if (cvalue <= 0xffff)
102*22dc650dSSadaf Ebrahimi   {
103*22dc650dSSadaf Ebrahimi   *buffer = (PCRE2_UCHAR)cvalue;
104*22dc650dSSadaf Ebrahimi   return 1;
105*22dc650dSSadaf Ebrahimi   }
106*22dc650dSSadaf Ebrahimi cvalue -= 0x10000;
107*22dc650dSSadaf Ebrahimi *buffer++ = 0xd800 | (cvalue >> 10);
108*22dc650dSSadaf Ebrahimi *buffer = 0xdc00 | (cvalue & 0x3ff);
109*22dc650dSSadaf Ebrahimi return 2;
110*22dc650dSSadaf Ebrahimi 
111*22dc650dSSadaf Ebrahimi /* Convert to UTF-32 */
112*22dc650dSSadaf Ebrahimi 
113*22dc650dSSadaf Ebrahimi #else
114*22dc650dSSadaf Ebrahimi *buffer = (PCRE2_UCHAR)cvalue;
115*22dc650dSSadaf Ebrahimi return 1;
116*22dc650dSSadaf Ebrahimi #endif
117*22dc650dSSadaf Ebrahimi }
118*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_UNICODE */
119*22dc650dSSadaf Ebrahimi 
/* End of pcre2_ord2utf.c */
121