1 #ifndef Py_CPYTHON_UNICODEOBJECT_H
2 # error "this header file must not be included directly"
3 #endif
4
5 /* Py_UNICODE was the native Unicode storage format (code unit) used by
6 Python and represents a single Unicode element in the Unicode type.
7 With PEP 393, Py_UNICODE is deprecated and replaced with a
8 typedef to wchar_t. */
9 #define PY_UNICODE_TYPE wchar_t
10 /* Py_DEPRECATED(3.3) */ typedef wchar_t Py_UNICODE;
11
12 /* --- Internal Unicode Operations ---------------------------------------- */
13
14 #ifndef USE_UNICODE_WCHAR_CACHE
15 # define USE_UNICODE_WCHAR_CACHE 1
16 #endif /* USE_UNICODE_WCHAR_CACHE */
17
/* Since splitting on whitespace is an important use case, and
   whitespace in most situations is solely ASCII whitespace, we
   optimize for the common case by using a quick look-up table
   _Py_ascii_whitespace (see below) with an inlined check.

   The table is indexed with the same Py_UCS4-converted value that was
   range-checked.  Indexing with the raw argument would allow an argument of
   a wider type, whose converted value is below 128, to pass the check and
   still index outside the checked 0..127 range.

   NOTE: this is a macro and `ch` is evaluated more than once; do not pass
   an expression with side effects. */
#define Py_UNICODE_ISSPACE(ch) \
    ((Py_UCS4)(ch) < 128U ? _Py_ascii_whitespace[(Py_UCS4)(ch)] : _PyUnicode_IsWhitespace(ch))
26
/* Character classification.  Each macro is a plain alias that forwards its
   argument unchanged to the _PyUnicode_* function named in its expansion. */
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

/* Case conversion aliases. */
#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

/* Numeric classification aliases. */
#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)

/* Numeric value conversion aliases. */
#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)

/* True if `ch` passes any of the alpha, decimal, digit or numeric tests.
   The tests are joined with ||, so evaluation stops at the first one that
   succeeds.  `ch` is expanded once per test and may therefore be evaluated
   up to four times: do not pass an expression with side effects. */
#define Py_UNICODE_ISALNUM(ch) \
    (Py_UNICODE_ISALPHA(ch) || \
     Py_UNICODE_ISDECIMAL(ch) || \
     Py_UNICODE_ISDIGIT(ch) || \
     Py_UNICODE_ISNUMERIC(ch))
52
/* Macros to work with surrogates.

   The three range tests below expand `ch` twice; do not pass an expression
   with side effects. */
/* True if ch is in the surrogate range 0xD800-0xDFFF. */
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
/* True if ch is in the high (leading) surrogate range 0xD800-0xDBFF. */
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
/* True if ch is in the low (trailing) surrogate range 0xDC00-0xDFFF. */
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
/* Join two surrogate characters and return a single Py_UCS4 value.
   The low 10 bits of `high` become bits 10-19, the low 10 bits of `low`
   become bits 0-9, and 0x10000 is added.  No check is made that the
   arguments really are surrogates. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)  \
    (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
/* high surrogate = top 10 bits added to D800
   (0x10000 >> 10 is subtracted to remove the 0x10000 offset from ch).
   No check is made that ch is above U+FFFF. */
#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
/* low surrogate = bottom 10 bits added to DC00 */
#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
65
66 /* --- Unicode Type ------------------------------------------------------- */
67
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
   structure. state.ascii and state.compact are set, and the data
   immediately follow the structure. utf8_length and wstr_length can be found
   in the length field; the utf8 pointer is equal to the data pointer. */
typedef struct {
    /* There are 4 forms of Unicode strings:

       - compact ascii:

         * structure = PyASCIIObject
         * test: PyUnicode_IS_COMPACT_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND
         * compact = 1
         * ascii = 1
         * ready = 1
         * (length is the length of the utf8 and wstr strings)
         * (data starts just after the structure)
         * (since ASCII is decoded from UTF-8, the utf8 string is the data)

       - compact:

         * structure = PyCompactUnicodeObject
         * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 1
         * ready = 1
         * ascii = 0
         * utf8 is not shared with data
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data and wstr_length=length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL
         * (data starts just after the structure)

       - legacy string, not ready:

         * structure = PyUnicodeObject
         * test: kind == PyUnicode_WCHAR_KIND
         * length = 0 (use wstr_length)
         * hash = -1
         * kind = PyUnicode_WCHAR_KIND
         * compact = 0
         * ascii = 0
         * ready = 0
         * interned = SSTATE_NOT_INTERNED
         * wstr is not NULL
         * data.any is NULL
         * utf8 is NULL
         * utf8_length = 0

       - legacy string, ready:

         * structure = PyUnicodeObject
         * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 0
         * ready = 1
         * data.any is not NULL
         * utf8 is shared with data.any and utf8_length = length
           if ascii = 1
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data.any and wstr_length = length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL

       Compact strings use only one memory block (structure + characters),
       whereas legacy strings use one block for the structure and one block
       for characters.

       Legacy strings are created by PyUnicode_FromUnicode() and
       PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
       when PyUnicode_READY() is called.

       See also _PyUnicode_CheckConsistency().
    */
    PyObject_HEAD
    Py_ssize_t length;          /* Number of code points in the string */
    Py_hash_t hash;             /* Hash value; -1 if not set */
    struct {
        /*
           SSTATE_NOT_INTERNED (0)
           SSTATE_INTERNED_MORTAL (1)
           SSTATE_INTERNED_IMMORTAL (2)

           If interned != SSTATE_NOT_INTERNED, the two references from the
           dictionary to this object are *not* counted in ob_refcnt.
         */
        unsigned int interned:2;
        /* Character size:

           - PyUnicode_WCHAR_KIND (0):

             * character type = wchar_t (16 or 32 bits, depending on the
               platform)

           - PyUnicode_1BYTE_KIND (1):

             * character type = Py_UCS1 (8 bits, unsigned)
             * all characters are in the range U+0000-U+00FF (latin1)
             * if ascii is set, all characters are in the range U+0000-U+007F
               (ASCII), otherwise at least one character is in the range
               U+0080-U+00FF

           - PyUnicode_2BYTE_KIND (2):

             * character type = Py_UCS2 (16 bits, unsigned)
             * all characters are in the range U+0000-U+FFFF (BMP)
             * at least one character is in the range U+0100-U+FFFF

           - PyUnicode_4BYTE_KIND (4):

             * character type = Py_UCS4 (32 bits, unsigned)
             * all characters are in the range U+0000-U+10FFFF
             * at least one character is in the range U+10000-U+10FFFF
         */
        unsigned int kind:3;
        /* Compact is with respect to the allocation scheme. Compact unicode
           objects only require one memory block while non-compact objects use
           one block for the PyUnicodeObject struct and another for its data
           buffer. */
        unsigned int compact:1;
        /* The string only contains characters in the range U+0000-U+007F (ASCII)
           and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
           set, use the PyASCIIObject structure. */
        unsigned int ascii:1;
        /* The ready flag indicates whether the object layout is initialized
           completely. This means that this is either a compact object, or
           the data pointer is filled out. The bit is redundant, and helps
           to minimize the test in PyUnicode_IS_READY(). */
        unsigned int ready:1;
        /* Padding to ensure that PyUnicode_DATA() is always aligned to
           4 bytes (see issue #19537 on m68k).  The named bit-fields above
           declare 8 bits in total; this unnamed field brings the declared
           widths to 32 bits. */
        unsigned int :24;
    } state;
    wchar_t *wstr;              /* wchar_t representation (null-terminated) */
} PyASCIIObject;
207
/* Non-ASCII strings allocated through PyUnicode_New use the
   PyCompactUnicodeObject structure. state.compact is set, and the data
   immediately follow the structure.

   The PyASCIIObject is embedded as the first member, so a pointer to this
   structure can also be read through a PyASCIIObject pointer. */
typedef struct {
    PyASCIIObject _base;
    Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
                                 * terminating \0. */
    char *utf8;                 /* UTF-8 representation (null-terminated) */
    Py_ssize_t wstr_length;     /* Number of code points in wstr; a surrogate
                                 * pair counts as two code points. */
} PyCompactUnicodeObject;
219
/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
   PyUnicodeObject structure. The actual string data is initially in the wstr
   block, and copied into the data block using _PyUnicode_Ready.

   The PyCompactUnicodeObject (and through it the PyASCIIObject) is embedded
   as the first member. */
typedef struct {
    PyCompactUnicodeObject _base;
    /* One pointer viewed through four types; which typed member is
       meaningful depends on state.kind. */
    union {
        void *any;
        Py_UCS1 *latin1;
        Py_UCS2 *ucs2;
        Py_UCS4 *ucs4;
    } data;                     /* Canonical, smallest-form Unicode buffer */
} PyUnicodeObject;
232
233 PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
234 PyObject *op,
235 int check_content);
236
237
/* Cast helpers: convert `op` to a pointer to one of the three Unicode
   structures.  Each expansion first asserts PyUnicode_Check(op) and then
   performs the cast with _Py_CAST, so `op` appears twice in the expansion.
   The check is an assert() and is therefore absent when assertions are
   compiled out. */
#define _PyASCIIObject_CAST(op) \
    (assert(PyUnicode_Check(op)), \
     _Py_CAST(PyASCIIObject*, (op)))
#define _PyCompactUnicodeObject_CAST(op) \
    (assert(PyUnicode_Check(op)), \
     _Py_CAST(PyCompactUnicodeObject*, (op)))
#define _PyUnicodeObject_CAST(op) \
    (assert(PyUnicode_Check(op)), \
     _Py_CAST(PyUnicodeObject*, (op)))
247
248
249 /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
250
251 /* Values for PyASCIIObject.state: */
252
253 /* Interning state. */
254 #define SSTATE_NOT_INTERNED 0
255 #define SSTATE_INTERNED_MORTAL 1
256 #define SSTATE_INTERNED_IMMORTAL 2
257
258 /* Use only if you know it's a string */
PyUnicode_CHECK_INTERNED(PyObject * op)259 static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {
260 return _PyASCIIObject_CAST(op)->state.interned;
261 }
262 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
263 # define PyUnicode_CHECK_INTERNED(op) PyUnicode_CHECK_INTERNED(_PyObject_CAST(op))
264 #endif
265
266 /* Fast check to determine whether an object is ready. Equivalent to:
267 PyUnicode_IS_COMPACT(op) || _PyUnicodeObject_CAST(op)->data.any */
PyUnicode_IS_READY(PyObject * op)268 static inline unsigned int PyUnicode_IS_READY(PyObject *op) {
269 return _PyASCIIObject_CAST(op)->state.ready;
270 }
271 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
272 # define PyUnicode_IS_READY(op) PyUnicode_IS_READY(_PyObject_CAST(op))
273 #endif
274
275 /* Return true if the string contains only ASCII characters, or 0 if not. The
276 string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
277 ready. */
PyUnicode_IS_ASCII(PyObject * op)278 static inline unsigned int PyUnicode_IS_ASCII(PyObject *op) {
279 assert(PyUnicode_IS_READY(op));
280 return _PyASCIIObject_CAST(op)->state.ascii;
281 }
282 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
283 # define PyUnicode_IS_ASCII(op) PyUnicode_IS_ASCII(_PyObject_CAST(op))
284 #endif
285
286 /* Return true if the string is compact or 0 if not.
287 No type checks or Ready calls are performed. */
PyUnicode_IS_COMPACT(PyObject * op)288 static inline unsigned int PyUnicode_IS_COMPACT(PyObject *op) {
289 return _PyASCIIObject_CAST(op)->state.compact;
290 }
291 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
292 # define PyUnicode_IS_COMPACT(op) PyUnicode_IS_COMPACT(_PyObject_CAST(op))
293 #endif
294
295 /* Return true if the string is a compact ASCII string (use PyASCIIObject
296 structure), or 0 if not. No type checks or Ready calls are performed. */
PyUnicode_IS_COMPACT_ASCII(PyObject * op)297 static inline int PyUnicode_IS_COMPACT_ASCII(PyObject *op) {
298 return (_PyASCIIObject_CAST(op)->state.ascii && PyUnicode_IS_COMPACT(op));
299 }
300 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
301 # define PyUnicode_IS_COMPACT_ASCII(op) PyUnicode_IS_COMPACT_ASCII(_PyObject_CAST(op))
302 #endif
303
/* Storage kinds of a Unicode string.  The enumerator values are spelled
   out explicitly; note that there is no kind with value 3. */
enum PyUnicode_Kind {
/* String contains only wstr byte characters.  This is only possible
   when the string was created with a legacy API and _PyUnicode_Ready()
   has not been called yet.  */
    PyUnicode_WCHAR_KIND = 0,
/* Return values of the PyUnicode_KIND() function: */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
314
/* Return one of the PyUnicode_*_KIND values defined above, read from
   state.kind.  The expansion asserts PyUnicode_IS_READY(op) first, so `op`
   appears more than once in the expansion: avoid arguments with side
   effects. */
#define PyUnicode_KIND(op) \
    (assert(PyUnicode_IS_READY(op)), \
     _PyASCIIObject_CAST(op)->state.kind)
319
320 /* Return a void pointer to the raw unicode buffer. */
_PyUnicode_COMPACT_DATA(PyObject * op)321 static inline void* _PyUnicode_COMPACT_DATA(PyObject *op) {
322 if (PyUnicode_IS_ASCII(op)) {
323 return _Py_STATIC_CAST(void*, (_PyASCIIObject_CAST(op) + 1));
324 }
325 return _Py_STATIC_CAST(void*, (_PyCompactUnicodeObject_CAST(op) + 1));
326 }
327
/* Return the data.any pointer of a non-compact string.  Both preconditions
   (the string is not compact, the pointer is not NULL) are checked by
   assertions only. */
static inline void*
_PyUnicode_NONCOMPACT_DATA(PyObject *op)
{
    PyUnicodeObject *legacy;

    assert(!PyUnicode_IS_COMPACT(op));
    legacy = _PyUnicodeObject_CAST(op);
    assert(legacy->data.any != NULL);
    return legacy->data.any;
}
335
/* Return a void pointer to the raw character buffer of `op`, choosing the
   compact or the non-compact accessor according to
   PyUnicode_IS_COMPACT(op). */
static inline void*
PyUnicode_DATA(PyObject *op)
{
    return PyUnicode_IS_COMPACT(op)
        ? _PyUnicode_COMPACT_DATA(op)
        : _PyUnicode_NONCOMPACT_DATA(op);
}
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
#  define PyUnicode_DATA(op) PyUnicode_DATA(_PyObject_CAST(op))
#endif
345
/* Return pointers to the canonical representation cast to Py_UCS1,
   Py_UCS2, or Py_UCS4 for direct character access.
   No checks are performed, use PyUnicode_KIND() before to ensure
   these will work correctly.  */

#define PyUnicode_1BYTE_DATA(op) _Py_STATIC_CAST(Py_UCS1*, PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) _Py_STATIC_CAST(Py_UCS2*, PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op) _Py_STATIC_CAST(Py_UCS4*, PyUnicode_DATA(op))
354
355 /* Returns the length of the unicode string. The caller has to make sure that
356 the string has it's canonical representation set before calling
357 this function. Call PyUnicode_(FAST_)Ready to ensure that. */
PyUnicode_GET_LENGTH(PyObject * op)358 static inline Py_ssize_t PyUnicode_GET_LENGTH(PyObject *op) {
359 assert(PyUnicode_IS_READY(op));
360 return _PyASCIIObject_CAST(op)->length;
361 }
362 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
363 # define PyUnicode_GET_LENGTH(op) PyUnicode_GET_LENGTH(_PyObject_CAST(op))
364 #endif
365
366 /* Write into the canonical representation, this function does not do any sanity
367 checks and is intended for usage in loops. The caller should cache the
368 kind and data pointers obtained from other function calls.
369 index is the index in the string (starts at 0) and value is the new
370 code point value which should be written to that location. */
PyUnicode_WRITE(int kind,void * data,Py_ssize_t index,Py_UCS4 value)371 static inline void PyUnicode_WRITE(int kind, void *data,
372 Py_ssize_t index, Py_UCS4 value)
373 {
374 if (kind == PyUnicode_1BYTE_KIND) {
375 assert(value <= 0xffU);
376 _Py_STATIC_CAST(Py_UCS1*, data)[index] = _Py_STATIC_CAST(Py_UCS1, value);
377 }
378 else if (kind == PyUnicode_2BYTE_KIND) {
379 assert(value <= 0xffffU);
380 _Py_STATIC_CAST(Py_UCS2*, data)[index] = _Py_STATIC_CAST(Py_UCS2, value);
381 }
382 else {
383 assert(kind == PyUnicode_4BYTE_KIND);
384 assert(value <= 0x10ffffU);
385 _Py_STATIC_CAST(Py_UCS4*, data)[index] = value;
386 }
387 }
388 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
389 #define PyUnicode_WRITE(kind, data, index, value) \
390 PyUnicode_WRITE(_Py_STATIC_CAST(int, kind), _Py_CAST(void*, data), \
391 (index), _Py_STATIC_CAST(Py_UCS4, value))
392 #endif
393
394 /* Read a code point from the string's canonical representation. No checks
395 or ready calls are performed. */
PyUnicode_READ(int kind,const void * data,Py_ssize_t index)396 static inline Py_UCS4 PyUnicode_READ(int kind,
397 const void *data, Py_ssize_t index)
398 {
399 if (kind == PyUnicode_1BYTE_KIND) {
400 return _Py_STATIC_CAST(const Py_UCS1*, data)[index];
401 }
402 if (kind == PyUnicode_2BYTE_KIND) {
403 return _Py_STATIC_CAST(const Py_UCS2*, data)[index];
404 }
405 assert(kind == PyUnicode_4BYTE_KIND);
406 return _Py_STATIC_CAST(const Py_UCS4*, data)[index];
407 }
408 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
409 #define PyUnicode_READ(kind, data, index) \
410 PyUnicode_READ(_Py_STATIC_CAST(int, kind), \
411 _Py_STATIC_CAST(const void*, data), \
412 (index))
413 #endif
414
415 /* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
416 calls PyUnicode_KIND() and might call it twice. For single reads, use
417 PyUnicode_READ_CHAR, for multiple consecutive reads callers should
418 cache kind and use PyUnicode_READ instead. */
PyUnicode_READ_CHAR(PyObject * unicode,Py_ssize_t index)419 static inline Py_UCS4 PyUnicode_READ_CHAR(PyObject *unicode, Py_ssize_t index)
420 {
421 int kind;
422 assert(PyUnicode_IS_READY(unicode));
423 kind = PyUnicode_KIND(unicode);
424 if (kind == PyUnicode_1BYTE_KIND) {
425 return PyUnicode_1BYTE_DATA(unicode)[index];
426 }
427 if (kind == PyUnicode_2BYTE_KIND) {
428 return PyUnicode_2BYTE_DATA(unicode)[index];
429 }
430 assert(kind == PyUnicode_4BYTE_KIND);
431 return PyUnicode_4BYTE_DATA(unicode)[index];
432 }
433 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
434 # define PyUnicode_READ_CHAR(unicode, index) \
435 PyUnicode_READ_CHAR(_PyObject_CAST(unicode), (index))
436 #endif
437
438 /* Return a maximum character value which is suitable for creating another
439 string based on op. This is always an approximation but more efficient
440 than iterating over the string. */
PyUnicode_MAX_CHAR_VALUE(PyObject * op)441 static inline Py_UCS4 PyUnicode_MAX_CHAR_VALUE(PyObject *op)
442 {
443 int kind;
444
445 assert(PyUnicode_IS_READY(op));
446 if (PyUnicode_IS_ASCII(op)) {
447 return 0x7fU;
448 }
449
450 kind = PyUnicode_KIND(op);
451 if (kind == PyUnicode_1BYTE_KIND) {
452 return 0xffU;
453 }
454 if (kind == PyUnicode_2BYTE_KIND) {
455 return 0xffffU;
456 }
457 assert(kind == PyUnicode_4BYTE_KIND);
458 return 0x10ffffU;
459 }
460 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
461 # define PyUnicode_MAX_CHAR_VALUE(op) \
462 PyUnicode_MAX_CHAR_VALUE(_PyObject_CAST(op))
463 #endif
464
465 /* === Public API ========================================================= */
466
467 /* --- Plain Py_UNICODE --------------------------------------------------- */
468
469 /* With PEP 393, this is the recommended way to allocate a new unicode object.
470 This function will allocate the object and its buffer in a single memory
471 block. Objects created using this function are not resizable. */
472 PyAPI_FUNC(PyObject*) PyUnicode_New(
473 Py_ssize_t size, /* Number of code points in the new string */
474 Py_UCS4 maxchar /* maximum code point value in the string */
475 );
476
477 /* Initializes the canonical string representation from the deprecated
478 wstr/Py_UNICODE representation. This function is used to convert Unicode
479 objects which were created using the old API to the new flexible format
480 introduced with PEP 393.
481
482 Don't call this function directly, use the public PyUnicode_READY() function
483 instead. */
484 PyAPI_FUNC(int) _PyUnicode_Ready(
485 PyObject *unicode /* Unicode object */
486 );
487
488 /* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
489 case. If the canonical representation is not yet set, it will still call
490 _PyUnicode_Ready().
491 Returns 0 on success and -1 on errors. */
PyUnicode_READY(PyObject * op)492 static inline int PyUnicode_READY(PyObject *op)
493 {
494 if (PyUnicode_IS_READY(op)) {
495 return 0;
496 }
497 return _PyUnicode_Ready(op);
498 }
499 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
500 # define PyUnicode_READY(op) PyUnicode_READY(_PyObject_CAST(op))
501 #endif
502
503 /* Get a copy of a Unicode string. */
504 PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
505 PyObject *unicode
506 );
507
/* Copy characters from one unicode object into another, this function performs
   character conversion when necessary and falls back to memcpy() if possible.
510
511 Fail if to is too small (smaller than *how_many* or smaller than
512 len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
513 kind(to), or if *to* has more than 1 reference.
514
   Return the number of written characters, or return -1 and raise an exception
   on error.
517
518 Pseudo-code:
519
520 how_many = min(how_many, len(from) - from_start)
521 to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
522 return how_many
523
524 Note: The function doesn't write a terminating null character.
525 */
526 PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
527 PyObject *to,
528 Py_ssize_t to_start,
529 PyObject *from,
530 Py_ssize_t from_start,
531 Py_ssize_t how_many
532 );
533
534 /* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
535 may crash if parameters are invalid (e.g. if the output string
536 is too short). */
537 PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
538 PyObject *to,
539 Py_ssize_t to_start,
540 PyObject *from,
541 Py_ssize_t from_start,
542 Py_ssize_t how_many
543 );
544
545 /* Fill a string with a character: write fill_char into
546 unicode[start:start+length].
547
548 Fail if fill_char is bigger than the string maximum character, or if the
549 string has more than 1 reference.
550
   Return the number of written characters, or return -1 and raise an exception
   on error. */
553 PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
554 PyObject *unicode,
555 Py_ssize_t start,
556 Py_ssize_t length,
557 Py_UCS4 fill_char
558 );
559
560 /* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
561 if parameters are invalid (e.g. if length is longer than the string). */
562 PyAPI_FUNC(void) _PyUnicode_FastFill(
563 PyObject *unicode,
564 Py_ssize_t start,
565 Py_ssize_t length,
566 Py_UCS4 fill_char
567 );
568
569 /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
570 Scan the string to find the maximum character. */
571 PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
572 int kind,
573 const void *buffer,
574 Py_ssize_t size);
575
576 /* Create a new string from a buffer of ASCII characters.
577 WARNING: Don't check if the string contains any non-ASCII character. */
578 PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
579 const char *buffer,
580 Py_ssize_t size);
581
582 /* Compute the maximum character of the substring unicode[start:end].
583 Return 127 for an empty string. */
584 PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
585 PyObject *unicode,
586 Py_ssize_t start,
587 Py_ssize_t end);
588
589 /* --- Legacy deprecated API ---------------------------------------------- */
590
591 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
592 size.
593
594 u may be NULL which causes the contents to be undefined. It is the
595 user's responsibility to fill in the needed data afterwards. Note
596 that modifying the Unicode object contents after construction is
597 only allowed if u was set to NULL.
598
599 The buffer is copied into the new object. */
600 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
601 const Py_UNICODE *u, /* Unicode buffer */
602 Py_ssize_t size /* size of buffer */
603 );
604
605 /* Return a read-only pointer to the Unicode object's internal
606 Py_UNICODE buffer.
607 If the wchar_t/Py_UNICODE representation is not yet available, this
608 function will calculate it. */
609 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
610 PyObject *unicode /* Unicode object */
611 );
612
613 /* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
614 contains null characters. */
615 PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
616 PyObject *unicode /* Unicode object */
617 );
618
619 /* Return a read-only pointer to the Unicode object's internal
620 Py_UNICODE buffer and save the length at size.
621 If the wchar_t/Py_UNICODE representation is not yet available, this
622 function will calculate it. */
623
624 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
625 PyObject *unicode, /* Unicode object */
626 Py_ssize_t *size /* location where to save the length */
627 );
628
629
630 /* Fast access macros */
631
632 Py_DEPRECATED(3.3)
PyUnicode_WSTR_LENGTH(PyObject * op)633 static inline Py_ssize_t PyUnicode_WSTR_LENGTH(PyObject *op)
634 {
635 if (PyUnicode_IS_COMPACT_ASCII(op)) {
636 return _PyASCIIObject_CAST(op)->length;
637 }
638 else {
639 return _PyCompactUnicodeObject_CAST(op)->wstr_length;
640 }
641 }
642 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
643 # define PyUnicode_WSTR_LENGTH(op) PyUnicode_WSTR_LENGTH(_PyObject_CAST(op))
644 #endif
645
646 /* Returns the deprecated Py_UNICODE representation's size in code units
647 (this includes surrogate pairs as 2 units).
648 If the Py_UNICODE representation is not available, it will be computed
649 on request. Use PyUnicode_GET_LENGTH() for the length in code points. */
650
651 Py_DEPRECATED(3.3)
PyUnicode_GET_SIZE(PyObject * op)652 static inline Py_ssize_t PyUnicode_GET_SIZE(PyObject *op)
653 {
654 _Py_COMP_DIAG_PUSH
655 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
656 if (_PyASCIIObject_CAST(op)->wstr == _Py_NULL) {
657 (void)PyUnicode_AsUnicode(op);
658 assert(_PyASCIIObject_CAST(op)->wstr != _Py_NULL);
659 }
660 return PyUnicode_WSTR_LENGTH(op);
661 _Py_COMP_DIAG_POP
662 }
663 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
664 # define PyUnicode_GET_SIZE(op) PyUnicode_GET_SIZE(_PyObject_CAST(op))
665 #endif
666
667 Py_DEPRECATED(3.3)
PyUnicode_GET_DATA_SIZE(PyObject * op)668 static inline Py_ssize_t PyUnicode_GET_DATA_SIZE(PyObject *op)
669 {
670 _Py_COMP_DIAG_PUSH
671 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
672 return PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE;
673 _Py_COMP_DIAG_POP
674 }
675 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
676 # define PyUnicode_GET_DATA_SIZE(op) PyUnicode_GET_DATA_SIZE(_PyObject_CAST(op))
677 #endif
678
679 /* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
680 representation on demand. Using this macro is very inefficient now,
681 try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
682 use PyUnicode_WRITE() and PyUnicode_READ(). */
683
684 Py_DEPRECATED(3.3)
PyUnicode_AS_UNICODE(PyObject * op)685 static inline Py_UNICODE* PyUnicode_AS_UNICODE(PyObject *op)
686 {
687 wchar_t *wstr = _PyASCIIObject_CAST(op)->wstr;
688 if (wstr != _Py_NULL) {
689 return wstr;
690 }
691
692 _Py_COMP_DIAG_PUSH
693 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
694 return PyUnicode_AsUnicode(op);
695 _Py_COMP_DIAG_POP
696 }
697 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
698 # define PyUnicode_AS_UNICODE(op) PyUnicode_AS_UNICODE(_PyObject_CAST(op))
699 #endif
700
701 Py_DEPRECATED(3.3)
PyUnicode_AS_DATA(PyObject * op)702 static inline const char* PyUnicode_AS_DATA(PyObject *op)
703 {
704 _Py_COMP_DIAG_PUSH
705 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
706 Py_UNICODE *data = PyUnicode_AS_UNICODE(op);
707 // In C++, casting directly PyUnicode* to const char* is not valid
708 return _Py_STATIC_CAST(const char*, _Py_STATIC_CAST(const void*, data));
709 _Py_COMP_DIAG_POP
710 }
711 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
712 # define PyUnicode_AS_DATA(op) PyUnicode_AS_DATA(_PyObject_CAST(op))
713 #endif
714
715
716 /* --- _PyUnicodeWriter API ----------------------------------------------- */
717
/* State of a Unicode writer.  Initialize it with _PyUnicodeWriter_Init()
   and reserve space with _PyUnicodeWriter_Prepare() before writing. */
typedef struct {
    /* String being written to (see `readonly` below). */
    PyObject *buffer;
    /* NOTE(review): presumably the character storage of `buffer`;
       confirm against the implementation. */
    void *data;
    /* Current kind; _PyUnicodeWriter_PrepareKind() compares the requested
       kind against it. */
    enum PyUnicode_Kind kind;
    /* Largest character accepted by the fast path of
       _PyUnicodeWriter_Prepare(). */
    Py_UCS4 maxchar;
    /* _PyUnicodeWriter_Prepare() treats `size - pos` as the number of
       characters that can still be written without reallocation. */
    Py_ssize_t size;
    Py_ssize_t pos;

    /* minimum number of allocated characters (default: 0) */
    Py_ssize_t min_length;

    /* minimum character (default: 127, ASCII) */
    Py_UCS4 min_char;

    /* If non-zero, overallocate the buffer (default: 0). */
    unsigned char overallocate;

    /* If readonly is 1, buffer is a shared string (cannot be modified)
       and size is set to 0. */
    unsigned char readonly;
} _PyUnicodeWriter ;
739
740 /* Initialize a Unicode writer.
741 *
742 * By default, the minimum buffer size is 0 character and overallocation is
743 * disabled. Set min_length, min_char and overallocate attributes to control
744 * the allocation of the buffer. */
745 PyAPI_FUNC(void)
746 _PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
747
/* Prepare the buffer to write 'length' characters
   with the specified maximum character.

   Fast path, evaluated inline: the result is 0 without any call when
   MAXCHAR <= writer->maxchar and LENGTH <= writer->size - writer->pos,
   and also whenever LENGTH is 0.  In every other case
   _PyUnicodeWriter_PrepareInternal() is called and its result is returned.

   This is a macro: WRITER, LENGTH and MAXCHAR may each be evaluated more
   than once, so do not pass expressions with side effects.

   Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)             \
    (((MAXCHAR) <= (WRITER)->maxchar                                  \
      && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
     ? 0                                                              \
     : (((LENGTH) == 0)                                               \
        ? 0                                                           \
        : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
759
760 /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
761 instead. */
762 PyAPI_FUNC(int)
763 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
764 Py_ssize_t length, Py_UCS4 maxchar);
765
/* Prepare the buffer to have at least the kind KIND.
   For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
   support characters in range U+0000-U+FFFF.

   KIND must not be PyUnicode_WCHAR_KIND (assertion only).  If KIND is not
   greater than writer->kind the result is 0 without any call; otherwise
   _PyUnicodeWriter_PrepareKindInternal() is called and its result is
   returned.

   This is a macro: WRITER and KIND may each be evaluated more than once,
   so do not pass expressions with side effects.

   Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                    \
    (assert((KIND) != PyUnicode_WCHAR_KIND),                          \
     (KIND) <= (WRITER)->kind                                         \
     ? 0                                                              \
     : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
776
777 /* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
778 macro instead. */
779 PyAPI_FUNC(int)
780 _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
781 enum PyUnicode_Kind kind);
782
783 /* Append a Unicode character.
784 Return 0 on success, raise an exception and return -1 on error. */
785 PyAPI_FUNC(int)
786 _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
787 Py_UCS4 ch
788 );
789
790 /* Append a Unicode string.
791 Return 0 on success, raise an exception and return -1 on error. */
792 PyAPI_FUNC(int)
793 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
794 PyObject *str /* Unicode string */
795 );
796
797 /* Append a substring of a Unicode string.
798 Return 0 on success, raise an exception and return -1 on error. */
799 PyAPI_FUNC(int)
800 _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
801 PyObject *str, /* Unicode string */
802 Py_ssize_t start,
803 Py_ssize_t end
804 );
805
806 /* Append an ASCII-encoded byte string.
807 Return 0 on success, raise an exception and return -1 on error. */
808 PyAPI_FUNC(int)
809 _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
810 const char *str, /* ASCII-encoded byte string */
811 Py_ssize_t len /* number of bytes, or -1 if unknown */
812 );
813
814 /* Append a latin1-encoded byte string.
815 Return 0 on success, raise an exception and return -1 on error. */
816 PyAPI_FUNC(int)
817 _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
818 const char *str, /* latin1-encoded byte string */
819 Py_ssize_t len /* length in bytes */
820 );
821
822 /* Get the value of the writer as a Unicode string. Clear the
823 buffer of the writer. Raise an exception and return NULL
824 on error. */
825 PyAPI_FUNC(PyObject *)
826 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
827
828 /* Deallocate memory of a writer (clear its internal buffer). */
829 PyAPI_FUNC(void)
830 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
831
832
833 /* Format the object based on the format_spec, as defined in PEP 3101
834 (Advanced String Formatting). */
835 PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
836 _PyUnicodeWriter *writer,
837 PyObject *obj,
838 PyObject *format_spec,
839 Py_ssize_t start,
840 Py_ssize_t end);
841
842 /* --- Manage the default encoding ---------------------------------------- */
843
844 /* Returns a pointer to the default encoding (UTF-8) of the
845 Unicode object unicode.
846
847 Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
848 in the unicodeobject.
849
850 _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
851 support the previous internal function with the same behaviour.
852
853 Use of this API is DEPRECATED since no size information can be
854 extracted from the returned data.
855 */
856
857 PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
858
859 #define _PyUnicode_AsString PyUnicode_AsUTF8
860
861 /* --- UTF-7 Codecs ------------------------------------------------------- */
862
863 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
864 PyObject *unicode, /* Unicode object */
865 int base64SetO, /* Encode RFC2152 Set O characters in base64 */
866 int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
867 const char *errors /* error handling */
868 );
869
870 /* --- UTF-8 Codecs ------------------------------------------------------- */
871
872 PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
873 PyObject *unicode,
874 const char *errors);
875
876 /* --- UTF-32 Codecs ------------------------------------------------------ */
877
878 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
879 PyObject *object, /* Unicode object */
880 const char *errors, /* error handling */
881 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
882 );
883
884 /* --- UTF-16 Codecs ------------------------------------------------------ */
885
886 /* Returns a Python string object holding the UTF-16 encoded value of
887 the Unicode data.
888
889 If byteorder is not 0, output is written according to the following
890 byte order:
891
892 byteorder == -1: little endian
893 byteorder == 0: native byte order (writes a BOM mark)
894 byteorder == 1: big endian
895
896 If byteorder is 0, the output string will always start with the
897 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
898 prepended.
899 */
900 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
901 PyObject* unicode, /* Unicode object */
902 const char *errors, /* error handling */
903 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
904 );
905
906 /* --- Unicode-Escape Codecs ---------------------------------------------- */
907
908 /* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
909 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
910 const char *string, /* Unicode-Escape encoded string */
911 Py_ssize_t length, /* size of string */
912 const char *errors, /* error handling */
913 Py_ssize_t *consumed /* bytes consumed */
914 );
915 /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
916 chars. */
917 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
918 const char *string, /* Unicode-Escape encoded string */
919 Py_ssize_t length, /* size of string */
920 const char *errors, /* error handling */
921 Py_ssize_t *consumed, /* bytes consumed */
922 const char **first_invalid_escape /* on return, points to first
923 invalid escaped char in
924 string. */
925 );
926
927 /* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */
928
929 /* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */
930 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeRawUnicodeEscapeStateful(
931 const char *string, /* Unicode-Escape encoded string */
932 Py_ssize_t length, /* size of string */
933 const char *errors, /* error handling */
934 Py_ssize_t *consumed /* bytes consumed */
935 );
936
937 /* --- Latin-1 Codecs ----------------------------------------------------- */
938
939 PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
940 PyObject* unicode,
941 const char* errors);
942
943 /* --- ASCII Codecs ------------------------------------------------------- */
944
945 PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
946 PyObject* unicode,
947 const char* errors);
948
949 /* --- Character Map Codecs ----------------------------------------------- */
950
951 /* Translate an Unicode object by applying a character mapping table to
952 it and return the resulting Unicode object.
953
954 The mapping table must map Unicode ordinal integers to Unicode strings,
955 Unicode ordinal integers or None (causing deletion of the character).
956
957 Mapping tables may be dictionaries or sequences. Unmapped character
958 ordinals (ones which cause a LookupError) are left untouched and
959 are copied as-is.
960 */
961 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
962 PyObject *unicode, /* Unicode object */
963 PyObject *mapping, /* encoding mapping */
964 const char *errors /* error handling */
965 );
966
967 /* --- Decimal Encoder ---------------------------------------------------- */
968
969 /* Coverts a Unicode object holding a decimal value to an ASCII string
970 for using in int, float and complex parsers.
971 Transforms code points that have decimal digit property to the
972 corresponding ASCII digit code points. Transforms spaces to ASCII.
973 Transforms code points starting from the first non-ASCII code point that
974 is neither a decimal digit nor a space to the end into '?'. */
975
976 PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
977 PyObject *unicode /* Unicode object */
978 );
979
980 /* --- Methods & Slots ---------------------------------------------------- */
981
982 PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
983 PyObject *separator,
984 PyObject *const *items,
985 Py_ssize_t seqlen
986 );
987
988 /* Test whether a unicode is equal to ASCII identifier. Return 1 if true,
989 0 otherwise. The right argument must be ASCII identifier.
990 Any error occurs inside will be cleared before return. */
991 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
992 PyObject *left, /* Left string */
993 _Py_Identifier *right /* Right identifier */
994 );
995
996 /* Test whether a unicode is equal to ASCII string. Return 1 if true,
997 0 otherwise. The right argument must be ASCII-encoded string.
998 Any error occurs inside will be cleared before return. */
999 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
1000 PyObject *left,
1001 const char *right /* ASCII-encoded string */
1002 );
1003
1004 /* Externally visible for str.strip(unicode) */
1005 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
1006 PyObject *self,
1007 int striptype,
1008 PyObject *sepobj
1009 );
1010
1011 /* Using explicit passed-in values, insert the thousands grouping
1012 into the string pointed to by buffer. For the argument descriptions,
1013 see Objects/stringlib/localeutil.h */
1014 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
1015 _PyUnicodeWriter *writer,
1016 Py_ssize_t n_buffer,
1017 PyObject *digits,
1018 Py_ssize_t d_pos,
1019 Py_ssize_t n_digits,
1020 Py_ssize_t min_width,
1021 const char *grouping,
1022 PyObject *thousands_sep,
1023 Py_UCS4 *maxchar);
1024
1025 /* === Characters Type APIs =============================================== */
1026
1027 /* Helper array used by Py_UNICODE_ISSPACE(). */
1028
1029 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
1030
/* These should not be used directly.  Use the Py_UNICODE_IS* and
   Py_UNICODE_TO* macros instead.

   These APIs are implemented in Objects/unicodectype.c. */
1037
1038 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
1039 Py_UCS4 ch /* Unicode character */
1040 );
1041
1042 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
1043 Py_UCS4 ch /* Unicode character */
1044 );
1045
1046 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
1047 Py_UCS4 ch /* Unicode character */
1048 );
1049
1050 PyAPI_FUNC(int) _PyUnicode_IsXidStart(
1051 Py_UCS4 ch /* Unicode character */
1052 );
1053
1054 PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
1055 Py_UCS4 ch /* Unicode character */
1056 );
1057
1058 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
1059 const Py_UCS4 ch /* Unicode character */
1060 );
1061
1062 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
1063 const Py_UCS4 ch /* Unicode character */
1064 );
1065
1066 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
1067 Py_UCS4 ch /* Unicode character */
1068 );
1069
1070 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
1071 Py_UCS4 ch /* Unicode character */
1072 );
1073
1074 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
1075 Py_UCS4 ch /* Unicode character */
1076 );
1077
1078 PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
1079 Py_UCS4 ch, /* Unicode character */
1080 Py_UCS4 *res
1081 );
1082
1083 PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
1084 Py_UCS4 ch, /* Unicode character */
1085 Py_UCS4 *res
1086 );
1087
1088 PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
1089 Py_UCS4 ch, /* Unicode character */
1090 Py_UCS4 *res
1091 );
1092
1093 PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
1094 Py_UCS4 ch, /* Unicode character */
1095 Py_UCS4 *res
1096 );
1097
1098 PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
1099 Py_UCS4 ch /* Unicode character */
1100 );
1101
1102 PyAPI_FUNC(int) _PyUnicode_IsCased(
1103 Py_UCS4 ch /* Unicode character */
1104 );
1105
1106 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
1107 Py_UCS4 ch /* Unicode character */
1108 );
1109
1110 PyAPI_FUNC(int) _PyUnicode_ToDigit(
1111 Py_UCS4 ch /* Unicode character */
1112 );
1113
1114 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
1115 Py_UCS4 ch /* Unicode character */
1116 );
1117
1118 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
1119 Py_UCS4 ch /* Unicode character */
1120 );
1121
1122 PyAPI_FUNC(int) _PyUnicode_IsDigit(
1123 Py_UCS4 ch /* Unicode character */
1124 );
1125
1126 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
1127 Py_UCS4 ch /* Unicode character */
1128 );
1129
1130 PyAPI_FUNC(int) _PyUnicode_IsPrintable(
1131 Py_UCS4 ch /* Unicode character */
1132 );
1133
1134 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
1135 Py_UCS4 ch /* Unicode character */
1136 );
1137
1138 PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
1139
1140 /* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
1141 PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
1142
1143 /* Fast equality check when the inputs are known to be exact unicode types
1144 and where the hash values are equal (i.e. a very probable match) */
1145 PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
1146
1147 /* Equality check. Returns -1 on failure. */
1148 PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *);
1149
1150 PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *);
1151 PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *);
1152
1153 PyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *);
1154