# xref: /aosp_15_r20/prebuilts/build-tools/common/py3-stdlib/re/_constants.py (revision cda5da8d549138a6648c5ee6d7a49cf8f4a657be)
#
# Secret Labs' Regular Expression Engine
#
# various symbols used by the regular expression engine.
# run this script to update the _sre include files!
#
# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
#
# See the __init__.py file for information on usage and redistribution.
#

"""Internal support module for sre"""
13
# Version stamp for this set of constants.
# update when constants are added or removed

MAGIC = 20220615

# Both names are provided by the _sre extension module.
from _sre import MAXREPEAT, MAXGROUPS
19
# SRE standard exception (access as sre.error)
# should this really be here?

class error(Exception):
    """Exception raised for invalid regular expressions.

    The text passed to ``Exception`` is ``msg`` extended with the failing
    position when both ``pattern`` and ``pos`` are given, and additionally
    with the line and column when the pattern contains a newline.

    Attributes:

        msg: The unformatted error message
        pattern: The regular expression pattern
        pos: The index in the pattern where compilation failed (may be None)
        lineno: The line corresponding to pos (may be None); counted from 1
        colno: The column corresponding to pos (may be None); counted from 1
    """

    # Report the class as living in the public ``re`` module.
    __module__ = 're'

    def __init__(self, msg, pattern=None, pos=None):
        """Store the error details and build the formatted message.

        Args:
            msg: Unformatted description of the problem.
            pattern: The pattern being compiled (str or bytes), or None.
            pos: Index into ``pattern`` where the problem was found, or None.
        """
        self.msg = msg
        self.pattern = pattern
        self.pos = pos
        if pattern is not None and pos is not None:
            msg = '%s at position %d' % (msg, pos)
            # Use a newline of the same type as the pattern so that
            # count()/rfind() work for both str and bytes patterns.
            if isinstance(pattern, str):
                newline = '\n'
            else:
                newline = b'\n'
            # Number of newlines before pos, plus one -> 1-based line number.
            self.lineno = pattern.count(newline, 0, pos) + 1
            # rfind() yields -1 when there is no earlier newline, which makes
            # the column 1-based on the first line as well.
            self.colno = pos - pattern.rfind(newline, 0, pos)
            # Line/column are only appended for multi-line patterns.
            if newline in pattern:
                msg = '%s (line %d, column %d)' % (msg, self.lineno, self.colno)
        else:
            self.lineno = self.colno = None
        super().__init__(msg)
54
55
class _NamedIntConstant(int):
    """An ``int`` that carries a symbolic name and uses it as its repr."""

    def __new__(cls, value, name):
        """Create the integer ``value`` and attach ``name`` to it."""
        self = super().__new__(cls, value)
        self.name = name
        return self

    def __repr__(self):
        """Return the symbolic name instead of the numeric value."""
        return self.name

    # Replaces the inherited __reduce__ with None; presumably intended to
    # make these constants unpicklable.
    __reduce__ = None
66
# Re-wrap the imported value so that its repr is the name 'MAXREPEAT'.
MAXREPEAT = _NamedIntConstant(MAXREPEAT, 'MAXREPEAT')
68
def _makecodes(*names):
    """Create consecutively numbered named constants.

    Each name is given its position in ``names`` (starting at 0) as its
    value. Every constant is also bound under its own name in this
    module's globals.

    Returns:
        The list of created constants, in the order given.
    """
    items = [_NamedIntConstant(i, name) for i, name in enumerate(names)]
    globals().update({item.name: item for item in items})
    return items
73
# operators
OPCODES = _makecodes(
    # failure=0 success=1 (just because it looks better that way :-)
    'FAILURE', 'SUCCESS',

    'ANY', 'ANY_ALL',
    'ASSERT', 'ASSERT_NOT',
    'AT',
    'BRANCH',
    'CATEGORY',
    'CHARSET', 'BIGCHARSET',
    'GROUPREF', 'GROUPREF_EXISTS',
    'IN',
    'INFO',
    'JUMP',
    'LITERAL',
    'MARK',
    'MAX_UNTIL',
    'MIN_UNTIL',
    'NOT_LITERAL',
    'NEGATE',
    'RANGE',
    'REPEAT',
    'REPEAT_ONE',
    'SUBPATTERN',
    'MIN_REPEAT_ONE',
    'ATOMIC_GROUP',
    'POSSESSIVE_REPEAT',
    'POSSESSIVE_REPEAT_ONE',

    'GROUPREF_IGNORE',
    'IN_IGNORE',
    'LITERAL_IGNORE',
    'NOT_LITERAL_IGNORE',

    'GROUPREF_LOC_IGNORE',
    'IN_LOC_IGNORE',
    'LITERAL_LOC_IGNORE',
    'NOT_LITERAL_LOC_IGNORE',

    'GROUPREF_UNI_IGNORE',
    'IN_UNI_IGNORE',
    'LITERAL_UNI_IGNORE',
    'NOT_LITERAL_UNI_IGNORE',
    'RANGE_UNI_IGNORE',

    # The following opcodes occur only in the parser output,
    # not in the compiled code.
    'MIN_REPEAT', 'MAX_REPEAT',
)
# MIN_REPEAT and MAX_REPEAT stay defined as module-level names; they are
# only dropped from the OPCODES list itself.
del OPCODES[-2:] # remove MIN_REPEAT and MAX_REPEAT
125
# positions
ATCODES = _makecodes(
    'AT_BEGINNING', 'AT_BEGINNING_LINE', 'AT_BEGINNING_STRING',
    'AT_BOUNDARY', 'AT_NON_BOUNDARY',
    'AT_END', 'AT_END_LINE', 'AT_END_STRING',

    # locale variants of the boundary codes
    'AT_LOC_BOUNDARY', 'AT_LOC_NON_BOUNDARY',

    # unicode variants of the boundary codes
    'AT_UNI_BOUNDARY', 'AT_UNI_NON_BOUNDARY',
)
136
# categories
CHCODES = _makecodes(
    'CATEGORY_DIGIT', 'CATEGORY_NOT_DIGIT',
    'CATEGORY_SPACE', 'CATEGORY_NOT_SPACE',
    'CATEGORY_WORD', 'CATEGORY_NOT_WORD',
    'CATEGORY_LINEBREAK', 'CATEGORY_NOT_LINEBREAK',

    # locale variants
    'CATEGORY_LOC_WORD', 'CATEGORY_LOC_NOT_WORD',

    # unicode variants
    'CATEGORY_UNI_DIGIT', 'CATEGORY_UNI_NOT_DIGIT',
    'CATEGORY_UNI_SPACE', 'CATEGORY_UNI_NOT_SPACE',
    'CATEGORY_UNI_WORD', 'CATEGORY_UNI_NOT_WORD',
    'CATEGORY_UNI_LINEBREAK', 'CATEGORY_UNI_NOT_LINEBREAK',
)
151
152
# replacement operations for "ignore case" mode
OP_IGNORE = {
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE,
}

# same mapping, targeting the locale variants of the opcodes
OP_LOCALE_IGNORE = {
    LITERAL: LITERAL_LOC_IGNORE,
    NOT_LITERAL: NOT_LITERAL_LOC_IGNORE,
}

# same mapping, targeting the unicode variants of the opcodes
OP_UNICODE_IGNORE = {
    LITERAL: LITERAL_UNI_IGNORE,
    NOT_LITERAL: NOT_LITERAL_UNI_IGNORE,
}
168
# beginning/end position codes -> their per-line counterparts
AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}

# boundary position codes -> their locale counterparts
AT_LOCALE = {
    AT_BOUNDARY: AT_LOC_BOUNDARY,
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
}

# boundary position codes -> their unicode counterparts
AT_UNICODE = {
    AT_BOUNDARY: AT_UNI_BOUNDARY,
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
}
183
# category codes -> locale counterparts; only the WORD categories have a
# locale variant, every other category maps to itself
CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
}

# category codes -> unicode counterparts
CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}
205
# flags (each value is a distinct power of two)
SRE_FLAG_TEMPLATE = 1 # template mode (unknown purpose, deprecated)
SRE_FLAG_IGNORECASE = 2 # case insensitive
SRE_FLAG_LOCALE = 4 # honour system locale
SRE_FLAG_MULTILINE = 8 # treat target as multiline string
SRE_FLAG_DOTALL = 16 # treat target as a single string
SRE_FLAG_UNICODE = 32 # use unicode "locale"
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
SRE_FLAG_DEBUG = 128 # debugging
SRE_FLAG_ASCII = 256 # use ascii "locale"

# flags for INFO primitive
SRE_INFO_PREFIX = 1 # has prefix
SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4 # pattern starts with character from given set
221