# xref: /aosp_15_r20/prebuilts/build-tools/common/py3-stdlib/string.py (revision cda5da8d549138a6648c5ee6d7a49cf8f4a657be)
"""A collection of string constants.

Public module variables:

whitespace -- a string containing all ASCII whitespace
ascii_lowercase -- a string containing all ASCII lowercase letters
ascii_uppercase -- a string containing all ASCII uppercase letters
ascii_letters -- a string containing all ASCII letters
digits -- a string containing all ASCII decimal digits
hexdigits -- a string containing all ASCII hexadecimal digits
octdigits -- a string containing all ASCII octal digits
punctuation -- a string containing all ASCII punctuation characters
printable -- a string containing all ASCII characters considered printable

"""
16
# Names exported by ``from string import *``.
__all__ = ["ascii_letters", "ascii_lowercase", "ascii_uppercase", "capwords",
           "digits", "hexdigits", "octdigits", "printable", "punctuation",
           "whitespace", "Formatter", "Template"]

# Provides formatter_parser() and formatter_field_name_split(), used by the
# Formatter class.
import _string
22
# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
# Lowercase letters come first, then uppercase.
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
# Decimal digits followed by both cases of a-f.
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
# Concatenation of the classes above: digits, letters, punctuation, whitespace.
printable = digits + ascii_letters + punctuation + whitespace
33
# Functions which aren't available as string methods.

# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  If the optional second argument sep is absent or None,
    runs of whitespace characters are replaced by a single space
    and leading and trailing whitespace are removed, otherwise
    sep is used to split and join the words.

    Example: capwords(" aBc  dEf ") returns "Abc Def", and
    capwords("a-b", "-") returns "A-B".

    """
    # ``sep`` is handed to split() unchanged; any falsy ``sep`` (such as
    # None) makes a single space the joining string.
    return (sep or ' ').join(map(str.capitalize, s.split(sep)))
49
50
####################################################################
import re as _re
from collections import ChainMap as _ChainMap

# Default value of the positional ``mapping`` parameter of substitute() and
# safe_substitute().  It is compared by identity, so "no mapping passed" can
# be told apart from any mapping supplied by the caller.
_sentinel_dict = {}

class Template:
    """A string class for supporting $-substitutions.

    Class attributes that subclasses may override:

    delimiter      -- literal string that introduces a placeholder
    idpattern      -- regular expression for an unbraced identifier
    braceidpattern -- regular expression for an identifier inside braces;
                      when None (or otherwise false) idpattern is used
    flags          -- regular expression flags; re.VERBOSE is always added
    pattern        -- the whole placeholder pattern.  When a class does not
                      define it, it is built from the attributes above.  It
                      is expected to provide the named groups 'escaped',
                      'named', 'braced' and 'invalid', which the methods
                      below look up.
    """

    delimiter = '$'
    # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, but
    # without the ASCII flag.  We can't add re.ASCII to flags because of
    # backward compatibility.  So we use the ?a local flag and [a-z] pattern.
    # See https://bugs.python.org/issue31672
    idpattern = r'(?a:[_a-z][_a-z0-9]*)'
    braceidpattern = None
    flags = _re.IGNORECASE

    def __init_subclass__(cls):
        """Compile and store ``cls.pattern``.

        A ``pattern`` defined directly on the class is used as is;
        otherwise one is assembled from ``delimiter``, ``idpattern`` and
        ``braceidpattern``.  Either way it is compiled with
        ``cls.flags | re.VERBOSE``.
        """
        super().__init_subclass__()
        if 'pattern' in cls.__dict__:
            pattern = cls.pattern
        else:
            delim = _re.escape(cls.delimiter)
            # Named ``ident`` rather than ``id`` so the builtin id() is not
            # shadowed; the generated pattern text is unaffected.
            ident = cls.idpattern
            bid = cls.braceidpattern or cls.idpattern
            pattern = fr"""
            {delim}(?:
              (?P<escaped>{delim})  |   # Escape sequence of two delimiters
              (?P<named>{ident})       |   # delimiter and a Python identifier
              {{(?P<braced>{bid})}} |   # delimiter and a braced identifier
              (?P<invalid>)             # Other ill-formed delimiter exprs
            )
            """
        cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE)

    def __init__(self, template):
        """Store *template*, the string later scanned for placeholders."""
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError reporting where the 'invalid' group of *mo* starts.

        The position is given as a 1-based line number and a column
        computed from the text that precedes the match position.
        """
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(keepends=True)
        if not lines:
            # Nothing precedes the position: report the very first spot.
            colno = 1
            lineno = 1
        else:
            # Column = offset minus the length of all complete earlier lines.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, mapping=_sentinel_dict, /, **kws):
        """Return the template with every placeholder replaced.

        Values are looked up in *mapping* and/or the keyword arguments;
        when both are given, keyword arguments take precedence.  Each value
        is converted with str().  An escaped delimiter is replaced by a
        single delimiter.

        A failed lookup propagates whatever ``mapping[name]`` raises, and
        an ill-formed placeholder raises ValueError.
        """
        if mapping is _sentinel_dict:
            mapping = kws
        elif kws:
            # ChainMap searches kws first, so keywords shadow the mapping.
            mapping = _ChainMap(kws, mapping)
        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                return str(mapping[named])
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, mapping=_sentinel_dict, /, **kws):
        """Like substitute(), but leave unresolved placeholders untouched.

        A placeholder whose lookup raises KeyError, and an ill-formed
        placeholder, are both copied to the result unchanged instead of
        raising.
        """
        if mapping is _sentinel_dict:
            mapping = kws
        elif kws:
            # ChainMap searches kws first, so keywords shadow the mapping.
            mapping = _ChainMap(kws, mapping)
        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                try:
                    return str(mapping[named])
                except KeyError:
                    # Unknown name: keep the original placeholder text.
                    return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def is_valid(self):
        """Return False if the template contains an ill-formed placeholder.

        Returns True otherwise.  Raises ValueError when a match sets none
        of the four expected groups.
        """
        for mo in self.pattern.finditer(self.template):
            if mo.group('invalid') is not None:
                return False
            if (mo.group('named') is None
                and mo.group('braced') is None
                and mo.group('escaped') is None):
                # If all the groups are None, there must be
                # another group we're not expecting
                raise ValueError('Unrecognized named group in pattern',
                    self.pattern)
        return True

    def get_identifiers(self):
        """Return the placeholder names in order of first appearance.

        Each name is listed once.  Escaped delimiters and ill-formed
        placeholders are skipped.  Raises ValueError when a match sets
        none of the four expected groups.
        """
        ids = []
        for mo in self.pattern.finditer(self.template):
            named = mo.group('named') or mo.group('braced')
            if named is not None and named not in ids:
                # add a named group only the first time it appears
                ids.append(named)
            elif (named is None
                and mo.group('invalid') is None
                and mo.group('escaped') is None):
                # If all the groups are None, there must be
                # another group we're not expecting
                raise ValueError('Unrecognized named group in pattern',
                    self.pattern)
        return ids

# Initialize Template.pattern.  __init_subclass__() is automatically called
# only for subclasses, not for the Template class itself.
Template.__init_subclass__()
176
177
########################################################################
# the Formatter class
# see PEP 3101 for details and purpose of this class

# The hard parts are reused from the C implementation.  They're exposed as "_"
# prefixed methods of str.

# The overall parser is implemented in _string.formatter_parser.
# The field name parser is implemented in _string.formatter_field_name_split

class Formatter:
    """Format strings by parsing replacement fields step by step.

    Every step (parse, get_field, get_value, convert_field, format_field,
    check_unused_args) is a separate method so that subclasses can
    override it.
    """

    def format(self, format_string, /, *args, **kwargs):
        """Format *format_string* using the given positional and keyword
        arguments; a thin wrapper around vformat()."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format *format_string* with *args* (a sequence) and *kwargs*
        (a mapping) and return the resulting string.

        After formatting, check_unused_args() is called with the set of
        argument keys that were referenced.
        """
        used_args = set()
        # 2 is the recursion budget handed to _vformat(), which spends one
        # level each time it expands a format spec.
        result, _ = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth,
                 auto_arg_index=0):
        """Format *format_string*; return ``(text, auto_arg_index)``.

        *used_args* is a set that receives the key of every argument that
        is referenced.  *recursion_depth* is decremented for every nested
        format spec; ValueError is raised once it drops below zero.

        *auto_arg_index* tracks field numbering: it is the next automatic
        index while automatic numbering is (or may still become) active,
        and False once an explicit numeric index has been seen.  Mixing
        both numbering styles raises ValueError.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # Decide between automatic and manual numbering from the
                # first component of the field name, so that fields such as
                # "{.attr}" or "{[0]}" are numbered automatically too.
                field_first, _ = _string.formatter_field_name_split(field_name)
                if field_first == '':
                    if auto_arg_index is False:
                        raise ValueError('cannot switch from manual field '
                                         'specification to automatic field '
                                         'numbering')
                    # Put the automatic index in front of any attribute or
                    # item lookups that follow the empty first component.
                    field_name = str(auto_arg_index) + field_name
                    auto_arg_index += 1
                elif isinstance(field_first, int):
                    if auto_arg_index:
                        raise ValueError('cannot switch from automatic field '
                                         'numbering to manual field '
                                         'specification')
                    # disable auto arg incrementing, if it gets
                    # used later on, then an exception will be raised
                    auto_arg_index = False

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec, auto_arg_index = self._vformat(
                    format_spec, args, kwargs,
                    used_args, recursion_depth-1,
                    auto_arg_index=auto_arg_index)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result), auto_arg_index


    def get_value(self, key, args, kwargs):
        """Return the argument selected by *key*: an int indexes *args*,
        anything else is looked up in *kwargs*."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]


    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass


    def format_field(self, value, format_spec):
        """Return *value* formatted with the builtin format()."""
        return format(value, format_spec)


    def convert_field(self, value, conversion):
        """Apply the conversion flag *conversion* to *value*.

        None returns the value unchanged; 's', 'r' and 'a' apply str(),
        repr() and ascii().  Anything else raises ValueError.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 's':
            return str(value)
        elif conversion == 'r':
            return repr(value)
        elif conversion == 'a':
            return ascii(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))


    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split *format_string* into (literal_text, field_name,
        format_spec, conversion) tuples via _string.formatter_parser()."""
        return _string.formatter_parser(format_string)


    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve *field_name* and return ``(obj, first)``.

        ``first`` is the leading component of the field name (the key
        passed to get_value()); ``obj`` is the result of applying the
        remaining attribute and item lookups to that argument.
        """
        first, rest = _string.formatter_field_name_split(field_name)

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first
310