1"""
2Common tests shared by test_unicode, test_userstring and test_bytes.
3"""
4
5import unittest, string, sys, struct
6from test import support
7from test.support import import_helper
8from collections import UserList
9import random
10
class Sequence:
    """Minimal sequence-like wrapper offering only ``len()`` and indexing."""

    def __init__(self, seq='wxyz'):
        # The wrapped object is stored by reference, not copied.
        self.seq = seq

    def __len__(self):
        """Return the length of the wrapped object."""
        return len(self.seq)

    def __getitem__(self, i):
        """Return element (or slice) *i* of the wrapped object."""
        return self.seq[i]
15
class BadSeq1(Sequence):
    """Sequence whose items are not all strings (int, str, int)."""

    def __init__(self):
        self.seq = [7, 'hello', 123]

    def __str__(self):
        template = '{0} {1} {2}'
        return template.format(*self.seq)
19
class BadSeq2(Sequence):
    """Sequence that misreports its size: holds 3 items, claims 8."""

    def __init__(self):
        self.seq = ['a', 'b', 'c']

    def __len__(self):
        # Deliberately larger than len(self.seq).
        return 8
23
24class BaseTest:
25    # These tests are for buffers of values (bytes) and not
26    # specific to character interpretation, used for bytes objects
27    # and various string implementations
28
    # The type to be tested
    # Change in subclasses to change the behaviour of fixtype()
31    type2test = None
32
33    # Whether the "contained items" of the container are integers in
34    # range(0, 256) (i.e. bytes, bytearray) or strings of length 1
35    # (str)
36    contains_bytes = False
37
    # All tests pass their arguments to the testing methods
    # as str objects. fixtype() can be used to convert
    # these arguments to the appropriate type
41    def fixtype(self, obj):
42        if isinstance(obj, str):
43            return self.__class__.type2test(obj)
44        elif isinstance(obj, list):
45            return [self.fixtype(x) for x in obj]
46        elif isinstance(obj, tuple):
47            return tuple([self.fixtype(x) for x in obj])
48        elif isinstance(obj, dict):
49            return dict([
50               (self.fixtype(key), self.fixtype(value))
51               for (key, value) in obj.items()
52            ])
53        else:
54            return obj
55
56    def test_fixtype(self):
57        self.assertIs(type(self.fixtype("123")), self.type2test)
58
59    # check that obj.method(*args) returns result
60    def checkequal(self, result, obj, methodname, *args, **kwargs):
61        result = self.fixtype(result)
62        obj = self.fixtype(obj)
63        args = self.fixtype(args)
64        kwargs = {k: self.fixtype(v) for k,v in kwargs.items()}
65        realresult = getattr(obj, methodname)(*args, **kwargs)
66        self.assertEqual(
67            result,
68            realresult
69        )
70        # if the original is returned make sure that
71        # this doesn't happen with subclasses
72        if obj is realresult:
73            try:
74                class subtype(self.__class__.type2test):
75                    pass
76            except TypeError:
77                pass  # Skip this if we can't subclass
78            else:
79                obj = subtype(obj)
80                realresult = getattr(obj, methodname)(*args)
81                self.assertIsNot(obj, realresult)
82
83    # check that obj.method(*args) raises exc
84    def checkraises(self, exc, obj, methodname, *args, expected_msg=None):
85        obj = self.fixtype(obj)
86        args = self.fixtype(args)
87        with self.assertRaises(exc) as cm:
88            getattr(obj, methodname)(*args)
89        self.assertNotEqual(str(cm.exception), '')
90        if expected_msg is not None:
91            self.assertEqual(str(cm.exception), expected_msg)
92
93    # call obj.method(*args) without any checks
94    def checkcall(self, obj, methodname, *args):
95        obj = self.fixtype(obj)
96        args = self.fixtype(args)
97        getattr(obj, methodname)(*args)
98
99    def test_count(self):
100        self.checkequal(3, 'aaa', 'count', 'a')
101        self.checkequal(0, 'aaa', 'count', 'b')
102        self.checkequal(3, 'aaa', 'count', 'a')
103        self.checkequal(0, 'aaa', 'count', 'b')
104        self.checkequal(3, 'aaa', 'count', 'a')
105        self.checkequal(0, 'aaa', 'count', 'b')
106        self.checkequal(0, 'aaa', 'count', 'b')
107        self.checkequal(2, 'aaa', 'count', 'a', 1)
108        self.checkequal(0, 'aaa', 'count', 'a', 10)
109        self.checkequal(1, 'aaa', 'count', 'a', -1)
110        self.checkequal(3, 'aaa', 'count', 'a', -10)
111        self.checkequal(1, 'aaa', 'count', 'a', 0, 1)
112        self.checkequal(3, 'aaa', 'count', 'a', 0, 10)
113        self.checkequal(2, 'aaa', 'count', 'a', 0, -1)
114        self.checkequal(0, 'aaa', 'count', 'a', 0, -10)
115        self.checkequal(3, 'aaa', 'count', '', 1)
116        self.checkequal(1, 'aaa', 'count', '', 3)
117        self.checkequal(0, 'aaa', 'count', '', 10)
118        self.checkequal(2, 'aaa', 'count', '', -1)
119        self.checkequal(4, 'aaa', 'count', '', -10)
120
121        self.checkequal(1, '', 'count', '')
122        self.checkequal(0, '', 'count', '', 1, 1)
123        self.checkequal(0, '', 'count', '', sys.maxsize, 0)
124
125        self.checkequal(0, '', 'count', 'xx')
126        self.checkequal(0, '', 'count', 'xx', 1, 1)
127        self.checkequal(0, '', 'count', 'xx', sys.maxsize, 0)
128
129        self.checkraises(TypeError, 'hello', 'count')
130
131        if self.contains_bytes:
132            self.checkequal(0, 'hello', 'count', 42)
133        else:
134            self.checkraises(TypeError, 'hello', 'count', 42)
135
136        # For a variety of combinations,
137        #    verify that str.count() matches an equivalent function
138        #    replacing all occurrences and then differencing the string lengths
139        charset = ['', 'a', 'b']
140        digits = 7
141        base = len(charset)
142        teststrings = set()
143        for i in range(base ** digits):
144            entry = []
145            for j in range(digits):
146                i, m = divmod(i, base)
147                entry.append(charset[m])
148            teststrings.add(''.join(entry))
149        teststrings = [self.fixtype(ts) for ts in teststrings]
150        for i in teststrings:
151            n = len(i)
152            for j in teststrings:
153                r1 = i.count(j)
154                if j:
155                    r2, rem = divmod(n - len(i.replace(j, self.fixtype(''))),
156                                     len(j))
157                else:
158                    r2, rem = len(i)+1, 0
159                if rem or r1 != r2:
160                    self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i))
161                    self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i))
162
163    def test_find(self):
164        self.checkequal(0, 'abcdefghiabc', 'find', 'abc')
165        self.checkequal(9, 'abcdefghiabc', 'find', 'abc', 1)
166        self.checkequal(-1, 'abcdefghiabc', 'find', 'def', 4)
167
168        self.checkequal(0, 'abc', 'find', '', 0)
169        self.checkequal(3, 'abc', 'find', '', 3)
170        self.checkequal(-1, 'abc', 'find', '', 4)
171
172        # to check the ability to pass None as defaults
173        self.checkequal( 2, 'rrarrrrrrrrra', 'find', 'a')
174        self.checkequal(12, 'rrarrrrrrrrra', 'find', 'a', 4)
175        self.checkequal(-1, 'rrarrrrrrrrra', 'find', 'a', 4, 6)
176        self.checkequal(12, 'rrarrrrrrrrra', 'find', 'a', 4, None)
177        self.checkequal( 2, 'rrarrrrrrrrra', 'find', 'a', None, 6)
178
179        self.checkraises(TypeError, 'hello', 'find')
180
181        if self.contains_bytes:
182            self.checkequal(-1, 'hello', 'find', 42)
183        else:
184            self.checkraises(TypeError, 'hello', 'find', 42)
185
186        self.checkequal(0, '', 'find', '')
187        self.checkequal(-1, '', 'find', '', 1, 1)
188        self.checkequal(-1, '', 'find', '', sys.maxsize, 0)
189
190        self.checkequal(-1, '', 'find', 'xx')
191        self.checkequal(-1, '', 'find', 'xx', 1, 1)
192        self.checkequal(-1, '', 'find', 'xx', sys.maxsize, 0)
193
194        # issue 7458
195        self.checkequal(-1, 'ab', 'find', 'xxx', sys.maxsize + 1, 0)
196
197        # For a variety of combinations,
198        #    verify that str.find() matches __contains__
199        #    and that the found substring is really at that location
200        charset = ['', 'a', 'b', 'c']
201        digits = 5
202        base = len(charset)
203        teststrings = set()
204        for i in range(base ** digits):
205            entry = []
206            for j in range(digits):
207                i, m = divmod(i, base)
208                entry.append(charset[m])
209            teststrings.add(''.join(entry))
210        teststrings = [self.fixtype(ts) for ts in teststrings]
211        for i in teststrings:
212            for j in teststrings:
213                loc = i.find(j)
214                r1 = (loc != -1)
215                r2 = j in i
216                self.assertEqual(r1, r2)
217                if loc != -1:
218                    self.assertEqual(i[loc:loc+len(j)], j)
219
220    def test_rfind(self):
221        self.checkequal(9,  'abcdefghiabc', 'rfind', 'abc')
222        self.checkequal(12, 'abcdefghiabc', 'rfind', '')
223        self.checkequal(0, 'abcdefghiabc', 'rfind', 'abcd')
224        self.checkequal(-1, 'abcdefghiabc', 'rfind', 'abcz')
225
226        self.checkequal(3, 'abc', 'rfind', '', 0)
227        self.checkequal(3, 'abc', 'rfind', '', 3)
228        self.checkequal(-1, 'abc', 'rfind', '', 4)
229
230        # to check the ability to pass None as defaults
231        self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a')
232        self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a', 4)
233        self.checkequal(-1, 'rrarrrrrrrrra', 'rfind', 'a', 4, 6)
234        self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a', 4, None)
235        self.checkequal( 2, 'rrarrrrrrrrra', 'rfind', 'a', None, 6)
236
237        self.checkraises(TypeError, 'hello', 'rfind')
238
239        if self.contains_bytes:
240            self.checkequal(-1, 'hello', 'rfind', 42)
241        else:
242            self.checkraises(TypeError, 'hello', 'rfind', 42)
243
244        # For a variety of combinations,
245        #    verify that str.rfind() matches __contains__
246        #    and that the found substring is really at that location
247        charset = ['', 'a', 'b', 'c']
248        digits = 5
249        base = len(charset)
250        teststrings = set()
251        for i in range(base ** digits):
252            entry = []
253            for j in range(digits):
254                i, m = divmod(i, base)
255                entry.append(charset[m])
256            teststrings.add(''.join(entry))
257        teststrings = [self.fixtype(ts) for ts in teststrings]
258        for i in teststrings:
259            for j in teststrings:
260                loc = i.rfind(j)
261                r1 = (loc != -1)
262                r2 = j in i
263                self.assertEqual(r1, r2)
264                if loc != -1:
265                    self.assertEqual(i[loc:loc+len(j)], j)
266
267        # issue 7458
268        self.checkequal(-1, 'ab', 'rfind', 'xxx', sys.maxsize + 1, 0)
269
270        # issue #15534
271        self.checkequal(0, '<......\u043c...', "rfind", "<")
272
273    def test_index(self):
274        self.checkequal(0, 'abcdefghiabc', 'index', '')
275        self.checkequal(3, 'abcdefghiabc', 'index', 'def')
276        self.checkequal(0, 'abcdefghiabc', 'index', 'abc')
277        self.checkequal(9, 'abcdefghiabc', 'index', 'abc', 1)
278
279        self.checkraises(ValueError, 'abcdefghiabc', 'index', 'hib')
280        self.checkraises(ValueError, 'abcdefghiab', 'index', 'abc', 1)
281        self.checkraises(ValueError, 'abcdefghi', 'index', 'ghi', 8)
282        self.checkraises(ValueError, 'abcdefghi', 'index', 'ghi', -1)
283
284        # to check the ability to pass None as defaults
285        self.checkequal( 2, 'rrarrrrrrrrra', 'index', 'a')
286        self.checkequal(12, 'rrarrrrrrrrra', 'index', 'a', 4)
287        self.checkraises(ValueError, 'rrarrrrrrrrra', 'index', 'a', 4, 6)
288        self.checkequal(12, 'rrarrrrrrrrra', 'index', 'a', 4, None)
289        self.checkequal( 2, 'rrarrrrrrrrra', 'index', 'a', None, 6)
290
291        self.checkraises(TypeError, 'hello', 'index')
292
293        if self.contains_bytes:
294            self.checkraises(ValueError, 'hello', 'index', 42)
295        else:
296            self.checkraises(TypeError, 'hello', 'index', 42)
297
298    def test_rindex(self):
299        self.checkequal(12, 'abcdefghiabc', 'rindex', '')
300        self.checkequal(3,  'abcdefghiabc', 'rindex', 'def')
301        self.checkequal(9,  'abcdefghiabc', 'rindex', 'abc')
302        self.checkequal(0,  'abcdefghiabc', 'rindex', 'abc', 0, -1)
303
304        self.checkraises(ValueError, 'abcdefghiabc', 'rindex', 'hib')
305        self.checkraises(ValueError, 'defghiabc', 'rindex', 'def', 1)
306        self.checkraises(ValueError, 'defghiabc', 'rindex', 'abc', 0, -1)
307        self.checkraises(ValueError, 'abcdefghi', 'rindex', 'ghi', 0, 8)
308        self.checkraises(ValueError, 'abcdefghi', 'rindex', 'ghi', 0, -1)
309
310        # to check the ability to pass None as defaults
311        self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a')
312        self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a', 4)
313        self.checkraises(ValueError, 'rrarrrrrrrrra', 'rindex', 'a', 4, 6)
314        self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a', 4, None)
315        self.checkequal( 2, 'rrarrrrrrrrra', 'rindex', 'a', None, 6)
316
317        self.checkraises(TypeError, 'hello', 'rindex')
318
319        if self.contains_bytes:
320            self.checkraises(ValueError, 'hello', 'rindex', 42)
321        else:
322            self.checkraises(TypeError, 'hello', 'rindex', 42)
323
324    def test_find_periodic_pattern(self):
325        """Cover the special path for periodic patterns."""
326        def reference_find(p, s):
327            for i in range(len(s)):
328                if s.startswith(p, i):
329                    return i
330            return -1
331
332        rr = random.randrange
333        choices = random.choices
334        for _ in range(1000):
335            p0 = ''.join(choices('abcde', k=rr(10))) * rr(10, 20)
336            p = p0[:len(p0) - rr(10)] # pop off some characters
337            left = ''.join(choices('abcdef', k=rr(2000)))
338            right = ''.join(choices('abcdef', k=rr(2000)))
339            text = left + p + right
340            with self.subTest(p=p, text=text):
341                self.checkequal(reference_find(p, text),
342                                text, 'find', p)
343
344    def test_find_shift_table_overflow(self):
345        """When the table of 8-bit shifts overflows."""
346        N = 2**8 + 100
347
348        # first check the periodic case
349        # here, the shift for 'b' is N + 1.
350        pattern1 = 'a' * N + 'b' + 'a' * N
351        text1 = 'babbaa' * N + pattern1
352        self.checkequal(len(text1)-len(pattern1),
353                        text1, 'find', pattern1)
354
355        # now check the non-periodic case
356        # here, the shift for 'd' is 3*(N+1)+1
357        pattern2 = 'ddd' + 'abc' * N + "eee"
358        text2 = pattern2[:-1] + "ddeede" * 2 * N + pattern2 + "de" * N
359        self.checkequal(len(text2) - N*len("de") - len(pattern2),
360                        text2, 'find', pattern2)
361
362    def test_lower(self):
363        self.checkequal('hello', 'HeLLo', 'lower')
364        self.checkequal('hello', 'hello', 'lower')
365        self.checkraises(TypeError, 'hello', 'lower', 42)
366
367    def test_upper(self):
368        self.checkequal('HELLO', 'HeLLo', 'upper')
369        self.checkequal('HELLO', 'HELLO', 'upper')
370        self.checkraises(TypeError, 'hello', 'upper', 42)
371
372    def test_expandtabs(self):
373        self.checkequal('abc\rab      def\ng       hi', 'abc\rab\tdef\ng\thi',
374                        'expandtabs')
375        self.checkequal('abc\rab      def\ng       hi', 'abc\rab\tdef\ng\thi',
376                        'expandtabs', 8)
377        self.checkequal('abc\rab  def\ng   hi', 'abc\rab\tdef\ng\thi',
378                        'expandtabs', 4)
379        self.checkequal('abc\r\nab      def\ng       hi', 'abc\r\nab\tdef\ng\thi',
380                        'expandtabs')
381        self.checkequal('abc\r\nab      def\ng       hi', 'abc\r\nab\tdef\ng\thi',
382                        'expandtabs', 8)
383        self.checkequal('abc\r\nab  def\ng   hi', 'abc\r\nab\tdef\ng\thi',
384                        'expandtabs', 4)
385        self.checkequal('abc\r\nab\r\ndef\ng\r\nhi', 'abc\r\nab\r\ndef\ng\r\nhi',
386                        'expandtabs', 4)
387        # check keyword args
388        self.checkequal('abc\rab      def\ng       hi', 'abc\rab\tdef\ng\thi',
389                        'expandtabs', tabsize=8)
390        self.checkequal('abc\rab  def\ng   hi', 'abc\rab\tdef\ng\thi',
391                        'expandtabs', tabsize=4)
392
393        self.checkequal('  a\n b', ' \ta\n\tb', 'expandtabs', 1)
394
395        self.checkraises(TypeError, 'hello', 'expandtabs', 42, 42)
396        # This test is only valid when sizeof(int) == sizeof(void*) == 4.
397        if sys.maxsize < (1 << 32) and struct.calcsize('P') == 4:
398            self.checkraises(OverflowError,
399                             '\ta\n\tb', 'expandtabs', sys.maxsize)
400
401    def test_split(self):
402        # by a char
403        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|')
404        self.checkequal(['a|b|c|d'], 'a|b|c|d', 'split', '|', 0)
405        self.checkequal(['a', 'b|c|d'], 'a|b|c|d', 'split', '|', 1)
406        self.checkequal(['a', 'b', 'c|d'], 'a|b|c|d', 'split', '|', 2)
407        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|', 3)
408        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|', 4)
409        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|',
410                        sys.maxsize-2)
411        self.checkequal(['a|b|c|d'], 'a|b|c|d', 'split', '|', 0)
412        self.checkequal(['a', '', 'b||c||d'], 'a||b||c||d', 'split', '|', 2)
413        self.checkequal(['abcd'], 'abcd', 'split', '|')
414        self.checkequal([''], '', 'split', '|')
415        self.checkequal(['endcase ', ''], 'endcase |', 'split', '|')
416        self.checkequal(['', ' startcase'], '| startcase', 'split', '|')
417        self.checkequal(['', 'bothcase', ''], '|bothcase|', 'split', '|')
418        self.checkequal(['a', '', 'b\x00c\x00d'], 'a\x00\x00b\x00c\x00d', 'split', '\x00', 2)
419
420        self.checkequal(['a']*20, ('a|'*20)[:-1], 'split', '|')
421        self.checkequal(['a']*15 +['a|a|a|a|a'],
422                                   ('a|'*20)[:-1], 'split', '|', 15)
423
424        # by string
425        self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
426        self.checkequal(['a', 'b//c//d'], 'a//b//c//d', 'split', '//', 1)
427        self.checkequal(['a', 'b', 'c//d'], 'a//b//c//d', 'split', '//', 2)
428        self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//', 3)
429        self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//', 4)
430        self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//',
431                        sys.maxsize-10)
432        self.checkequal(['a//b//c//d'], 'a//b//c//d', 'split', '//', 0)
433        self.checkequal(['a', '', 'b////c////d'], 'a////b////c////d', 'split', '//', 2)
434        self.checkequal(['endcase ', ''], 'endcase test', 'split', 'test')
435        self.checkequal(['', ' begincase'], 'test begincase', 'split', 'test')
436        self.checkequal(['', ' bothcase ', ''], 'test bothcase test',
437                        'split', 'test')
438        self.checkequal(['a', 'bc'], 'abbbc', 'split', 'bb')
439        self.checkequal(['', ''], 'aaa', 'split', 'aaa')
440        self.checkequal(['aaa'], 'aaa', 'split', 'aaa', 0)
441        self.checkequal(['ab', 'ab'], 'abbaab', 'split', 'ba')
442        self.checkequal(['aaaa'], 'aaaa', 'split', 'aab')
443        self.checkequal([''], '', 'split', 'aaa')
444        self.checkequal(['aa'], 'aa', 'split', 'aaa')
445        self.checkequal(['A', 'bobb'], 'Abbobbbobb', 'split', 'bbobb')
446        self.checkequal(['A', 'B', ''], 'AbbobbBbbobb', 'split', 'bbobb')
447
448        self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'split', 'BLAH')
449        self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'split', 'BLAH', 19)
450        self.checkequal(['a']*18 + ['aBLAHa'], ('aBLAH'*20)[:-4],
451                        'split', 'BLAH', 18)
452
453        # with keyword args
454        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', sep='|')
455        self.checkequal(['a', 'b|c|d'],
456                        'a|b|c|d', 'split', '|', maxsplit=1)
457        self.checkequal(['a', 'b|c|d'],
458                        'a|b|c|d', 'split', sep='|', maxsplit=1)
459        self.checkequal(['a', 'b|c|d'],
460                        'a|b|c|d', 'split', maxsplit=1, sep='|')
461        self.checkequal(['a', 'b c d'],
462                        'a b c d', 'split', maxsplit=1)
463
464        # argument type
465        self.checkraises(TypeError, 'hello', 'split', 42, 42, 42)
466
467        # null case
468        self.checkraises(ValueError, 'hello', 'split', '')
469        self.checkraises(ValueError, 'hello', 'split', '', 0)
470
471    def test_rsplit(self):
472        # without arg
473        self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit')
474        self.checkequal(['a', 'b', 'c', 'd'], 'a  b  c d', 'rsplit')
475        self.checkequal([], '', 'rsplit')
476
477        # by a char
478        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|')
479        self.checkequal(['a|b|c', 'd'], 'a|b|c|d', 'rsplit', '|', 1)
480        self.checkequal(['a|b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 2)
481        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 3)
482        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 4)
483        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|',
484                        sys.maxsize-100)
485        self.checkequal(['a|b|c|d'], 'a|b|c|d', 'rsplit', '|', 0)
486        self.checkequal(['a||b||c', '', 'd'], 'a||b||c||d', 'rsplit', '|', 2)
487        self.checkequal(['abcd'], 'abcd', 'rsplit', '|')
488        self.checkequal([''], '', 'rsplit', '|')
489        self.checkequal(['', ' begincase'], '| begincase', 'rsplit', '|')
490        self.checkequal(['endcase ', ''], 'endcase |', 'rsplit', '|')
491        self.checkequal(['', 'bothcase', ''], '|bothcase|', 'rsplit', '|')
492
493        self.checkequal(['a\x00\x00b', 'c', 'd'], 'a\x00\x00b\x00c\x00d', 'rsplit', '\x00', 2)
494
495        self.checkequal(['a']*20, ('a|'*20)[:-1], 'rsplit', '|')
496        self.checkequal(['a|a|a|a|a']+['a']*15,
497                        ('a|'*20)[:-1], 'rsplit', '|', 15)
498
499        # by string
500        self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//')
501        self.checkequal(['a//b//c', 'd'], 'a//b//c//d', 'rsplit', '//', 1)
502        self.checkequal(['a//b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 2)
503        self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 3)
504        self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 4)
505        self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//',
506                        sys.maxsize-5)
507        self.checkequal(['a//b//c//d'], 'a//b//c//d', 'rsplit', '//', 0)
508        self.checkequal(['a////b////c', '', 'd'], 'a////b////c////d', 'rsplit', '//', 2)
509        self.checkequal(['', ' begincase'], 'test begincase', 'rsplit', 'test')
510        self.checkequal(['endcase ', ''], 'endcase test', 'rsplit', 'test')
511        self.checkequal(['', ' bothcase ', ''], 'test bothcase test',
512                        'rsplit', 'test')
513        self.checkequal(['ab', 'c'], 'abbbc', 'rsplit', 'bb')
514        self.checkequal(['', ''], 'aaa', 'rsplit', 'aaa')
515        self.checkequal(['aaa'], 'aaa', 'rsplit', 'aaa', 0)
516        self.checkequal(['ab', 'ab'], 'abbaab', 'rsplit', 'ba')
517        self.checkequal(['aaaa'], 'aaaa', 'rsplit', 'aab')
518        self.checkequal([''], '', 'rsplit', 'aaa')
519        self.checkequal(['aa'], 'aa', 'rsplit', 'aaa')
520        self.checkequal(['bbob', 'A'], 'bbobbbobbA', 'rsplit', 'bbobb')
521        self.checkequal(['', 'B', 'A'], 'bbobbBbbobbA', 'rsplit', 'bbobb')
522
523        self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'rsplit', 'BLAH')
524        self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'rsplit', 'BLAH', 19)
525        self.checkequal(['aBLAHa'] + ['a']*18, ('aBLAH'*20)[:-4],
526                        'rsplit', 'BLAH', 18)
527
528        # with keyword args
529        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', sep='|')
530        self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', sep=None)
531        self.checkequal(['a b c', 'd'],
532                        'a b c d', 'rsplit', sep=None, maxsplit=1)
533        self.checkequal(['a|b|c', 'd'],
534                        'a|b|c|d', 'rsplit', '|', maxsplit=1)
535        self.checkequal(['a|b|c', 'd'],
536                        'a|b|c|d', 'rsplit', sep='|', maxsplit=1)
537        self.checkequal(['a|b|c', 'd'],
538                        'a|b|c|d', 'rsplit', maxsplit=1, sep='|')
539        self.checkequal(['a b c', 'd'],
540                        'a b c d', 'rsplit', maxsplit=1)
541
542        # argument type
543        self.checkraises(TypeError, 'hello', 'rsplit', 42, 42, 42)
544
545        # null case
546        self.checkraises(ValueError, 'hello', 'rsplit', '')
547        self.checkraises(ValueError, 'hello', 'rsplit', '', 0)
548
549    def test_replace(self):
550        EQ = self.checkequal
551
552        # Operations on the empty string
553        EQ("", "", "replace", "", "")
554        EQ("A", "", "replace", "", "A")
555        EQ("", "", "replace", "A", "")
556        EQ("", "", "replace", "A", "A")
557        EQ("", "", "replace", "", "", 100)
558        EQ("A", "", "replace", "", "A", 100)
559        EQ("", "", "replace", "", "", sys.maxsize)
560
561        # interleave (from=="", 'to' gets inserted everywhere)
562        EQ("A", "A", "replace", "", "")
563        EQ("*A*", "A", "replace", "", "*")
564        EQ("*1A*1", "A", "replace", "", "*1")
565        EQ("*-#A*-#", "A", "replace", "", "*-#")
566        EQ("*-A*-A*-", "AA", "replace", "", "*-")
567        EQ("*-A*-A*-", "AA", "replace", "", "*-", -1)
568        EQ("*-A*-A*-", "AA", "replace", "", "*-", sys.maxsize)
569        EQ("*-A*-A*-", "AA", "replace", "", "*-", 4)
570        EQ("*-A*-A*-", "AA", "replace", "", "*-", 3)
571        EQ("*-A*-A", "AA", "replace", "", "*-", 2)
572        EQ("*-AA", "AA", "replace", "", "*-", 1)
573        EQ("AA", "AA", "replace", "", "*-", 0)
574
575        # single character deletion (from=="A", to=="")
576        EQ("", "A", "replace", "A", "")
577        EQ("", "AAA", "replace", "A", "")
578        EQ("", "AAA", "replace", "A", "", -1)
579        EQ("", "AAA", "replace", "A", "", sys.maxsize)
580        EQ("", "AAA", "replace", "A", "", 4)
581        EQ("", "AAA", "replace", "A", "", 3)
582        EQ("A", "AAA", "replace", "A", "", 2)
583        EQ("AA", "AAA", "replace", "A", "", 1)
584        EQ("AAA", "AAA", "replace", "A", "", 0)
585        EQ("", "AAAAAAAAAA", "replace", "A", "")
586        EQ("BCD", "ABACADA", "replace", "A", "")
587        EQ("BCD", "ABACADA", "replace", "A", "", -1)
588        EQ("BCD", "ABACADA", "replace", "A", "", sys.maxsize)
589        EQ("BCD", "ABACADA", "replace", "A", "", 5)
590        EQ("BCD", "ABACADA", "replace", "A", "", 4)
591        EQ("BCDA", "ABACADA", "replace", "A", "", 3)
592        EQ("BCADA", "ABACADA", "replace", "A", "", 2)
593        EQ("BACADA", "ABACADA", "replace", "A", "", 1)
594        EQ("ABACADA", "ABACADA", "replace", "A", "", 0)
595        EQ("BCD", "ABCAD", "replace", "A", "")
596        EQ("BCD", "ABCADAA", "replace", "A", "")
597        EQ("BCD", "BCD", "replace", "A", "")
598        EQ("*************", "*************", "replace", "A", "")
599        EQ("^A^", "^"+"A"*1000+"^", "replace", "A", "", 999)
600
601        # substring deletion (from=="the", to=="")
602        EQ("", "the", "replace", "the", "")
603        EQ("ater", "theater", "replace", "the", "")
604        EQ("", "thethe", "replace", "the", "")
605        EQ("", "thethethethe", "replace", "the", "")
606        EQ("aaaa", "theatheatheathea", "replace", "the", "")
607        EQ("that", "that", "replace", "the", "")
608        EQ("thaet", "thaet", "replace", "the", "")
609        EQ("here and re", "here and there", "replace", "the", "")
610        EQ("here and re and re", "here and there and there",
611           "replace", "the", "", sys.maxsize)
612        EQ("here and re and re", "here and there and there",
613           "replace", "the", "", -1)
614        EQ("here and re and re", "here and there and there",
615           "replace", "the", "", 3)
616        EQ("here and re and re", "here and there and there",
617           "replace", "the", "", 2)
618        EQ("here and re and there", "here and there and there",
619           "replace", "the", "", 1)
620        EQ("here and there and there", "here and there and there",
621           "replace", "the", "", 0)
622        EQ("here and re and re", "here and there and there", "replace", "the", "")
623
624        EQ("abc", "abc", "replace", "the", "")
625        EQ("abcdefg", "abcdefg", "replace", "the", "")
626
627        # substring deletion (from=="bob", to=="")
628        EQ("bob", "bbobob", "replace", "bob", "")
629        EQ("bobXbob", "bbobobXbbobob", "replace", "bob", "")
630        EQ("aaaaaaa", "aaaaaaabob", "replace", "bob", "")
631        EQ("aaaaaaa", "aaaaaaa", "replace", "bob", "")
632
633        # single character replace in place (len(from)==len(to)==1)
634        EQ("Who goes there?", "Who goes there?", "replace", "o", "o")
635        EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O")
636        EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", sys.maxsize)
637        EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", -1)
638        EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", 3)
639        EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", 2)
640        EQ("WhO goes there?", "Who goes there?", "replace", "o", "O", 1)
641        EQ("Who goes there?", "Who goes there?", "replace", "o", "O", 0)
642
643        EQ("Who goes there?", "Who goes there?", "replace", "a", "q")
644        EQ("who goes there?", "Who goes there?", "replace", "W", "w")
645        EQ("wwho goes there?ww", "WWho goes there?WW", "replace", "W", "w")
646        EQ("Who goes there!", "Who goes there?", "replace", "?", "!")
647        EQ("Who goes there!!", "Who goes there??", "replace", "?", "!")
648
649        EQ("Who goes there?", "Who goes there?", "replace", ".", "!")
650
651        # substring replace in place (len(from)==len(to) > 1)
652        EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**")
653        EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", sys.maxsize)
654        EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", -1)
655        EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", 4)
656        EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", 3)
657        EQ("Th** ** a tissue", "This is a tissue", "replace", "is", "**", 2)
658        EQ("Th** is a tissue", "This is a tissue", "replace", "is", "**", 1)
659        EQ("This is a tissue", "This is a tissue", "replace", "is", "**", 0)
660        EQ("cobob", "bobob", "replace", "bob", "cob")
661        EQ("cobobXcobocob", "bobobXbobobob", "replace", "bob", "cob")
662        EQ("bobob", "bobob", "replace", "bot", "bot")
663
664        # replace single character (len(from)==1, len(to)>1)
665        EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK")
666        EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK", -1)
667        EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK", sys.maxsize)
668        EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK", 2)
669        EQ("ReyKKjavik", "Reykjavik", "replace", "k", "KK", 1)
670        EQ("Reykjavik", "Reykjavik", "replace", "k", "KK", 0)
671        EQ("A----B----C----", "A.B.C.", "replace", ".", "----")
672        # issue #15534
673        EQ('...\u043c......&lt;', '...\u043c......<', "replace", "<", "&lt;")
674
675        EQ("Reykjavik", "Reykjavik", "replace", "q", "KK")
676
677        # replace substring (len(from)>1, len(to)!=len(from))
678        EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
679           "replace", "spam", "ham")
680        EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
681           "replace", "spam", "ham", sys.maxsize)
682        EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
683           "replace", "spam", "ham", -1)
684        EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
685           "replace", "spam", "ham", 4)
686        EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
687           "replace", "spam", "ham", 3)
688        EQ("ham, ham, eggs and spam", "spam, spam, eggs and spam",
689           "replace", "spam", "ham", 2)
690        EQ("ham, spam, eggs and spam", "spam, spam, eggs and spam",
691           "replace", "spam", "ham", 1)
692        EQ("spam, spam, eggs and spam", "spam, spam, eggs and spam",
693           "replace", "spam", "ham", 0)
694
695        EQ("bobob", "bobobob", "replace", "bobob", "bob")
696        EQ("bobobXbobob", "bobobobXbobobob", "replace", "bobob", "bob")
697        EQ("BOBOBOB", "BOBOBOB", "replace", "bob", "bobby")
698
699        self.checkequal('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
700        self.checkequal('onetwothree', 'one!two!three!', 'replace', '!', '')
701        self.checkequal('one@two@three!', 'one!two!three!', 'replace', '!', '@', 2)
702        self.checkequal('one@two@three@', 'one!two!three!', 'replace', '!', '@', 3)
703        self.checkequal('one@two@three@', 'one!two!three!', 'replace', '!', '@', 4)
704        self.checkequal('one!two!three!', 'one!two!three!', 'replace', '!', '@', 0)
705        self.checkequal('one@two@three@', 'one!two!three!', 'replace', '!', '@')
706        self.checkequal('one!two!three!', 'one!two!three!', 'replace', 'x', '@')
707        self.checkequal('one!two!three!', 'one!two!three!', 'replace', 'x', '@', 2)
708        self.checkequal('-a-b-c-', 'abc', 'replace', '', '-')
709        self.checkequal('-a-b-c', 'abc', 'replace', '', '-', 3)
710        self.checkequal('abc', 'abc', 'replace', '', '-', 0)
711        self.checkequal('', '', 'replace', '', '')
712        self.checkequal('abc', 'abc', 'replace', 'ab', '--', 0)
713        self.checkequal('abc', 'abc', 'replace', 'xy', '--')
714        # Next three for SF bug 422088: [OSF1 alpha] string.replace(); died with
715        # MemoryError due to empty result (platform malloc issue when requesting
716        # 0 bytes).
717        self.checkequal('', '123', 'replace', '123', '')
718        self.checkequal('', '123123', 'replace', '123', '')
719        self.checkequal('x', '123x123', 'replace', '123', '')
720
721        self.checkraises(TypeError, 'hello', 'replace')
722        self.checkraises(TypeError, 'hello', 'replace', 42)
723        self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
724        self.checkraises(TypeError, 'hello', 'replace', 'h', 42)
725
726    @unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4,
727                     'only applies to 32-bit platforms')
728    def test_replace_overflow(self):
729        # Check for overflow checking on 32 bit machines
730        A2_16 = "A" * (2**16)
731        self.checkraises(OverflowError, A2_16, "replace", "", A2_16)
732        self.checkraises(OverflowError, A2_16, "replace", "A", A2_16)
733        self.checkraises(OverflowError, A2_16, "replace", "AA", A2_16+A2_16)
734
735    def test_removeprefix(self):
736        self.checkequal('am', 'spam', 'removeprefix', 'sp')
737        self.checkequal('spamspam', 'spamspamspam', 'removeprefix', 'spam')
738        self.checkequal('spam', 'spam', 'removeprefix', 'python')
739        self.checkequal('spam', 'spam', 'removeprefix', 'spider')
740        self.checkequal('spam', 'spam', 'removeprefix', 'spam and eggs')
741
742        self.checkequal('', '', 'removeprefix', '')
743        self.checkequal('', '', 'removeprefix', 'abcde')
744        self.checkequal('abcde', 'abcde', 'removeprefix', '')
745        self.checkequal('', 'abcde', 'removeprefix', 'abcde')
746
747        self.checkraises(TypeError, 'hello', 'removeprefix')
748        self.checkraises(TypeError, 'hello', 'removeprefix', 42)
749        self.checkraises(TypeError, 'hello', 'removeprefix', 42, 'h')
750        self.checkraises(TypeError, 'hello', 'removeprefix', 'h', 42)
751        self.checkraises(TypeError, 'hello', 'removeprefix', ("he", "l"))
752
753    def test_removesuffix(self):
754        self.checkequal('sp', 'spam', 'removesuffix', 'am')
755        self.checkequal('spamspam', 'spamspamspam', 'removesuffix', 'spam')
756        self.checkequal('spam', 'spam', 'removesuffix', 'python')
757        self.checkequal('spam', 'spam', 'removesuffix', 'blam')
758        self.checkequal('spam', 'spam', 'removesuffix', 'eggs and spam')
759
760        self.checkequal('', '', 'removesuffix', '')
761        self.checkequal('', '', 'removesuffix', 'abcde')
762        self.checkequal('abcde', 'abcde', 'removesuffix', '')
763        self.checkequal('', 'abcde', 'removesuffix', 'abcde')
764
765        self.checkraises(TypeError, 'hello', 'removesuffix')
766        self.checkraises(TypeError, 'hello', 'removesuffix', 42)
767        self.checkraises(TypeError, 'hello', 'removesuffix', 42, 'h')
768        self.checkraises(TypeError, 'hello', 'removesuffix', 'h', 42)
769        self.checkraises(TypeError, 'hello', 'removesuffix', ("lo", "l"))
770
771    def test_capitalize(self):
772        self.checkequal(' hello ', ' hello ', 'capitalize')
773        self.checkequal('Hello ', 'Hello ','capitalize')
774        self.checkequal('Hello ', 'hello ','capitalize')
775        self.checkequal('Aaaa', 'aaaa', 'capitalize')
776        self.checkequal('Aaaa', 'AaAa', 'capitalize')
777
778        self.checkraises(TypeError, 'hello', 'capitalize', 42)
779
780    def test_additional_split(self):
781        self.checkequal(['this', 'is', 'the', 'split', 'function'],
782            'this is the split function', 'split')
783
784        # by whitespace
785        self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'split')
786        self.checkequal(['a', 'b c d'], 'a b c d', 'split', None, 1)
787        self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2)
788        self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 3)
789        self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 4)
790        self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None,
791                        sys.maxsize-1)
792        self.checkequal(['a b c d'], 'a b c d', 'split', None, 0)
793        self.checkequal(['a b c d'], '  a b c d', 'split', None, 0)
794        self.checkequal(['a', 'b', 'c  d'], 'a  b  c  d', 'split', None, 2)
795
796        self.checkequal([], '         ', 'split')
797        self.checkequal(['a'], '  a    ', 'split')
798        self.checkequal(['a', 'b'], '  a    b   ', 'split')
799        self.checkequal(['a', 'b   '], '  a    b   ', 'split', None, 1)
800        self.checkequal(['a    b   c   '], '  a    b   c   ', 'split', None, 0)
801        self.checkequal(['a', 'b   c   '], '  a    b   c   ', 'split', None, 1)
802        self.checkequal(['a', 'b', 'c   '], '  a    b   c   ', 'split', None, 2)
803        self.checkequal(['a', 'b', 'c'], '  a    b   c   ', 'split', None, 3)
804        self.checkequal(['a', 'b'], '\n\ta \t\r b \v ', 'split')
805        aaa = ' a '*20
806        self.checkequal(['a']*20, aaa, 'split')
807        self.checkequal(['a'] + [aaa[4:]], aaa, 'split', None, 1)
808        self.checkequal(['a']*19 + ['a '], aaa, 'split', None, 19)
809
810        for b in ('arf\tbarf', 'arf\nbarf', 'arf\rbarf',
811                  'arf\fbarf', 'arf\vbarf'):
812            self.checkequal(['arf', 'barf'], b, 'split')
813            self.checkequal(['arf', 'barf'], b, 'split', None)
814            self.checkequal(['arf', 'barf'], b, 'split', None, 2)
815
816    def test_additional_rsplit(self):
817        self.checkequal(['this', 'is', 'the', 'rsplit', 'function'],
818                         'this is the rsplit function', 'rsplit')
819
820        # by whitespace
821        self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'rsplit')
822        self.checkequal(['a b c', 'd'], 'a b c d', 'rsplit', None, 1)
823        self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2)
824        self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 3)
825        self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 4)
826        self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None,
827                        sys.maxsize-20)
828        self.checkequal(['a b c d'], 'a b c d', 'rsplit', None, 0)
829        self.checkequal(['a b c d'], 'a b c d  ', 'rsplit', None, 0)
830        self.checkequal(['a  b', 'c', 'd'], 'a  b  c  d', 'rsplit', None, 2)
831
832        self.checkequal([], '         ', 'rsplit')
833        self.checkequal(['a'], '  a    ', 'rsplit')
834        self.checkequal(['a', 'b'], '  a    b   ', 'rsplit')
835        self.checkequal(['  a', 'b'], '  a    b   ', 'rsplit', None, 1)
836        self.checkequal(['  a    b   c'], '  a    b   c   ', 'rsplit',
837                        None, 0)
838        self.checkequal(['  a    b','c'], '  a    b   c   ', 'rsplit',
839                        None, 1)
840        self.checkequal(['  a', 'b', 'c'], '  a    b   c   ', 'rsplit',
841                        None, 2)
842        self.checkequal(['a', 'b', 'c'], '  a    b   c   ', 'rsplit',
843                        None, 3)
844        self.checkequal(['a', 'b'], '\n\ta \t\r b \v ', 'rsplit', None, 88)
845        aaa = ' a '*20
846        self.checkequal(['a']*20, aaa, 'rsplit')
847        self.checkequal([aaa[:-4]] + ['a'], aaa, 'rsplit', None, 1)
848        self.checkequal([' a  a'] + ['a']*18, aaa, 'rsplit', None, 18)
849
850        for b in ('arf\tbarf', 'arf\nbarf', 'arf\rbarf',
851                  'arf\fbarf', 'arf\vbarf'):
852            self.checkequal(['arf', 'barf'], b, 'rsplit')
853            self.checkequal(['arf', 'barf'], b, 'rsplit', None)
854            self.checkequal(['arf', 'barf'], b, 'rsplit', None, 2)
855
856    def test_strip_whitespace(self):
857        self.checkequal('hello', '   hello   ', 'strip')
858        self.checkequal('hello   ', '   hello   ', 'lstrip')
859        self.checkequal('   hello', '   hello   ', 'rstrip')
860        self.checkequal('hello', 'hello', 'strip')
861
862        b = ' \t\n\r\f\vabc \t\n\r\f\v'
863        self.checkequal('abc', b, 'strip')
864        self.checkequal('abc \t\n\r\f\v', b, 'lstrip')
865        self.checkequal(' \t\n\r\f\vabc', b, 'rstrip')
866
867        # strip/lstrip/rstrip with None arg
868        self.checkequal('hello', '   hello   ', 'strip', None)
869        self.checkequal('hello   ', '   hello   ', 'lstrip', None)
870        self.checkequal('   hello', '   hello   ', 'rstrip', None)
871        self.checkequal('hello', 'hello', 'strip', None)
872
873    def test_strip(self):
874        # strip/lstrip/rstrip with str arg
875        self.checkequal('hello', 'xyzzyhelloxyzzy', 'strip', 'xyz')
876        self.checkequal('helloxyzzy', 'xyzzyhelloxyzzy', 'lstrip', 'xyz')
877        self.checkequal('xyzzyhello', 'xyzzyhelloxyzzy', 'rstrip', 'xyz')
878        self.checkequal('hello', 'hello', 'strip', 'xyz')
879        self.checkequal('', 'mississippi', 'strip', 'mississippi')
880
881        # only trim the start and end; does not strip internal characters
882        self.checkequal('mississipp', 'mississippi', 'strip', 'i')
883
884        self.checkraises(TypeError, 'hello', 'strip', 42, 42)
885        self.checkraises(TypeError, 'hello', 'lstrip', 42, 42)
886        self.checkraises(TypeError, 'hello', 'rstrip', 42, 42)
887
888    def test_ljust(self):
889        self.checkequal('abc       ', 'abc', 'ljust', 10)
890        self.checkequal('abc   ', 'abc', 'ljust', 6)
891        self.checkequal('abc', 'abc', 'ljust', 3)
892        self.checkequal('abc', 'abc', 'ljust', 2)
893        self.checkequal('abc*******', 'abc', 'ljust', 10, '*')
894        self.checkraises(TypeError, 'abc', 'ljust')
895
896    def test_rjust(self):
897        self.checkequal('       abc', 'abc', 'rjust', 10)
898        self.checkequal('   abc', 'abc', 'rjust', 6)
899        self.checkequal('abc', 'abc', 'rjust', 3)
900        self.checkequal('abc', 'abc', 'rjust', 2)
901        self.checkequal('*******abc', 'abc', 'rjust', 10, '*')
902        self.checkraises(TypeError, 'abc', 'rjust')
903
904    def test_center(self):
905        self.checkequal('   abc    ', 'abc', 'center', 10)
906        self.checkequal(' abc  ', 'abc', 'center', 6)
907        self.checkequal('abc', 'abc', 'center', 3)
908        self.checkequal('abc', 'abc', 'center', 2)
909        self.checkequal('***abc****', 'abc', 'center', 10, '*')
910        self.checkraises(TypeError, 'abc', 'center')
911
912    def test_swapcase(self):
913        self.checkequal('hEllO CoMPuTErS', 'HeLLo cOmpUteRs', 'swapcase')
914
915        self.checkraises(TypeError, 'hello', 'swapcase', 42)
916
917    def test_zfill(self):
918        self.checkequal('123', '123', 'zfill', 2)
919        self.checkequal('123', '123', 'zfill', 3)
920        self.checkequal('0123', '123', 'zfill', 4)
921        self.checkequal('+123', '+123', 'zfill', 3)
922        self.checkequal('+123', '+123', 'zfill', 4)
923        self.checkequal('+0123', '+123', 'zfill', 5)
924        self.checkequal('-123', '-123', 'zfill', 3)
925        self.checkequal('-123', '-123', 'zfill', 4)
926        self.checkequal('-0123', '-123', 'zfill', 5)
927        self.checkequal('000', '', 'zfill', 3)
928        self.checkequal('34', '34', 'zfill', 1)
929        self.checkequal('0034', '34', 'zfill', 4)
930
931        self.checkraises(TypeError, '123', 'zfill')
932
933    def test_islower(self):
934        self.checkequal(False, '', 'islower')
935        self.checkequal(True, 'a', 'islower')
936        self.checkequal(False, 'A', 'islower')
937        self.checkequal(False, '\n', 'islower')
938        self.checkequal(True, 'abc', 'islower')
939        self.checkequal(False, 'aBc', 'islower')
940        self.checkequal(True, 'abc\n', 'islower')
941        self.checkraises(TypeError, 'abc', 'islower', 42)
942
943    def test_isupper(self):
944        self.checkequal(False, '', 'isupper')
945        self.checkequal(False, 'a', 'isupper')
946        self.checkequal(True, 'A', 'isupper')
947        self.checkequal(False, '\n', 'isupper')
948        self.checkequal(True, 'ABC', 'isupper')
949        self.checkequal(False, 'AbC', 'isupper')
950        self.checkequal(True, 'ABC\n', 'isupper')
951        self.checkraises(TypeError, 'abc', 'isupper', 42)
952
953    def test_istitle(self):
954        self.checkequal(False, '', 'istitle')
955        self.checkequal(False, 'a', 'istitle')
956        self.checkequal(True, 'A', 'istitle')
957        self.checkequal(False, '\n', 'istitle')
958        self.checkequal(True, 'A Titlecased Line', 'istitle')
959        self.checkequal(True, 'A\nTitlecased Line', 'istitle')
960        self.checkequal(True, 'A Titlecased, Line', 'istitle')
961        self.checkequal(False, 'Not a capitalized String', 'istitle')
962        self.checkequal(False, 'Not\ta Titlecase String', 'istitle')
963        self.checkequal(False, 'Not--a Titlecase String', 'istitle')
964        self.checkequal(False, 'NOT', 'istitle')
965        self.checkraises(TypeError, 'abc', 'istitle', 42)
966
967    def test_isspace(self):
968        self.checkequal(False, '', 'isspace')
969        self.checkequal(False, 'a', 'isspace')
970        self.checkequal(True, ' ', 'isspace')
971        self.checkequal(True, '\t', 'isspace')
972        self.checkequal(True, '\r', 'isspace')
973        self.checkequal(True, '\n', 'isspace')
974        self.checkequal(True, ' \t\r\n', 'isspace')
975        self.checkequal(False, ' \t\r\na', 'isspace')
976        self.checkraises(TypeError, 'abc', 'isspace', 42)
977
978    def test_isalpha(self):
979        self.checkequal(False, '', 'isalpha')
980        self.checkequal(True, 'a', 'isalpha')
981        self.checkequal(True, 'A', 'isalpha')
982        self.checkequal(False, '\n', 'isalpha')
983        self.checkequal(True, 'abc', 'isalpha')
984        self.checkequal(False, 'aBc123', 'isalpha')
985        self.checkequal(False, 'abc\n', 'isalpha')
986        self.checkraises(TypeError, 'abc', 'isalpha', 42)
987
988    def test_isalnum(self):
989        self.checkequal(False, '', 'isalnum')
990        self.checkequal(True, 'a', 'isalnum')
991        self.checkequal(True, 'A', 'isalnum')
992        self.checkequal(False, '\n', 'isalnum')
993        self.checkequal(True, '123abc456', 'isalnum')
994        self.checkequal(True, 'a1b3c', 'isalnum')
995        self.checkequal(False, 'aBc000 ', 'isalnum')
996        self.checkequal(False, 'abc\n', 'isalnum')
997        self.checkraises(TypeError, 'abc', 'isalnum', 42)
998
999    def test_isascii(self):
1000        self.checkequal(True, '', 'isascii')
1001        self.checkequal(True, '\x00', 'isascii')
1002        self.checkequal(True, '\x7f', 'isascii')
1003        self.checkequal(True, '\x00\x7f', 'isascii')
1004        self.checkequal(False, '\x80', 'isascii')
1005        self.checkequal(False, '\xe9', 'isascii')
1006        # bytes.isascii() and bytearray.isascii() has optimization which
1007        # check 4 or 8 bytes at once.  So check some alignments.
1008        for p in range(8):
1009            self.checkequal(True, ' '*p + '\x7f', 'isascii')
1010            self.checkequal(False, ' '*p + '\x80', 'isascii')
1011            self.checkequal(True, ' '*p + '\x7f' + ' '*8, 'isascii')
1012            self.checkequal(False, ' '*p + '\x80' + ' '*8, 'isascii')
1013
1014    def test_isdigit(self):
1015        self.checkequal(False, '', 'isdigit')
1016        self.checkequal(False, 'a', 'isdigit')
1017        self.checkequal(True, '0', 'isdigit')
1018        self.checkequal(True, '0123456789', 'isdigit')
1019        self.checkequal(False, '0123456789a', 'isdigit')
1020
1021        self.checkraises(TypeError, 'abc', 'isdigit', 42)
1022
1023    def test_title(self):
1024        self.checkequal(' Hello ', ' hello ', 'title')
1025        self.checkequal('Hello ', 'hello ', 'title')
1026        self.checkequal('Hello ', 'Hello ', 'title')
1027        self.checkequal('Format This As Title String', "fOrMaT thIs aS titLe String", 'title')
1028        self.checkequal('Format,This-As*Title;String', "fOrMaT,thIs-aS*titLe;String", 'title', )
1029        self.checkequal('Getint', "getInt", 'title')
1030        self.checkraises(TypeError, 'hello', 'title', 42)
1031
1032    def test_splitlines(self):
1033        self.checkequal(['abc', 'def', '', 'ghi'], "abc\ndef\n\rghi", 'splitlines')
1034        self.checkequal(['abc', 'def', '', 'ghi'], "abc\ndef\n\r\nghi", 'splitlines')
1035        self.checkequal(['abc', 'def', 'ghi'], "abc\ndef\r\nghi", 'splitlines')
1036        self.checkequal(['abc', 'def', 'ghi'], "abc\ndef\r\nghi\n", 'splitlines')
1037        self.checkequal(['abc', 'def', 'ghi', ''], "abc\ndef\r\nghi\n\r", 'splitlines')
1038        self.checkequal(['', 'abc', 'def', 'ghi', ''], "\nabc\ndef\r\nghi\n\r", 'splitlines')
1039        self.checkequal(['', 'abc', 'def', 'ghi', ''],
1040                        "\nabc\ndef\r\nghi\n\r", 'splitlines', False)
1041        self.checkequal(['\n', 'abc\n', 'def\r\n', 'ghi\n', '\r'],
1042                        "\nabc\ndef\r\nghi\n\r", 'splitlines', True)
1043        self.checkequal(['', 'abc', 'def', 'ghi', ''], "\nabc\ndef\r\nghi\n\r",
1044                        'splitlines', keepends=False)
1045        self.checkequal(['\n', 'abc\n', 'def\r\n', 'ghi\n', '\r'],
1046                        "\nabc\ndef\r\nghi\n\r", 'splitlines', keepends=True)
1047
1048        self.checkraises(TypeError, 'abc', 'splitlines', 42, 42)
1049
1050
class CommonTest(BaseTest):
    # This testcase contains tests that can be used in all
    # stringlike classes. Currently this is str and UserString.

    def test_hash(self):
        # SF bug 1054139:  += optimization was not invalidating cached hash value
        whole = self.type2test('DNSSEC')
        built = self.type2test('')
        for piece in whole:
            built += piece
            # Hash every intermediate value; the bug above concerned a hash
            # cached before a later += changed the value.
            hash(built)
        self.assertEqual(hash(whole), hash(built))

    def test_capitalize_nonascii(self):
        # Rows are (expected, subject).
        rows = [
            # check that titlecased chars are lowered correctly
            # \u1ffc is the titlecased char
            ('\u1ffc\u1ff3\u1ff3\u1ff3', '\u1ff3\u1ff3\u1ffc\u1ffc'),
            # check with cased non-letter chars
            ('\u24c5\u24e8\u24e3\u24d7\u24de\u24dd',
             '\u24c5\u24ce\u24c9\u24bd\u24c4\u24c3'),
            ('\u24c5\u24e8\u24e3\u24d7\u24de\u24dd',
             '\u24df\u24e8\u24e3\u24d7\u24de\u24dd'),
            ('\u2160\u2171\u2172', '\u2160\u2161\u2162'),
            ('\u2160\u2171\u2172', '\u2170\u2171\u2172'),
            # check with Ll chars with no upper - nothing changes here
            ('\u019b\u1d00\u1d86\u0221\u1fb7',
             '\u019b\u1d00\u1d86\u0221\u1fb7'),
        ]
        for expected, subject in rows:
            self.checkequal(expected, subject, 'capitalize')
1081
1082
1083class MixinStrUnicodeUserStringTest:
1084    # additional tests that only work for
1085    # stringlike objects, i.e. str, UserString
1086
1087    def test_startswith(self):
1088        self.checkequal(True, 'hello', 'startswith', 'he')
1089        self.checkequal(True, 'hello', 'startswith', 'hello')
1090        self.checkequal(False, 'hello', 'startswith', 'hello world')
1091        self.checkequal(True, 'hello', 'startswith', '')
1092        self.checkequal(False, 'hello', 'startswith', 'ello')
1093        self.checkequal(True, 'hello', 'startswith', 'ello', 1)
1094        self.checkequal(True, 'hello', 'startswith', 'o', 4)
1095        self.checkequal(False, 'hello', 'startswith', 'o', 5)
1096        self.checkequal(True, 'hello', 'startswith', '', 5)
1097        self.checkequal(False, 'hello', 'startswith', 'lo', 6)
1098        self.checkequal(True, 'helloworld', 'startswith', 'lowo', 3)
1099        self.checkequal(True, 'helloworld', 'startswith', 'lowo', 3, 7)
1100        self.checkequal(False, 'helloworld', 'startswith', 'lowo', 3, 6)
1101        self.checkequal(True, '', 'startswith', '', 0, 1)
1102        self.checkequal(True, '', 'startswith', '', 0, 0)
1103        self.checkequal(False, '', 'startswith', '', 1, 0)
1104
1105        # test negative indices
1106        self.checkequal(True, 'hello', 'startswith', 'he', 0, -1)
1107        self.checkequal(True, 'hello', 'startswith', 'he', -53, -1)
1108        self.checkequal(False, 'hello', 'startswith', 'hello', 0, -1)
1109        self.checkequal(False, 'hello', 'startswith', 'hello world', -1, -10)
1110        self.checkequal(False, 'hello', 'startswith', 'ello', -5)
1111        self.checkequal(True, 'hello', 'startswith', 'ello', -4)
1112        self.checkequal(False, 'hello', 'startswith', 'o', -2)
1113        self.checkequal(True, 'hello', 'startswith', 'o', -1)
1114        self.checkequal(True, 'hello', 'startswith', '', -3, -3)
1115        self.checkequal(False, 'hello', 'startswith', 'lo', -9)
1116
1117        self.checkraises(TypeError, 'hello', 'startswith')
1118        self.checkraises(TypeError, 'hello', 'startswith', 42)
1119
1120        # test tuple arguments
1121        self.checkequal(True, 'hello', 'startswith', ('he', 'ha'))
1122        self.checkequal(False, 'hello', 'startswith', ('lo', 'llo'))
1123        self.checkequal(True, 'hello', 'startswith', ('hellox', 'hello'))
1124        self.checkequal(False, 'hello', 'startswith', ())
1125        self.checkequal(True, 'helloworld', 'startswith', ('hellowo',
1126                                                           'rld', 'lowo'), 3)
1127        self.checkequal(False, 'helloworld', 'startswith', ('hellowo', 'ello',
1128                                                            'rld'), 3)
1129        self.checkequal(True, 'hello', 'startswith', ('lo', 'he'), 0, -1)
1130        self.checkequal(False, 'hello', 'startswith', ('he', 'hel'), 0, 1)
1131        self.checkequal(True, 'hello', 'startswith', ('he', 'hel'), 0, 2)
1132
1133        self.checkraises(TypeError, 'hello', 'startswith', (42,))
1134
1135    def test_endswith(self):
1136        self.checkequal(True, 'hello', 'endswith', 'lo')
1137        self.checkequal(False, 'hello', 'endswith', 'he')
1138        self.checkequal(True, 'hello', 'endswith', '')
1139        self.checkequal(False, 'hello', 'endswith', 'hello world')
1140        self.checkequal(False, 'helloworld', 'endswith', 'worl')
1141        self.checkequal(True, 'helloworld', 'endswith', 'worl', 3, 9)
1142        self.checkequal(True, 'helloworld', 'endswith', 'world', 3, 12)
1143        self.checkequal(True, 'helloworld', 'endswith', 'lowo', 1, 7)
1144        self.checkequal(True, 'helloworld', 'endswith', 'lowo', 2, 7)
1145        self.checkequal(True, 'helloworld', 'endswith', 'lowo', 3, 7)
1146        self.checkequal(False, 'helloworld', 'endswith', 'lowo', 4, 7)
1147        self.checkequal(False, 'helloworld', 'endswith', 'lowo', 3, 8)
1148        self.checkequal(False, 'ab', 'endswith', 'ab', 0, 1)
1149        self.checkequal(False, 'ab', 'endswith', 'ab', 0, 0)
1150        self.checkequal(True, '', 'endswith', '', 0, 1)
1151        self.checkequal(True, '', 'endswith', '', 0, 0)
1152        self.checkequal(False, '', 'endswith', '', 1, 0)
1153
1154        # test negative indices
1155        self.checkequal(True, 'hello', 'endswith', 'lo', -2)
1156        self.checkequal(False, 'hello', 'endswith', 'he', -2)
1157        self.checkequal(True, 'hello', 'endswith', '', -3, -3)
1158        self.checkequal(False, 'hello', 'endswith', 'hello world', -10, -2)
1159        self.checkequal(False, 'helloworld', 'endswith', 'worl', -6)
1160        self.checkequal(True, 'helloworld', 'endswith', 'worl', -5, -1)
1161        self.checkequal(True, 'helloworld', 'endswith', 'worl', -5, 9)
1162        self.checkequal(True, 'helloworld', 'endswith', 'world', -7, 12)
1163        self.checkequal(True, 'helloworld', 'endswith', 'lowo', -99, -3)
1164        self.checkequal(True, 'helloworld', 'endswith', 'lowo', -8, -3)
1165        self.checkequal(True, 'helloworld', 'endswith', 'lowo', -7, -3)
1166        self.checkequal(False, 'helloworld', 'endswith', 'lowo', 3, -4)
1167        self.checkequal(False, 'helloworld', 'endswith', 'lowo', -8, -2)
1168
1169        self.checkraises(TypeError, 'hello', 'endswith')
1170        self.checkraises(TypeError, 'hello', 'endswith', 42)
1171
1172        # test tuple arguments
1173        self.checkequal(False, 'hello', 'endswith', ('he', 'ha'))
1174        self.checkequal(True, 'hello', 'endswith', ('lo', 'llo'))
1175        self.checkequal(True, 'hello', 'endswith', ('hellox', 'hello'))
1176        self.checkequal(False, 'hello', 'endswith', ())
1177        self.checkequal(True, 'helloworld', 'endswith', ('hellowo',
1178                                                           'rld', 'lowo'), 3)
1179        self.checkequal(False, 'helloworld', 'endswith', ('hellowo', 'ello',
1180                                                            'rld'), 3, -1)
1181        self.checkequal(True, 'hello', 'endswith', ('hell', 'ell'), 0, -1)
1182        self.checkequal(False, 'hello', 'endswith', ('he', 'hel'), 0, 1)
1183        self.checkequal(True, 'hello', 'endswith', ('he', 'hell'), 0, 4)
1184
1185        self.checkraises(TypeError, 'hello', 'endswith', (42,))
1186
1187    def test___contains__(self):
1188        self.checkequal(True, '', '__contains__', '')
1189        self.checkequal(True, 'abc', '__contains__', '')
1190        self.checkequal(False, 'abc', '__contains__', '\0')
1191        self.checkequal(True, '\0abc', '__contains__', '\0')
1192        self.checkequal(True, 'abc\0', '__contains__', '\0')
1193        self.checkequal(True, '\0abc', '__contains__', 'a')
1194        self.checkequal(True, 'asdf', '__contains__', 'asdf')
1195        self.checkequal(False, 'asd', '__contains__', 'asdf')
1196        self.checkequal(False, '', '__contains__', 'asdf')
1197
1198    def test_subscript(self):
1199        self.checkequal('a', 'abc', '__getitem__', 0)
1200        self.checkequal('c', 'abc', '__getitem__', -1)
1201        self.checkequal('a', 'abc', '__getitem__', 0)
1202        self.checkequal('abc', 'abc', '__getitem__', slice(0, 3))
1203        self.checkequal('abc', 'abc', '__getitem__', slice(0, 1000))
1204        self.checkequal('a', 'abc', '__getitem__', slice(0, 1))
1205        self.checkequal('', 'abc', '__getitem__', slice(0, 0))
1206
1207        self.checkraises(TypeError, 'abc', '__getitem__', 'def')
1208
1209        for idx_type in ('def', object()):
1210            expected_msg = "string indices must be integers, not '{}'".format(type(idx_type).__name__)
1211            self.checkraises(TypeError, 'abc', '__getitem__', idx_type, expected_msg=expected_msg)
1212
1213    def test_slice(self):
1214        self.checkequal('abc', 'abc', '__getitem__', slice(0, 1000))
1215        self.checkequal('abc', 'abc', '__getitem__', slice(0, 3))
1216        self.checkequal('ab', 'abc', '__getitem__', slice(0, 2))
1217        self.checkequal('bc', 'abc', '__getitem__', slice(1, 3))
1218        self.checkequal('b', 'abc', '__getitem__', slice(1, 2))
1219        self.checkequal('', 'abc', '__getitem__', slice(2, 2))
1220        self.checkequal('', 'abc', '__getitem__', slice(1000, 1000))
1221        self.checkequal('', 'abc', '__getitem__', slice(2000, 1000))
1222        self.checkequal('', 'abc', '__getitem__', slice(2, 1))
1223
1224        self.checkraises(TypeError, 'abc', '__getitem__', 'def')
1225
1226    def test_extended_getslice(self):
1227        # Test extended slicing by comparing with list slicing.
1228        s = string.ascii_letters + string.digits
1229        indices = (0, None, 1, 3, 41, sys.maxsize, -1, -2, -37)
1230        for start in indices:
1231            for stop in indices:
1232                # Skip step 0 (invalid)
1233                for step in indices[1:]:
1234                    L = list(s)[start:stop:step]
1235                    self.checkequal("".join(L), s, '__getitem__',
1236                                    slice(start, stop, step))
1237
1238    def test_mul(self):
1239        self.checkequal('', 'abc', '__mul__', -1)
1240        self.checkequal('', 'abc', '__mul__', 0)
1241        self.checkequal('abc', 'abc', '__mul__', 1)
1242        self.checkequal('abcabcabc', 'abc', '__mul__', 3)
1243        self.checkraises(TypeError, 'abc', '__mul__')
1244        self.checkraises(TypeError, 'abc', '__mul__', '')
1245        # XXX: on a 64-bit system, this doesn't raise an overflow error,
1246        # but either raises a MemoryError, or succeeds (if you have 54TiB)
1247        #self.checkraises(OverflowError, 10000*'abc', '__mul__', 2000000000)
1248
1249    def test_join(self):
1250        # join now works with any sequence type
1251        # moved here, because the argument order is
1252        # different in string.join
1253        self.checkequal('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
1254        self.checkequal('abcd', '', 'join', ('a', 'b', 'c', 'd'))
1255        self.checkequal('bd', '', 'join', ('', 'b', '', 'd'))
1256        self.checkequal('ac', '', 'join', ('a', '', 'c', ''))
1257        self.checkequal('w x y z', ' ', 'join', Sequence())
1258        self.checkequal('abc', 'a', 'join', ('abc',))
1259        self.checkequal('z', 'a', 'join', UserList(['z']))
1260        self.checkequal('a.b.c', '.', 'join', ['a', 'b', 'c'])
1261        self.assertRaises(TypeError, '.'.join, ['a', 'b', 3])
1262        for i in [5, 25, 125]:
1263            self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join',
1264                 ['a' * i] * i)
1265            self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join',
1266                 ('a' * i,) * i)
1267
1268        #self.checkequal(str(BadSeq1()), ' ', 'join', BadSeq1())
1269        self.checkequal('a b c', ' ', 'join', BadSeq2())
1270
1271        self.checkraises(TypeError, ' ', 'join')
1272        self.checkraises(TypeError, ' ', 'join', None)
1273        self.checkraises(TypeError, ' ', 'join', 7)
1274        self.checkraises(TypeError, ' ', 'join', [1, 2, bytes()])
1275        try:
1276            def f():
1277                yield 4 + ""
1278            self.fixtype(' ').join(f())
1279        except TypeError as e:
1280            if '+' not in str(e):
1281                self.fail('join() ate exception message')
1282        else:
1283            self.fail('exception not raised')
1284
1285    def test_formatting(self):
1286        self.checkequal('+hello+', '+%s+', '__mod__', 'hello')
1287        self.checkequal('+10+', '+%d+', '__mod__', 10)
1288        self.checkequal('a', "%c", '__mod__', "a")
1289        self.checkequal('a', "%c", '__mod__', "a")
1290        self.checkequal('"', "%c", '__mod__', 34)
1291        self.checkequal('$', "%c", '__mod__', 36)
1292        self.checkequal('10', "%d", '__mod__', 10)
1293        self.checkequal('\x7f', "%c", '__mod__', 0x7f)
1294
1295        for ordinal in (-100, 0x200000):
1296            # unicode raises ValueError, str raises OverflowError
1297            self.checkraises((ValueError, OverflowError), '%c', '__mod__', ordinal)
1298
1299        longvalue = sys.maxsize + 10
1300        slongvalue = str(longvalue)
1301        self.checkequal(' 42', '%3ld', '__mod__', 42)
1302        self.checkequal('42', '%d', '__mod__', 42.0)
1303        self.checkequal(slongvalue, '%d', '__mod__', longvalue)
1304        self.checkcall('%d', '__mod__', float(longvalue))
1305        self.checkequal('0042.00', '%07.2f', '__mod__', 42)
1306        self.checkequal('0042.00', '%07.2F', '__mod__', 42)
1307
1308        self.checkraises(TypeError, 'abc', '__mod__')
1309        self.checkraises(TypeError, '%(foo)s', '__mod__', 42)
1310        self.checkraises(TypeError, '%s%s', '__mod__', (42,))
1311        self.checkraises(TypeError, '%c', '__mod__', (None,))
1312        self.checkraises(ValueError, '%(foo', '__mod__', {})
1313        self.checkraises(TypeError, '%(foo)s %(bar)s', '__mod__', ('foo', 42))
1314        self.checkraises(TypeError, '%d', '__mod__', "42") # not numeric
1315        self.checkraises(TypeError, '%d', '__mod__', (42+0j)) # no int conversion provided
1316
1317        # argument names with properly nested brackets are supported
1318        self.checkequal('bar', '%((foo))s', '__mod__', {'(foo)': 'bar'})
1319
1320        # 100 is a magic number in PyUnicode_Format, this forces a resize
1321        self.checkequal(103*'a'+'x', '%sx', '__mod__', 103*'a')
1322
1323        self.checkraises(TypeError, '%*s', '__mod__', ('foo', 'bar'))
1324        self.checkraises(TypeError, '%10.*f', '__mod__', ('foo', 42.))
1325        self.checkraises(ValueError, '%10', '__mod__', (42,))
1326
1327        # Outrageously large width or precision should raise ValueError.
1328        self.checkraises(ValueError, '%%%df' % (2**64), '__mod__', (3.2))
1329        self.checkraises(ValueError, '%%.%df' % (2**64), '__mod__', (3.2))
1330        self.checkraises(OverflowError, '%*s', '__mod__',
1331                         (sys.maxsize + 1, ''))
1332        self.checkraises(OverflowError, '%.*f', '__mod__',
1333                         (sys.maxsize + 1, 1. / 7))
1334
1335        class X(object): pass
1336        self.checkraises(TypeError, 'abc', '__mod__', X())
1337
1338    @support.cpython_only
1339    def test_formatting_c_limits(self):
1340        _testcapi = import_helper.import_module('_testcapi')
1341        SIZE_MAX = (1 << (_testcapi.PY_SSIZE_T_MAX.bit_length() + 1)) - 1
1342        self.checkraises(OverflowError, '%*s', '__mod__',
1343                         (_testcapi.PY_SSIZE_T_MAX + 1, ''))
1344        self.checkraises(OverflowError, '%.*f', '__mod__',
1345                         (_testcapi.INT_MAX + 1, 1. / 7))
1346        # Issue 15989
1347        self.checkraises(OverflowError, '%*s', '__mod__',
1348                         (SIZE_MAX + 1, ''))
1349        self.checkraises(OverflowError, '%.*f', '__mod__',
1350                         (_testcapi.UINT_MAX + 1, 1. / 7))
1351
1352    def test_floatformatting(self):
1353        # float formatting
1354        for prec in range(100):
1355            format = '%%.%if' % prec
1356            value = 0.01
1357            for x in range(60):
1358                value = value * 3.14159265359 / 3.0 * 10.0
1359                self.checkcall(format, "__mod__", value)
1360
1361    def test_inplace_rewrites(self):
1362        # Check that strings don't copy and modify cached single-character strings
1363        self.checkequal('a', 'A', 'lower')
1364        self.checkequal(True, 'A', 'isupper')
1365        self.checkequal('A', 'a', 'upper')
1366        self.checkequal(True, 'a', 'islower')
1367
1368        self.checkequal('a', 'A', 'replace', 'A', 'a')
1369        self.checkequal(True, 'A', 'isupper')
1370
1371        self.checkequal('A', 'a', 'capitalize')
1372        self.checkequal(True, 'a', 'islower')
1373
1374        self.checkequal('A', 'a', 'swapcase')
1375        self.checkequal(True, 'a', 'islower')
1376
1377        self.checkequal('A', 'a', 'title')
1378        self.checkequal(True, 'a', 'islower')
1379
1380    def test_partition(self):
1381
1382        self.checkequal(('this is the par', 'ti', 'tion method'),
1383            'this is the partition method', 'partition', 'ti')
1384
1385        # from raymond's original specification
1386        S = 'http://www.python.org'
1387        self.checkequal(('http', '://', 'www.python.org'), S, 'partition', '://')
1388        self.checkequal(('http://www.python.org', '', ''), S, 'partition', '?')
1389        self.checkequal(('', 'http://', 'www.python.org'), S, 'partition', 'http://')
1390        self.checkequal(('http://www.python.', 'org', ''), S, 'partition', 'org')
1391
1392        self.checkraises(ValueError, S, 'partition', '')
1393        self.checkraises(TypeError, S, 'partition', None)
1394
1395    def test_rpartition(self):
1396
1397        self.checkequal(('this is the rparti', 'ti', 'on method'),
1398            'this is the rpartition method', 'rpartition', 'ti')
1399
1400        # from raymond's original specification
1401        S = 'http://www.python.org'
1402        self.checkequal(('http', '://', 'www.python.org'), S, 'rpartition', '://')
1403        self.checkequal(('', '', 'http://www.python.org'), S, 'rpartition', '?')
1404        self.checkequal(('', 'http://', 'www.python.org'), S, 'rpartition', 'http://')
1405        self.checkequal(('http://www.python.', 'org', ''), S, 'rpartition', 'org')
1406
1407        self.checkraises(ValueError, S, 'rpartition', '')
1408        self.checkraises(TypeError, S, 'rpartition', None)
1409
1410    def test_none_arguments(self):
1411        # issue 11828
1412        s = 'hello'
1413        self.checkequal(2, s, 'find', 'l', None)
1414        self.checkequal(3, s, 'find', 'l', -2, None)
1415        self.checkequal(2, s, 'find', 'l', None, -2)
1416        self.checkequal(0, s, 'find', 'h', None, None)
1417
1418        self.checkequal(3, s, 'rfind', 'l', None)
1419        self.checkequal(3, s, 'rfind', 'l', -2, None)
1420        self.checkequal(2, s, 'rfind', 'l', None, -2)
1421        self.checkequal(0, s, 'rfind', 'h', None, None)
1422
1423        self.checkequal(2, s, 'index', 'l', None)
1424        self.checkequal(3, s, 'index', 'l', -2, None)
1425        self.checkequal(2, s, 'index', 'l', None, -2)
1426        self.checkequal(0, s, 'index', 'h', None, None)
1427
1428        self.checkequal(3, s, 'rindex', 'l', None)
1429        self.checkequal(3, s, 'rindex', 'l', -2, None)
1430        self.checkequal(2, s, 'rindex', 'l', None, -2)
1431        self.checkequal(0, s, 'rindex', 'h', None, None)
1432
1433        self.checkequal(2, s, 'count', 'l', None)
1434        self.checkequal(1, s, 'count', 'l', -2, None)
1435        self.checkequal(1, s, 'count', 'l', None, -2)
1436        self.checkequal(0, s, 'count', 'x', None, None)
1437
1438        self.checkequal(True, s, 'endswith', 'o', None)
1439        self.checkequal(True, s, 'endswith', 'lo', -2, None)
1440        self.checkequal(True, s, 'endswith', 'l', None, -2)
1441        self.checkequal(False, s, 'endswith', 'x', None, None)
1442
1443        self.checkequal(True, s, 'startswith', 'h', None)
1444        self.checkequal(True, s, 'startswith', 'l', -2, None)
1445        self.checkequal(True, s, 'startswith', 'h', None, -2)
1446        self.checkequal(False, s, 'startswith', 'x', None, None)
1447
1448    def test_find_etc_raise_correct_error_messages(self):
1449        # issue 11828
1450        s = 'hello'
1451        x = 'x'
1452        self.assertRaisesRegex(TypeError, r'^find\(', s.find,
1453                                x, None, None, None)
1454        self.assertRaisesRegex(TypeError, r'^rfind\(', s.rfind,
1455                                x, None, None, None)
1456        self.assertRaisesRegex(TypeError, r'^index\(', s.index,
1457                                x, None, None, None)
1458        self.assertRaisesRegex(TypeError, r'^rindex\(', s.rindex,
1459                                x, None, None, None)
1460        self.assertRaisesRegex(TypeError, r'^count\(', s.count,
1461                                x, None, None, None)
1462        self.assertRaisesRegex(TypeError, r'^startswith\(', s.startswith,
1463                                x, None, None, None)
1464        self.assertRaisesRegex(TypeError, r'^endswith\(', s.endswith,
1465                                x, None, None, None)
1466
1467        # issue #15534
1468        self.checkequal(10, "...\u043c......<", "find", "<")
1469
1470
class MixinStrUnicodeTest:
    # Additional tests that only work with str.

    def test_bug1001011(self):
        # Joining a one-item sequence whose item is an instance of a
        # subclass must give back a NEW object, and that object must be
        # of the base type under test rather than of the subclass.
        # For an item of the base type itself the optimisation still
        # applies: join hands back the very same object.
        base = self.type2test

        class Derived(base):
            pass

        derived_item = Derived("abcd")
        joined = base().join([derived_item])
        self.assertIsNot(derived_item, joined)
        self.assertIs(type(joined), base)

        plain_item = base("abcd")
        joined = base().join([plain_item])
        self.assertIs(plain_item, joined)
1490