xref: /aosp_15_r20/external/starlark-go/starlark/testdata/bytes.star (revision 4947cdc739c985f6d86941e22894f5cefe7c9e9a)
1*4947cdc7SCole Faust# Tests of 'bytes' (immutable byte strings).
2*4947cdc7SCole Faust
3*4947cdc7SCole Faustload("assert.star", "assert")
4*4947cdc7SCole Faust
5*4947cdc7SCole Faust# bytes(string) -- UTF-k to UTF-8 transcoding with U+FFFD replacement.
6*4947cdc7SCole Fausthello = bytes("hello, 世界")  # shared fixture: ASCII followed by two non-ASCII characters
7*4947cdc7SCole Faustgoodbye = bytes("goodbye")  # shared fixture: ASCII only
8*4947cdc7SCole Faustempty = bytes("")  # shared fixture: zero-length value
9*4947cdc7SCole Faustnonprinting = bytes("\t\n\x7F\u200D")  # TAB, NEWLINE, DEL, ZERO_WIDTH_JOINER
10*4947cdc7SCole Faustassert.eq(bytes("hello, 世界"[:-1]), b"hello, 世��")  # truncated string => U+FFFD replacement characters in the result
11*4947cdc7SCole Faust
12*4947cdc7SCole Faust# bytes(iterable of int) -- construct from numeric byte values
13*4947cdc7SCole Faustassert.eq(bytes([65, 66, 67]), b"ABC")  # from a list
14*4947cdc7SCole Faustassert.eq(bytes((65, 66, 67)), b"ABC")  # from a tuple
15*4947cdc7SCole Faustassert.eq(bytes([0xf0, 0x9f, 0x98, 0xbf]), b"😿")  # F0 9F 98 BF is the UTF-8 encoding of U+1F63F
16*4947cdc7SCole Faustassert.fails(lambda: bytes([300]),  # element does not fit in one byte
17*4947cdc7SCole Faust             "at index 0, 300 out of range .want value in unsigned 8-bit range")
18*4947cdc7SCole Faustassert.fails(lambda: bytes([b"a"]),  # element is not an int
19*4947cdc7SCole Faust             "at index 0, got bytes, want int")
20*4947cdc7SCole Faustassert.fails(lambda: bytes(1), "want string, bytes, or iterable of ints")  # argument is not iterable
21*4947cdc7SCole Faust
22*4947cdc7SCole Faust# literals -- b"..." literals must equal the values built with bytes(...) above
23*4947cdc7SCole Faustassert.eq(b"hello, 世界", hello)
24*4947cdc7SCole Faustassert.eq(b"goodbye", goodbye)
25*4947cdc7SCole Faustassert.eq(b"", empty)
26*4947cdc7SCole Faustassert.eq(b"\t\n\x7F\u200D", nonprinting)
27*4947cdc7SCole Faustassert.ne("abc", b"abc")  # a string never equals a bytes value
28*4947cdc7SCole Faustassert.eq(b"\012\xff\u0400\U0001F63F", b"\n\xffЀ😿") # escapes vs. literal characters (U+0400, U+1F63F); see scanner tests for more
29*4947cdc7SCole Faustassert.eq(rb"\r\n\t", b"\\r\\n\\t") # raw
30*4947cdc7SCole Faust
31*4947cdc7SCole Faust# type -- the type name of a bytes value is "bytes"
32*4947cdc7SCole Faustassert.eq(type(hello), "bytes")
33*4947cdc7SCole Faust
34*4947cdc7SCole Faust# len -- counts bytes of the UTF-8 encoding, not characters
35*4947cdc7SCole Faustassert.eq(len(hello), 13)  # 7 ASCII bytes + 2 characters of 3 bytes each
36*4947cdc7SCole Faustassert.eq(len(goodbye), 7)
37*4947cdc7SCole Faustassert.eq(len(empty), 0)
38*4947cdc7SCole Faustassert.eq(len(b"A"), 1)
39*4947cdc7SCole Faustassert.eq(len(b"Ѐ"), 2)  # U+0400
40*4947cdc7SCole Faustassert.eq(len(b"世"), 3)
41*4947cdc7SCole Faustassert.eq(len(b"😿"), 4)  # U+1F63F
42*4947cdc7SCole Faust
43*4947cdc7SCole Faust# truth -- non-empty values are truthy, the empty value is falsy
44*4947cdc7SCole Faustassert.true(hello)
45*4947cdc7SCole Faustassert.true(goodbye)
46*4947cdc7SCole Faustassert.true(not empty)
47*4947cdc7SCole Faust
48*4947cdc7SCole Faust# str(bytes) does UTF-8 to UTF-k transcoding.
49*4947cdc7SCole Faust# TODO(adonovan): specify.
50*4947cdc7SCole Faustassert.eq(str(hello), "hello, 世界")  # well-formed input converts unchanged
51*4947cdc7SCole Faustassert.eq(str(hello[:-1]), "hello, 世��")  # incomplete UTF-8 encoding => U+FFFD
52*4947cdc7SCole Faustassert.eq(str(goodbye), "goodbye")
53*4947cdc7SCole Faustassert.eq(str(empty), "")
54*4947cdc7SCole Faustassert.eq(str(nonprinting), "\t\n\x7f\u200d")  # control and zero-width characters pass through
55*4947cdc7SCole Faustassert.eq(str(b"\xED\xB0\x80"), "���") # UTF-8 encoding of unpaired surrogate => U+FFFD x 3
56*4947cdc7SCole Faust
57*4947cdc7SCole Faust# repr -- a b"..." literal form of the value
58*4947cdc7SCole Faustassert.eq(repr(hello), r'b"hello, 世界"')  # well-formed non-ASCII text is shown as-is
59*4947cdc7SCole Faustassert.eq(repr(hello[:-1]), r'b"hello, 世\xe7\x95"')  # incomplete UTF-8 encoding: leftover bytes shown as \x escapes
60*4947cdc7SCole Faustassert.eq(repr(goodbye), 'b"goodbye"')
61*4947cdc7SCole Faustassert.eq(repr(empty), 'b""')
62*4947cdc7SCole Faustassert.eq(repr(nonprinting), 'b"\\t\\n\\x7f\\u200d"')  # non-printing characters are escaped
63*4947cdc7SCole Faust
64*4947cdc7SCole Faust# equality -- compares contents
65*4947cdc7SCole Faustassert.eq(hello, hello)
66*4947cdc7SCole Faustassert.ne(hello, goodbye)
67*4947cdc7SCole Faustassert.eq(b"goodbye", goodbye)  # a literal equals a value built with bytes(string)
68*4947cdc7SCole Faust
69*4947cdc7SCole Faust# ordered comparison
70*4947cdc7SCole Faustassert.lt(b"abc", b"abd")  # differs in the last byte
71*4947cdc7SCole Faustassert.lt(b"abc", b"abcd")  # a proper prefix sorts first
72*4947cdc7SCole Faustassert.lt(b"\x7f", b"\x80") # bytes compare as uint8, not int8
73*4947cdc7SCole Faust
74*4947cdc7SCole Faust# bytes are dict-hashable
75*4947cdc7SCole Faustdict = {hello: 1, goodbye: 2}  # NOTE: this global is named 'dict'
76*4947cdc7SCole Faustdict[b"goodbye"] = 3  # an equal literal addresses the existing 'goodbye' entry
77*4947cdc7SCole Faustassert.eq(len(dict), 2)  # so no third entry was added
78*4947cdc7SCole Faustassert.eq(dict[goodbye], 3)  # and the existing entry was overwritten
79*4947cdc7SCole Faust
80*4947cdc7SCole Faust# hash(bytes) is 32-bit FNV-1a.
81*4947cdc7SCole Faustassert.eq(hash(b""), 0x811c9dc5)  # hash of the empty input
82*4947cdc7SCole Faustassert.eq(hash(b"a"), 0xe40c292c)
83*4947cdc7SCole Faustassert.eq(hash(b"ab"), 0x4d2505ca)
84*4947cdc7SCole Faustassert.eq(hash(b"abc"), 0x1a47e90b)
85*4947cdc7SCole Faust
86*4947cdc7SCole Faust# indexing -- b[i] yields a 1-byte bytes value
87*4947cdc7SCole Faustassert.eq(goodbye[0], b"g")
88*4947cdc7SCole Faustassert.eq(goodbye[-1], b"e")  # negative index counts from the end
89*4947cdc7SCole Faustassert.fails(lambda: goodbye[100], "out of range")
90*4947cdc7SCole Faust
91*4947cdc7SCole Faust# slicing -- yields bytes
92*4947cdc7SCole Faustassert.eq(goodbye[:4], b"good")
93*4947cdc7SCole Faustassert.eq(goodbye[4:], b"bye")
94*4947cdc7SCole Faustassert.eq(goodbye[::2], b"gobe")  # stride of 2 takes every other byte
95*4947cdc7SCole Faustassert.eq(goodbye[3:4], b"d")  # special case: len=1
96*4947cdc7SCole Faustassert.eq(goodbye[4:4], b"")  # special case: len=0
97*4947cdc7SCole Faust
98*4947cdc7SCole Faust# bytes in bytes -- contiguous subsequence test
99*4947cdc7SCole Faustassert.eq(b"bc" in b"abcd", True)
100*4947cdc7SCole Faustassert.eq(b"bc" in b"dcab", False)  # both bytes occur, but not adjacent in this order
101*4947cdc7SCole Faustassert.fails(lambda: "bc" in b"dcab", "requires bytes or int as left operand, not string")
102*4947cdc7SCole Faust
103*4947cdc7SCole Faust# int in bytes -- membership test by numeric byte value
104*4947cdc7SCole Faustassert.eq(97 in b"abc", True)  # 97='a'
105*4947cdc7SCole Faustassert.eq(100 in b"abc", False) # 100='d'
106*4947cdc7SCole Faustassert.fails(lambda: 256 in b"abc", "int in bytes: 256 out of range")  # above the largest byte value
107*4947cdc7SCole Faustassert.fails(lambda: -1 in b"abc", "int in bytes: -1 out of range")  # negative values are rejected too
108*4947cdc7SCole Faust
109*4947cdc7SCole Faust# ord   TODO(adonovan): specify
110*4947cdc7SCole Faustassert.eq(ord(b"a"), 97)  # numeric value of the sole byte
111*4947cdc7SCole Faustassert.fails(lambda: ord(b"ab"), "ord: bytes has length 2, want 1")
112*4947cdc7SCole Faustassert.fails(lambda: ord(b""), "ord: bytes has length 0, want 1")
113*4947cdc7SCole Faust
114*4947cdc7SCole Faust# repeat (bytes * int) -- the operands may appear in either order
115*4947cdc7SCole Faustassert.eq(goodbye * 3, b"goodbyegoodbyegoodbye")
116*4947cdc7SCole Faustassert.eq(3 * goodbye, b"goodbyegoodbyegoodbye")
117*4947cdc7SCole Faust
118*4947cdc7SCole Faust# elems() returns an iterable value over the numeric (int) values of the bytes.
119*4947cdc7SCole Faustassert.eq(type(hello.elems()), "bytes.elems")
120*4947cdc7SCole Faustassert.eq(str(hello.elems()), "b\"hello, 世界\".elems()")  # prints as the call expression that produced it
121*4947cdc7SCole Faustassert.eq(list(hello.elems()), [104, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140])
122*4947cdc7SCole Faustassert.eq(bytes([104, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140]), hello)  # the same ints rebuild the value
123*4947cdc7SCole Faustassert.eq(list(goodbye.elems()), [103, 111, 111, 100, 98, 121, 101])
124*4947cdc7SCole Faustassert.eq(list(empty.elems()), [])
125*4947cdc7SCole Faustassert.eq(bytes(hello.elems()), hello) # bytes(iterable) is dual to bytes.elems()
126*4947cdc7SCole Faust
127*4947cdc7SCole Faust# x[i] = ... -- bytes are immutable, so element assignment must fail.
128*4947cdc7SCole Faustdef f():  # a def, because an assignment statement cannot appear in a lambda body
129*4947cdc7SCole Faust    b"abc"[1] = b"B"
130*4947cdc7SCole Faust
131*4947cdc7SCole Faustassert.fails(f, "bytes.*does not support.*assignment")  # calling f must fail with a message matching this pattern
132*4947cdc7SCole Faust
133*4947cdc7SCole Faust# TODO(adonovan): the specification is not finalized in many areas:
134*4947cdc7SCole Faust# - chr, ord functions
135*4947cdc7SCole Faust# - encoding/decoding bytes to string.
136*4947cdc7SCole Faust# - methods: find, index, split, etc.
137*4947cdc7SCole Faust#
138*4947cdc7SCole Faust# Summary of string operations (put this in spec).
139*4947cdc7SCole Faust#
140*4947cdc7SCole Faust# string to number:
141*4947cdc7SCole Faust# - bytes[i]  returns numeric value of ith byte.
142*4947cdc7SCole Faust# - ord(string)  returns numeric value of sole code point in string.
143*4947cdc7SCole Faust# - ord(string[i])  is not a useful operation: fails on non-ASCII; see below.
144*4947cdc7SCole Faust#   Q. Perhaps ord should return the first (not sole) code point? Then it becomes a UTF-8 decoder.
145*4947cdc7SCole Faust#      Perhaps ord(string, index=int) should apply the index and relax the len=1 check.
146*4947cdc7SCole Faust# - string.codepoints()  iterates over 1-codepoint substrings.
147*4947cdc7SCole Faust# - string.codepoint_ords()  iterates over numeric values of code points in string.
148*4947cdc7SCole Faust# - string.elems()  iterates over 1-element (UTF-k code) substrings.
149*4947cdc7SCole Faust# - string.elem_ords()  iterates over numeric UTF-k code values.
150*4947cdc7SCole Faust# - string.elem_ords()[i]  returns numeric value of ith element (UTF-k code).
151*4947cdc7SCole Faust# - string.elems()[i]  returns substring of a single element (UTF-k code).
152*4947cdc7SCole Faust# - int(string)  parses string as decimal (or other) numeric literal.
153*4947cdc7SCole Faust#
154*4947cdc7SCole Faust# number to string:
155*4947cdc7SCole Faust# - chr(int) returns string, UTF-k encoding of Unicode code point (like Python).
156*4947cdc7SCole Faust#   Redundant with '%c' % int (which Python2 calls 'unichr').
157*4947cdc7SCole Faust# - bytes(chr(int)) returns byte string containing UTF-8 encoding of one code point.
158*4947cdc7SCole Faust# - bytes([int]) returns 1-byte string (with regrettable list allocation).
159*4947cdc7SCole Faust# - str(int) - format number as decimal.
160