1// asmcheck
2
3// Copyright 2018 The Go Authors. All rights reserved.
4// Use of this source code is governed by a BSD-style
5// license that can be found in the LICENSE file.
6
7package codegen
8
9import "math/bits"
10
11// ----------------------- //
12//    bits.LeadingZeros    //
13// ----------------------- //
14
// LeadingZeros checks that bits.LeadingZeros is intrinsified; the directive
// comments below name the expected instruction(s) per architecture.
func LeadingZeros(n uint) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3:"LZCNTQ", -"BSRQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"CNTLZD"
	return bits.LeadingZeros(n)
}

// LeadingZeros64 checks intrinsification of bits.LeadingZeros64.
func LeadingZeros64(n uint64) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3:"LZCNTQ", -"BSRQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"CNTLZD"
	return bits.LeadingZeros64(n)
}

// LeadingZeros32 checks intrinsification of bits.LeadingZeros32
// (note the 32-bit variants CLZW/CNTLZW where the target has them).
func LeadingZeros32(n uint32) int {
	// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
	// amd64/v3: "LZCNTL",- "BSRL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZW"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"CNTLZW"
	return bits.LeadingZeros32(n)
}

// LeadingZeros16 checks intrinsification of bits.LeadingZeros16.
func LeadingZeros16(n uint16) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL",- "BSRL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"CNTLZD"
	return bits.LeadingZeros16(n)
}

// LeadingZeros8 checks intrinsification of bits.LeadingZeros8.
func LeadingZeros8(n uint8) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL",- "BSRL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"CNTLZD"
	return bits.LeadingZeros8(n)
}
69
70// --------------- //
71//    bits.Len*    //
72// --------------- //
73
// Len checks that bits.Len is intrinsified via a count-leading-zeros
// style instruction on the listed architectures.
func Len(n uint) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3: "LZCNTQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"SUBC","CNTLZD"
	return bits.Len(n)
}

// Len64 checks intrinsification of bits.Len64.
func Len64(n uint64) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3: "LZCNTQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"SUBC","CNTLZD"
	return bits.Len64(n)
}

// SubFromLen64 checks that 64 - bits.Len64(n) folds into the count
// instruction on ppc64x (no separate SUBC is emitted).
func SubFromLen64(n uint64) int {
	// ppc64x:"CNTLZD",-"SUBC"
	return 64 - bits.Len64(n)
}

// Len32 checks intrinsification of bits.Len32.
func Len32(n uint32) int {
	// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x: "CNTLZW"
	return bits.Len32(n)
}

// Len16 checks intrinsification of bits.Len16.
func Len16(n uint16) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"SUBC","CNTLZD"
	return bits.Len16(n)
}

// Len8 checks intrinsification of bits.Len8.
func Len8(n uint8) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64x:"SUBC","CNTLZD"
	return bits.Len8(n)
}
133
134// -------------------- //
135//    bits.OnesCount    //
136// -------------------- //
137
138// TODO(register args) Restore a m d 6 4 / v 1 :.*x86HasPOPCNT when only one ABI is tested.
// OnesCount checks that bits.OnesCount compiles to a popcount
// instruction; on amd64 v2+ the runtime feature probe must be absent.
func OnesCount(n uint) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTQ"
	// arm64:"VCNT","VUADDLV"
	// s390x:"POPCNT"
	// ppc64x:"POPCNTD"
	// wasm:"I64Popcnt"
	return bits.OnesCount(n)
}

// OnesCount64 checks intrinsification of bits.OnesCount64.
func OnesCount64(n uint64) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTQ"
	// arm64:"VCNT","VUADDLV"
	// s390x:"POPCNT"
	// ppc64x:"POPCNTD"
	// wasm:"I64Popcnt"
	return bits.OnesCount64(n)
}

// OnesCount32 checks intrinsification of bits.OnesCount32.
func OnesCount32(n uint32) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTL"
	// arm64:"VCNT","VUADDLV"
	// s390x:"POPCNT"
	// ppc64x:"POPCNTW"
	// wasm:"I64Popcnt"
	return bits.OnesCount32(n)
}

// OnesCount16 checks intrinsification of bits.OnesCount16.
func OnesCount16(n uint16) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTL"
	// arm64:"VCNT","VUADDLV"
	// s390x:"POPCNT"
	// ppc64x:"POPCNTW"
	// wasm:"I64Popcnt"
	return bits.OnesCount16(n)
}

// OnesCount8 checks intrinsification of bits.OnesCount8 on the
// targets that have a byte-capable popcount.
func OnesCount8(n uint8) int {
	// s390x:"POPCNT"
	// ppc64x:"POPCNTB"
	// wasm:"I64Popcnt"
	return bits.OnesCount8(n)
}
185
186// ----------------------- //
187//    bits.ReverseBytes    //
188// ----------------------- //
189
// ReverseBytes checks that bits.ReverseBytes compiles to a single
// byte-swap instruction where available.
func ReverseBytes(n uint) uint {
	// amd64:"BSWAPQ"
	// 386:"BSWAPL"
	// s390x:"MOVDBR"
	// arm64:"REV"
	return bits.ReverseBytes(n)
}

// ReverseBytes64 checks intrinsification of bits.ReverseBytes64.
func ReverseBytes64(n uint64) uint64 {
	// amd64:"BSWAPQ"
	// 386:"BSWAPL"
	// s390x:"MOVDBR"
	// arm64:"REV"
	// ppc64x/power10: "BRD"
	return bits.ReverseBytes64(n)
}

// ReverseBytes32 checks intrinsification of bits.ReverseBytes32.
func ReverseBytes32(n uint32) uint32 {
	// amd64:"BSWAPL"
	// 386:"BSWAPL"
	// s390x:"MOVWBR"
	// arm64:"REVW"
	// ppc64x/power10: "BRW"
	return bits.ReverseBytes32(n)
}

// ReverseBytes16 checks intrinsification of bits.ReverseBytes16; on
// amd64 a 16-bit swap is a rotate (ROLW), and arm64 must avoid the
// shift-and-or fallback.
func ReverseBytes16(n uint16) uint16 {
	// amd64:"ROLW"
	// arm64:"REV16W",-"UBFX",-"ORR"
	// arm/5:"SLL","SRL","ORR"
	// arm/6:"REV16"
	// arm/7:"REV16"
	// ppc64x/power10: "BRH"
	return bits.ReverseBytes16(n)
}
225
226// --------------------- //
227//    bits.RotateLeft    //
228// --------------------- //
229
// RotateLeft64 checks that a constant-count bits.RotateLeft64
// compiles to a rotate instruction (no branch, no shift pair).
func RotateLeft64(n uint64) uint64 {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// ppc64x:"ROTL"
	// s390x:"RISBGZ\t[$]0, [$]63, [$]37, "
	// wasm:"I64Rotl"
	return bits.RotateLeft64(n, 37)
}

// RotateLeft32 checks constant-count bits.RotateLeft32.
func RotateLeft32(n uint32) uint32 {
	// amd64:"ROLL" 386:"ROLL"
	// arm:`MOVW\tR[0-9]+@>23`
	// arm64:"RORW"
	// ppc64x:"ROTLW"
	// s390x:"RLL"
	// wasm:"I32Rotl"
	return bits.RotateLeft32(n, 9)
}

// RotateLeft16 checks variable-count bits.RotateLeft16; arm64 must
// not need a conditional select.
func RotateLeft16(n uint16, s int) uint16 {
	// amd64:"ROLW" 386:"ROLW"
	// arm64:"RORW",-"CSEL"
	return bits.RotateLeft16(n, s)
}

// RotateLeft8 checks variable-count bits.RotateLeft8.
func RotateLeft8(n uint8, s int) uint8 {
	// amd64:"ROLB" 386:"ROLB"
	// arm64:"LSL","LSR",-"CSEL"
	return bits.RotateLeft8(n, s)
}

// RotateLeftVariable checks variable-count bits.RotateLeft.
func RotateLeftVariable(n uint, m int) uint {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// ppc64x:"ROTL"
	// s390x:"RLLG"
	// wasm:"I64Rotl"
	return bits.RotateLeft(n, m)
}

// RotateLeftVariable64 checks variable-count bits.RotateLeft64.
func RotateLeftVariable64(n uint64, m int) uint64 {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// ppc64x:"ROTL"
	// s390x:"RLLG"
	// wasm:"I64Rotl"
	return bits.RotateLeft64(n, m)
}

// RotateLeftVariable32 checks variable-count bits.RotateLeft32.
func RotateLeftVariable32(n uint32, m int) uint32 {
	// arm:`MOVW\tR[0-9]+@>R[0-9]+`
	// amd64:"ROLL"
	// arm64:"RORW"
	// ppc64x:"ROTLW"
	// s390x:"RLL"
	// wasm:"I32Rotl"
	return bits.RotateLeft32(n, m)
}
288
289// ------------------------ //
290//    bits.TrailingZeros    //
291// ------------------------ //
292
// TrailingZeros checks that bits.TrailingZeros is intrinsified; on
// amd64 below v3 the zero case needs the MOVL $64 / CMOVQEQ fixup.
func TrailingZeros(n uint) int {
	// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
	// amd64/v3:"TZCNTQ"
	// 386:"BSFL"
	// arm:"CLZ"
	// arm64:"RBIT","CLZ"
	// s390x:"FLOGR"
	// ppc64x/power8:"ANDN","POPCNTD"
	// ppc64x/power9: "CNTTZD"
	// wasm:"I64Ctz"
	return bits.TrailingZeros(n)
}

// TrailingZeros64 checks intrinsification of bits.TrailingZeros64.
func TrailingZeros64(n uint64) int {
	// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
	// amd64/v3:"TZCNTQ"
	// 386:"BSFL"
	// arm64:"RBIT","CLZ"
	// s390x:"FLOGR"
	// ppc64x/power8:"ANDN","POPCNTD"
	// ppc64x/power9: "CNTTZD"
	// wasm:"I64Ctz"
	return bits.TrailingZeros64(n)
}

// TrailingZeros64Subtract checks code generation when the argument is
// itself a subtraction (1 - n) on ppc64x.
func TrailingZeros64Subtract(n uint64) int {
	// ppc64x/power8:"NEG","SUBC","ANDN","POPCNTD"
	// ppc64x/power9:"SUBC","CNTTZD"
	return bits.TrailingZeros64(1 - n)
}

// TrailingZeros32 checks intrinsification of bits.TrailingZeros32; on
// amd64 below v3 the zero case is handled by setting bit 32 first (BTSQ).
func TrailingZeros32(n uint32) int {
	// amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ"
	// amd64/v3:"TZCNTL"
	// 386:"BSFL"
	// arm:"CLZ"
	// arm64:"RBITW","CLZW"
	// s390x:"FLOGR","MOVWZ"
	// ppc64x/power8:"ANDN","POPCNTW"
	// ppc64x/power9: "CNTTZW"
	// wasm:"I64Ctz"
	return bits.TrailingZeros32(n)
}

// TrailingZeros16 checks intrinsification of bits.TrailingZeros16; the
// zero case is handled by OR-ing in bit 16 before counting.
func TrailingZeros16(n uint16) int {
	// amd64:"BSFL","ORL\\t\\$65536"
	// 386:"BSFL\t"
	// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
	// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
	// s390x:"FLOGR","OR\t\\$65536"
	// ppc64x/power8:"POPCNTD","ORIS\\t\\$1"
	// ppc64x/power9:"CNTTZD","ORIS\\t\\$1"
	// wasm:"I64Ctz"
	return bits.TrailingZeros16(n)
}

// TrailingZeros8 checks intrinsification of bits.TrailingZeros8; the
// zero case is handled by OR-ing in bit 8 before counting.
func TrailingZeros8(n uint8) int {
	// amd64:"BSFL","ORL\\t\\$256"
	// 386:"BSFL"
	// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
	// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
	// s390x:"FLOGR","OR\t\\$256"
	// wasm:"I64Ctz"
	return bits.TrailingZeros8(n)
}
358
359// IterateBitsNN checks special handling of TrailingZerosNN when the input is known to be non-zero.
360
// IterateBits checks that inside a loop where n is provably non-zero,
// the zero-input fixup (CMOVEQ) for TrailingZeros is dropped.
func IterateBits(n uint) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
		// amd64/v3:"TZCNTQ"
		i += bits.TrailingZeros(n)
		n &= n - 1 // clear the lowest set bit
	}
	return i
}

// IterateBits64 is the 64-bit variant of IterateBits.
func IterateBits64(n uint64) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
		// amd64/v3:"TZCNTQ"
		i += bits.TrailingZeros64(n)
		n &= n - 1 // clear the lowest set bit
	}
	return i
}

// IterateBits32 is the 32-bit variant; the BTSQ zero-fixup must be dropped.
func IterateBits32(n uint32) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL",-"BTSQ"
		// amd64/v3:"TZCNTL"
		i += bits.TrailingZeros32(n)
		n &= n - 1 // clear the lowest set bit
	}
	return i
}

// IterateBits16 is the 16-bit variant; the ORR/BTSL zero-fixups must be dropped.
func IterateBits16(n uint16) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL",-"BTSL"
		// amd64/v3:"TZCNTL"
		// arm64:"RBITW","CLZW",-"ORR"
		i += bits.TrailingZeros16(n)
		n &= n - 1 // clear the lowest set bit
	}
	return i
}

// IterateBits8 is the 8-bit variant; the ORR/BTSL zero-fixups must be dropped.
func IterateBits8(n uint8) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL",-"BTSL"
		// amd64/v3:"TZCNTL"
		// arm64:"RBITW","CLZW",-"ORR"
		i += bits.TrailingZeros8(n)
		n &= n - 1 // clear the lowest set bit
	}
	return i
}
417
418// --------------- //
419//    bits.Add*    //
420// --------------- //
421
// Add checks that bits.Add compiles to add-with-carry sequences
// rather than explicit compares.
func Add(x, y, ci uint) (r, co uint) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// riscv64: "ADD","SLTU"
	return bits.Add(x, y, ci)
}

// AddC checks bits.Add with a constant addend.
func AddC(x, ci uint) (r, co uint) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add(x, 7, ci)
}

// AddZ checks bits.Add with a constant zero carry-in: the carry-in
// consumption (ADCS/ADCQ) must be optimized away.
func AddZ(x, y uint) (r, co uint) {
	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", -"ADDE", "ADDZE"
	// s390x:"ADDC",-"ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add(x, y, 0)
}

// AddR checks bits.Add when only the sum is used: materializing the
// carry-out (SBBQ/NEGQ/ADDZE) must be optimized away.
func AddR(x, y, ci uint) uint {
	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
	// loong64: "ADDV", -"SGTU"
	// ppc64x: "ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV",-"SGTU"
	// riscv64: "ADD",-"SLTU"
	r, _ := bits.Add(x, y, ci)
	return r
}

// AddM checks a multi-limb carry chain: the middle add should consume
// the carry flag directly instead of re-materializing it.
func AddM(p, q, r *[3]uint) {
	var c uint
	r[0], c = bits.Add(p[0], q[0], c)
	// arm64:"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
	// s390x:"ADDE",-"ADDC\t[$]-1,"
	r[1], c = bits.Add(p[1], q[1], c)
	r[2], c = bits.Add(p[2], q[2], c)
}
474
// Add64 checks that bits.Add64 compiles to add-with-carry sequences.
func Add64(x, y, ci uint64) (r, co uint64) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add64(x, y, ci)
}

// Add64C checks bits.Add64 with a constant addend.
func Add64C(x, ci uint64) (r, co uint64) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add64(x, 7, ci)
}

// Add64Z checks bits.Add64 with a constant zero carry-in: the
// carry-in consumption must be optimized away.
func Add64Z(x, y uint64) (r, co uint64) {
	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", -"ADDE", "ADDZE"
	// s390x:"ADDC",-"ADDC\t[$]-1,"
	// mips64:"ADDV","SGTU"
	// riscv64: "ADD","SLTU"
	return bits.Add64(x, y, 0)
}

// Add64R checks bits.Add64 when only the sum is used: carry-out
// materialization must be optimized away.
func Add64R(x, y, ci uint64) uint64 {
	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
	// loong64: "ADDV", -"SGTU"
	// ppc64x: "ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// mips64:"ADDV",-"SGTU"
	// riscv64: "ADD",-"SLTU"
	r, _ := bits.Add64(x, y, ci)
	return r
}

// Add64M checks a multi-limb 64-bit carry chain: the middle add must
// use the carry flag directly.
func Add64M(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Add64(p[0], q[0], c)
	// arm64:"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
	// ppc64x: -"ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE",-"ADDC\t[$]-1,"
	r[1], c = bits.Add64(p[1], q[1], c)
	r[2], c = bits.Add64(p[2], q[2], c)
}

// Add64M0 checks carry-chain adds where one addend is zero or the two
// addends are equal; the carry from the final add is intentionally dead.
func Add64M0(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Add64(p[0], q[0], 0)
	// ppc64x: -"ADDC", -"ADDE", "ADDZE\tR[1-9]"
	r[1], c = bits.Add64(p[1], 0, c)
	// ppc64x: -"ADDC", "ADDE", -"ADDZE"
	r[2], c = bits.Add64(p[2], p[2], c)
}

// Add64MSaveC checks that a carry value stored to memory between adds
// is regenerated efficiently on ppc64x.
func Add64MSaveC(p, q, r, c *[2]uint64) {
	// ppc64x: "ADDC\tR", "ADDZE"
	r[0], c[0] = bits.Add64(p[0], q[0], 0)
	// ppc64x: "ADDC\t[$]-1", "ADDE", "ADDZE"
	r[1], c[1] = bits.Add64(p[1], q[1], c[0])
}
546
// Add64PanicOnOverflowEQ checks that testing the carry with == 1
// compiles to a branch-on-carry (BRC) on s390x, with no extra ADDE.
func Add64PanicOnOverflowEQ(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c == 1 {
		panic("overflow")
	}
	return r
}

// Add64PanicOnOverflowNE is the != 0 form of the carry-branch check.
func Add64PanicOnOverflowNE(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c != 0 {
		panic("overflow")
	}
	return r
}

// Add64PanicOnOverflowGT is the > 0 form of the carry-branch check.
func Add64PanicOnOverflowGT(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c > 0 {
		panic("overflow")
	}
	return r
}

// Add64MPanicOnOverflowEQ checks the carry-branch after a two-limb
// carry chain, comparing with == 1.
func Add64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c == 1 {
		panic("overflow")
	}
	return r
}

// Add64MPanicOnOverflowNE is the != 0 form of the chained check.
func Add64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c != 0 {
		panic("overflow")
	}
	return r
}

// Add64MPanicOnOverflowGT is the > 0 form of the chained check.
func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c > 0 {
		panic("overflow")
	}
	return r
}
609
610// Verify independent carry chain operations are scheduled efficiently
611// and do not cause unnecessary save/restore of the CA bit.
612//
613// This is an example of why CarryChainTail priority must be lower
614// (earlier in the block) than Memory. f[0]=f1 could be scheduled
615// after the first two lower 64 bit limb adds, but before either
616// high 64 bit limbs are added.
617//
618// This is what happened on PPC64 when compiling
619// crypto/internal/edwards25519/field.feMulGeneric.
// Add64MultipleChains runs two independent two-limb carry chains; see
// the scheduling discussion in the comment above. Each chain must use
// ADDC/ADDE with no CA-bit spill (MOVD XER) on ppc64x.
func Add64MultipleChains(a, b, c, d [2]uint64) {
	var cx, d1, d2 uint64
	a1, a2 := a[0], a[1]
	b1, b2 := b[0], b[1]
	c1, c2 := c[0], c[1]

	// ppc64x: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
	d1, cx = bits.Add64(a1, b1, 0)
	// ppc64x: "ADDE", -"ADDC", -"MOVD\t.*, XER"
	d2, _ = bits.Add64(a2, b2, cx)

	// ppc64x: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
	d1, cx = bits.Add64(c1, d1, 0)
	// ppc64x: "ADDE", -"ADDC", -"MOVD\t.*, XER"
	d2, _ = bits.Add64(c2, d2, cx)
	d[0] = d1
	d[1] = d2
}
638
639// --------------- //
640//    bits.Sub*    //
641// --------------- //
642
// Sub checks that bits.Sub compiles to subtract-with-borrow sequences
// rather than explicit compares.
func Sub(x, y, ci uint) (r, co uint) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub(x, y, ci)
}

// SubC checks bits.Sub with a constant subtrahend.
func SubC(x, ci uint) (r, co uint) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub(x, 7, ci)
}

// SubZ checks bits.Sub with a constant zero borrow-in: the borrow-in
// consumption (SBCS/SBBQ-from-carry) must be optimized away.
func SubZ(x, y uint) (r, co uint) {
	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", -"SUBE", "SUBZE", "NEG"
	// s390x:"SUBC"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub(x, y, 0)
}

// SubR checks bits.Sub when only the difference is used: borrow-out
// materialization must be optimized away.
func SubR(x, y, ci uint) uint {
	// amd64:"NEGL","SBBQ",-"NEGQ"
	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV",-"SGTU"
	// ppc64x:"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	// riscv64: "SUB",-"SLTU"
	r, _ := bits.Sub(x, y, ci)
	return r
}

// SubM checks a multi-limb borrow chain: the middle subtract should
// consume the borrow flag directly.
func SubM(p, q, r *[3]uint) {
	var c uint
	r[0], c = bits.Sub(p[0], q[0], c)
	// amd64:"SBBQ",-"NEGL",-"NEGQ"
	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
	// ppc64x:-"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	r[1], c = bits.Sub(p[1], q[1], c)
	r[2], c = bits.Sub(p[2], q[2], c)
}
696
// Sub64 checks that bits.Sub64 compiles to subtract-with-borrow sequences.
func Sub64(x, y, ci uint64) (r, co uint64) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub64(x, y, ci)
}

// Sub64C checks bits.Sub64 with a constant subtrahend.
func Sub64C(x, ci uint64) (r, co uint64) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub64(x, 7, ci)
}

// Sub64Z checks bits.Sub64 with a constant zero borrow-in.
func Sub64Z(x, y uint64) (r, co uint64) {
	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64x:"SUBC", -"SUBE", "SUBZE", "NEG"
	// s390x:"SUBC"
	// mips64:"SUBV","SGTU"
	// riscv64: "SUB","SLTU"
	return bits.Sub64(x, y, 0)
}

// Sub64R checks bits.Sub64 when only the difference is used.
func Sub64R(x, y, ci uint64) uint64 {
	// amd64:"NEGL","SBBQ",-"NEGQ"
	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV",-"SGTU"
	// ppc64x:"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	// riscv64: "SUB",-"SLTU"
	r, _ := bits.Sub64(x, y, ci)
	return r
}

// Sub64M checks a multi-limb 64-bit borrow chain.
func Sub64M(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Sub64(p[0], q[0], c)
	// amd64:"SBBQ",-"NEGL",-"NEGQ"
	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
	// s390x:"SUBE"
	r[1], c = bits.Sub64(p[1], q[1], c)
	r[2], c = bits.Sub64(p[2], q[2], c)
}

// Sub64MSaveC checks that a borrow value stored to memory between
// subtracts is regenerated efficiently on ppc64x.
func Sub64MSaveC(p, q, r, c *[2]uint64) {
	// ppc64x:"SUBC\tR\\d+, R\\d+,", "SUBZE", "NEG"
	r[0], c[0] = bits.Sub64(p[0], q[0], 0)
	// ppc64x:"SUBC\tR\\d+, [$]0,", "SUBE", "SUBZE", "NEG"
	r[1], c[1] = bits.Sub64(p[1], q[1], c[0])
}
756
// Sub64PanicOnOverflowEQ checks that testing the borrow with == 1
// compiles to a branch-on-borrow (BRC) on s390x, with no extra
// ADDE/SUBE.
func Sub64PanicOnOverflowEQ(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b == 1 {
		panic("overflow")
	}
	return r
}

// Sub64PanicOnOverflowNE is the != 0 form of the borrow-branch check.
func Sub64PanicOnOverflowNE(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b != 0 {
		panic("overflow")
	}
	return r
}

// Sub64PanicOnOverflowGT is the > 0 form of the borrow-branch check.
func Sub64PanicOnOverflowGT(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b > 0 {
		panic("overflow")
	}
	return r
}

// Sub64MPanicOnOverflowEQ checks the borrow-branch after a two-limb
// borrow chain, comparing with == 1.
func Sub64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c == 1 {
		panic("overflow")
	}
	return r
}

// Sub64MPanicOnOverflowNE is the != 0 form of the chained check.
func Sub64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c != 0 {
		panic("overflow")
	}
	return r
}

// Sub64MPanicOnOverflowGT is the > 0 form of the chained check.
func Sub64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c > 0 {
		panic("overflow")
	}
	return r
}
819
820// --------------- //
821//    bits.Mul*    //
822// --------------- //
823
// Mul checks that bits.Mul compiles to a widening multiply
// (hi/lo instruction pair, or a single full-width multiply).
func Mul(x, y uint) (hi, lo uint) {
	// amd64:"MULQ"
	// arm64:"UMULH","MUL"
	// ppc64x:"MULHDU","MULLD"
	// s390x:"MLGR"
	// mips64: "MULVU"
	// riscv64:"MULHU","MUL"
	return bits.Mul(x, y)
}

// Mul64 checks intrinsification of bits.Mul64.
func Mul64(x, y uint64) (hi, lo uint64) {
	// amd64:"MULQ"
	// arm64:"UMULH","MUL"
	// ppc64x:"MULHDU","MULLD"
	// s390x:"MLGR"
	// mips64: "MULVU"
	// riscv64:"MULHU","MUL"
	return bits.Mul64(x, y)
}

// Mul64HiOnly checks that when only the high half is used, the
// low-half multiply is optimized away.
func Mul64HiOnly(x, y uint64) uint64 {
	// arm64:"UMULH",-"MUL"
	// riscv64:"MULHU",-"MUL\t"
	hi, _ := bits.Mul64(x, y)
	return hi
}

// Mul64LoOnly checks that when only the low half is used, the
// high-half multiply is optimized away.
func Mul64LoOnly(x, y uint64) uint64 {
	// arm64:"MUL",-"UMULH"
	// riscv64:"MUL\t",-"MULHU"
	_, lo := bits.Mul64(x, y)
	return lo
}
857
858// --------------- //
859//    bits.Div*    //
860// --------------- //
861
// Div checks that bits.Div compiles to the hardware wide divide on amd64.
func Div(hi, lo, x uint) (q, r uint) {
	// amd64:"DIVQ"
	return bits.Div(hi, lo, x)
}

// Div32 checks that bits.Div32 uses a single divide plus multiply-subtract
// on arm64 rather than a separate remainder operation.
func Div32(hi, lo, x uint32) (q, r uint32) {
	// arm64:"ORR","UDIV","MSUB",-"UREM"
	return bits.Div32(hi, lo, x)
}

// Div64 checks that bits.Div64 compiles to the hardware wide divide on amd64.
func Div64(hi, lo, x uint64) (q, r uint64) {
	// amd64:"DIVQ"
	return bits.Div64(hi, lo, x)
}

// Div64degenerate checks that with a constant hi of 0 and a constant
// divisor, the full 128-by-64 divide (DIVQ) is optimized away.
func Div64degenerate(x uint64) (q, r uint64) {
	// amd64:-"DIVQ"
	return bits.Div64(0, x, 5)
}
881