xref: /aosp_15_r20/external/mesa3d/src/nouveau/codegen/lib/gf100.asm (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker.section #gf100_builtin_code   // section that holds the built-in routines defined below, up to the next .section directive
2*61046927SAndroid Build Coastguard Worker// DIV U32: unsigned 32-bit division that returns both quotient and remainder
3*61046927SAndroid Build Coastguard Worker//
4*61046927SAndroid Build Coastguard Worker// UNR recurrence (q = a / b):
5*61046927SAndroid Build Coastguard Worker// look for z such that 2^32 - b <= b * z < 2^32 (z starts as a power of two and is refined by the five mul/add pairs below)
6*61046927SAndroid Build Coastguard Worker// then q - 1 <= (a * z) / 2^32 <= q (the code still applies up to two +1 corrections to the estimate)
7*61046927SAndroid Build Coastguard Worker//
8*61046927SAndroid Build Coastguard Worker// INPUT:   $r0: dividend, $r1: divisor (the divisor is not tested for zero anywhere in this routine)
9*61046927SAndroid Build Coastguard Worker// OUTPUT:  $r0: result (quotient), $r1: modulus (dividend - quotient * divisor)
10*61046927SAndroid Build Coastguard Worker// CLOBBER: $r2 - $r3, $p0 - $p1 (the code below only writes $p0)
11*61046927SAndroid Build Coastguard Worker// SIZE:    22 / 14 * 8 bytes (NOTE(review): the body below has 26 instructions, so this figure looks stale - confirm)
12*61046927SAndroid Build Coastguard Worker//
13*61046927SAndroid Build Coastguard Workergf100_div_u32:
14*61046927SAndroid Build Coastguard Worker   bfind u32 $r2 $r1   // $r2 = bit index of the divisor's highest set bit (presumed bfind semantics - confirm)
15*61046927SAndroid Build Coastguard Worker   xor b32 $r2 $r2 0x1f   // an index i in 0..31 becomes 31 - i, i.e. the divisor's leading-zero count
16*61046927SAndroid Build Coastguard Worker   mov b32 $r3 0x1   // $r3 = 1, the value that gets shifted next
17*61046927SAndroid Build Coastguard Worker   shl b32 $r2 $r3 clamp $r2   // $r2 = z = 1 << $r2, a power-of-two first estimate (clamp presumably saturates the shift count - confirm)
18*61046927SAndroid Build Coastguard Worker   cvt u32 $r1 neg u32 $r1   // $r1 = -b, so the multiplies below produce the error term directly
19*61046927SAndroid Build Coastguard Worker   mul $r3 u32 $r1 u32 $r2   // refinement 1 of 5: $r3 = (-b) * z, i.e. 2^32 - b * z modulo 2^32
20*61046927SAndroid Build Coastguard Worker   add $r2 (mul high u32 $r2 u32 $r3) $r2   // z += high 32 bits of z * $r3
21*61046927SAndroid Build Coastguard Worker   mul $r3 u32 $r1 u32 $r2   // refinement 2 of 5: recompute the error term for the new z
22*61046927SAndroid Build Coastguard Worker   add $r2 (mul high u32 $r2 u32 $r3) $r2   // z += high 32 bits of z * $r3
23*61046927SAndroid Build Coastguard Worker   mul $r3 u32 $r1 u32 $r2   // refinement 3 of 5
24*61046927SAndroid Build Coastguard Worker   add $r2 (mul high u32 $r2 u32 $r3) $r2   // z += high 32 bits of z * $r3
25*61046927SAndroid Build Coastguard Worker   mul $r3 u32 $r1 u32 $r2   // refinement 4 of 5
26*61046927SAndroid Build Coastguard Worker   add $r2 (mul high u32 $r2 u32 $r3) $r2   // z += high 32 bits of z * $r3
27*61046927SAndroid Build Coastguard Worker   mul $r3 u32 $r1 u32 $r2   // refinement 5 of 5
28*61046927SAndroid Build Coastguard Worker   add $r2 (mul high u32 $r2 u32 $r3) $r2   // $r2 now holds the final z
29*61046927SAndroid Build Coastguard Worker   mov b32 $r3 $r0   // keep the dividend in $r3; $r0 is overwritten by the next instruction
30*61046927SAndroid Build Coastguard Worker   mul high $r0 u32 $r0 u32 $r2   // $r0 = q = high 32 bits of dividend * z (quotient estimate)
31*61046927SAndroid Build Coastguard Worker   cvt u32 $r2 neg u32 $r1   // $r2 = b again, obtained by negating the -b held in $r1
32*61046927SAndroid Build Coastguard Worker   add $r1 (mul u32 $r1 u32 $r0) $r3   // $r1 = (-b) * q + dividend, the remainder for the estimated q
33*61046927SAndroid Build Coastguard Worker   set $p0 0x1 ge u32 $r1 $r2   // $p0 = (remainder >= b), unsigned: the estimate q is still too small
34*61046927SAndroid Build Coastguard Worker   $p0 sub b32 $r1 $r1 $r2   // first correction: remainder -= b
35*61046927SAndroid Build Coastguard Worker   $p0 add b32 $r0 $r0 0x1   // first correction: q += 1
36*61046927SAndroid Build Coastguard Worker   $p0 set $p0 0x1 ge u32 $r1 $r2   // re-test only when the first correction was applied
37*61046927SAndroid Build Coastguard Worker   $p0 sub b32 $r1 $r1 $r2   // second correction: remainder -= b
38*61046927SAndroid Build Coastguard Worker   $p0 add b32 $r0 $r0 0x1   // second correction: q += 1
39*61046927SAndroid Build Coastguard Worker   ret   // return with quotient in $r0 and remainder in $r1
40*61046927SAndroid Build Coastguard Worker
41*61046927SAndroid Build Coastguard Worker// DIV S32, like DIV U32 after taking ABS(inputs); the signs are re-applied to quotient and remainder at the end
42*61046927SAndroid Build Coastguard Worker//
43*61046927SAndroid Build Coastguard Worker// INPUT:   $r0: dividend, $r1: divisor (the divisor is not tested for zero anywhere in this routine)
44*61046927SAndroid Build Coastguard Worker// OUTPUT:  $r0: result (quotient, negated when the operand signs differ), $r1: modulus (negated when the dividend was negative)
45*61046927SAndroid Build Coastguard Worker// CLOBBER: $r2 - $r3, $p0 - $p3 (the code below writes $p0, $p2 and $p3)
46*61046927SAndroid Build Coastguard Worker//
47*61046927SAndroid Build Coastguard Workergf100_div_s32:
48*61046927SAndroid Build Coastguard Worker   set $p2 0x1 lt s32 $r0 0x0   // $p2 = (dividend < 0); used at the end to pick the remainder's sign
49*61046927SAndroid Build Coastguard Worker   set $p3 0x1 lt s32 $r1 0x0 xor $p2   // $p3 = (divisor < 0) xor $p2: true when the operand signs differ
50*61046927SAndroid Build Coastguard Worker   cvt s32 $r0 abs s32 $r0   // $r0 = |dividend|
51*61046927SAndroid Build Coastguard Worker   cvt s32 $r1 abs s32 $r1   // $r1 = |divisor|; from here on the sequence matches gf100_div_u32
52*61046927SAndroid Build Coastguard Worker   bfind u32 $r2 $r1   // $r2 = bit index of the divisor's highest set bit (presumed bfind semantics - confirm)
53*61046927SAndroid Build Coastguard Worker   xor b32 $r2 $r2 0x1f   // an index i in 0..31 becomes 31 - i, i.e. the leading-zero count
54*61046927SAndroid Build Coastguard Worker   mov b32 $r3 0x1   // $r3 = 1, the value that gets shifted next
55*61046927SAndroid Build Coastguard Worker   shl b32 $r2 $r3 clamp $r2   // $r2 = z = 1 << $r2, a power-of-two first estimate (clamp presumably saturates the shift count - confirm)
56*61046927SAndroid Build Coastguard Worker   cvt u32 $r1 neg u32 $r1   // $r1 = -|divisor|, so the multiplies below produce the error term directly
57*61046927SAndroid Build Coastguard Worker   mul $r3 u32 $r1 u32 $r2   // refinement 1 of 5: $r3 = (-b) * z, i.e. 2^32 - b * z modulo 2^32
58*61046927SAndroid Build Coastguard Worker   add $r2 (mul high u32 $r2 u32 $r3) $r2   // z += high 32 bits of z * $r3
59*61046927SAndroid Build Coastguard Worker   mul $r3 u32 $r1 u32 $r2   // refinement 2 of 5: recompute the error term for the new z
60*61046927SAndroid Build Coastguard Worker   add $r2 (mul high u32 $r2 u32 $r3) $r2   // z += high 32 bits of z * $r3
61*61046927SAndroid Build Coastguard Worker   mul $r3 u32 $r1 u32 $r2   // refinement 3 of 5
62*61046927SAndroid Build Coastguard Worker   add $r2 (mul high u32 $r2 u32 $r3) $r2   // z += high 32 bits of z * $r3
63*61046927SAndroid Build Coastguard Worker   mul $r3 u32 $r1 u32 $r2   // refinement 4 of 5
64*61046927SAndroid Build Coastguard Worker   add $r2 (mul high u32 $r2 u32 $r3) $r2   // z += high 32 bits of z * $r3
65*61046927SAndroid Build Coastguard Worker   mul $r3 u32 $r1 u32 $r2   // refinement 5 of 5
66*61046927SAndroid Build Coastguard Worker   add $r2 (mul high u32 $r2 u32 $r3) $r2   // $r2 now holds the final z
67*61046927SAndroid Build Coastguard Worker   mov b32 $r3 $r0   // keep |dividend| in $r3; $r0 is overwritten by the next instruction
68*61046927SAndroid Build Coastguard Worker   mul high $r0 u32 $r0 u32 $r2   // $r0 = q = high 32 bits of |dividend| * z (quotient estimate)
69*61046927SAndroid Build Coastguard Worker   cvt u32 $r2 neg u32 $r1   // $r2 = |divisor| again, obtained by negating the value held in $r1
70*61046927SAndroid Build Coastguard Worker   add $r1 (mul u32 $r1 u32 $r0) $r3   // $r1 = (-b) * q + |dividend|, the remainder for the estimated q
71*61046927SAndroid Build Coastguard Worker   set $p0 0x1 ge u32 $r1 $r2   // $p0 = (remainder >= |divisor|), unsigned: the estimate q is still too small
72*61046927SAndroid Build Coastguard Worker   $p0 sub b32 $r1 $r1 $r2   // first correction: remainder -= |divisor|
73*61046927SAndroid Build Coastguard Worker   $p0 add b32 $r0 $r0 0x1   // first correction: q += 1
74*61046927SAndroid Build Coastguard Worker   $p0 set $p0 0x1 ge u32 $r1 $r2   // re-test only when the first correction was applied
75*61046927SAndroid Build Coastguard Worker   $p0 sub b32 $r1 $r1 $r2   // second correction: remainder -= |divisor|
76*61046927SAndroid Build Coastguard Worker   $p0 add b32 $r0 $r0 0x1   // second correction: q += 1
77*61046927SAndroid Build Coastguard Worker   $p3 cvt s32 $r0 neg s32 $r0   // operand signs differed: negate the quotient
78*61046927SAndroid Build Coastguard Worker   $p2 cvt s32 $r1 neg s32 $r1   // dividend was negative: negate the remainder so it takes the dividend's sign
79*61046927SAndroid Build Coastguard Worker   ret   // return with quotient in $r0 and remainder in $r1
80*61046927SAndroid Build Coastguard Worker
81*61046927SAndroid Build Coastguard Worker// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
82*61046927SAndroid Build Coastguard Worker//
83*61046927SAndroid Build Coastguard Worker// INPUT:   $r0d (x)
84*61046927SAndroid Build Coastguard Worker// OUTPUT:  $r0d (rcp(x))
85*61046927SAndroid Build Coastguard Worker// CLOBBER: $r2 - $r7
86*61046927SAndroid Build Coastguard Worker// SIZE:    9 * 8 bytes
87*61046927SAndroid Build Coastguard Worker// NOTE(review): the body below is only nop + ret, so the recurrence, OUTPUT, CLOBBER and SIZE lines above describe work this code does not perform - confirm whether a real implementation is expected here
88*61046927SAndroid Build Coastguard Workergf100_rcp_f64:
89*61046927SAndroid Build Coastguard Worker   nop   // placeholder: no reciprocal is computed and no register is written
90*61046927SAndroid Build Coastguard Worker   ret   // returns with $r0d exactly as the caller passed it in
91*61046927SAndroid Build Coastguard Worker
92*61046927SAndroid Build Coastguard Worker// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
93*61046927SAndroid Build Coastguard Worker//
94*61046927SAndroid Build Coastguard Worker// INPUT:   $r0d (x)
95*61046927SAndroid Build Coastguard Worker// OUTPUT:  $r0d (rsqrt(x))
96*61046927SAndroid Build Coastguard Worker// CLOBBER: $r2 - $r7
97*61046927SAndroid Build Coastguard Worker// SIZE:    14 * 8 bytes
98*61046927SAndroid Build Coastguard Worker// NOTE(review): the body below is only nop + ret, so the recurrence, OUTPUT, CLOBBER and SIZE lines above describe work this code does not perform - confirm whether a real implementation is expected here
99*61046927SAndroid Build Coastguard Workergf100_rsq_f64:
100*61046927SAndroid Build Coastguard Worker   nop   // placeholder: no reciprocal square root is computed and no register is written
101*61046927SAndroid Build Coastguard Worker   ret   // returns with $r0d exactly as the caller passed it in
102*61046927SAndroid Build Coastguard Worker
103*61046927SAndroid Build Coastguard Worker.section #gf100_builtin_offsets   // table with one 64-bit entry per built-in routine label; presumably read by position, so keep the order stable - confirm against the consumer
104*61046927SAndroid Build Coastguard Worker.b64 #gf100_div_u32   // entry 0: unsigned 32-bit divide
105*61046927SAndroid Build Coastguard Worker.b64 #gf100_div_s32   // entry 1: signed 32-bit divide
106*61046927SAndroid Build Coastguard Worker.b64 #gf100_rcp_f64   // entry 2: f64 reciprocal (body is a nop + ret stub in this file)
107*61046927SAndroid Build Coastguard Worker.b64 #gf100_rsq_f64   // entry 3: f64 reciprocal square root (body is a nop + ret stub in this file)
108