// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32)
#include <openssl/arm_arch.h>

.text

.globl	bn_mul_mont

.def bn_mul_mont
   .type 32
.endef
.align	5
//----------------------------------------------------------------------
// bn_mul_mont -- word-by-word Montgomery multiplication (generic path).
//
// Register arguments, as consumed by the code below:
//   x0  rp   result, num 64-bit words (written by the final subtract/copy)
//   x1  ap   first operand, num words
//   x2  bp   second operand, num words
//   x3  np   modulus, num words
//   x4  n0   pointer to the Montgomery constant; only *n0 is loaded
//   x5  num  operand length in 64-bit words
// Returns x0 = 1.
//
// Dispatch: num % 8 == 0 branches to __bn_sqr8x_mont, num % 4 == 0 to
// __bn_mul4x_mont; every other length runs Lmul_mont.  Lmul_mont loads
// ap[0..1] and np[0..1] unconditionally and starts its inner counter at
// num-2, so it requires num >= 2.
//
// Stack: 64-byte frame (x29, x30, x19-x24) plus a num-word scratch vector
// tp carved out below it, 16-byte aligned.  tp is zeroed before returning.
//
// Working registers inside Lmul_mont:
//   x4        *n0                 x5   num*8 (byte length)
//   x9        bp[i]               x15  m1 = tp[0]*n0 for this pass
//   x6,x7     lo/hi of the running ap[j]*bp[i] column
//   x10,x11   lo/hi of the next ap[j]*bp[i] product
//   x12,x13   lo/hi of the running np[j]*m1 column
//   x16,x17   lo/hi of the next np[j]*m1 product
//   x19       topmost overflow bit of tp
//   x20,x21   outer (i) / inner (j) countdowns, in bytes
//   x22       tp cursor           x23  tp[j] loaded from the scratch vector
//
// The carry flag is deliberately kept live across several instructions and
// even across loop back-edges; do not insert flag-setting instructions
// between an adds/adcs/sbcs/subs and its consumer.
//----------------------------------------------------------------------
bn_mul_mont:
	AARCH64_SIGN_LINK_REGISTER
	tst	x5,#7
	b.eq	__bn_sqr8x_mont
	tst	x5,#3
	b.eq	__bn_mul4x_mont
Lmul_mont:
	stp	x29,x30,[sp,#-64]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]

	ldr	x9,[x2],#8		// bp[0]
	sub	x22,sp,x5,lsl#3		// room for num words of tp
	ldp	x7,x8,[x1],#16		// ap[0..1]
	lsl	x5,x5,#3		// num is a byte count from here on
	ldr	x4,[x4]			// *n0
	and	x22,x22,#-16		// ABI says so (keep sp 16-byte aligned)
	ldp	x13,x14,[x3],#16	// np[0..1]

	mul	x6,x7,x9		// ap[0]*bp[0]
	sub	x21,x5,#16		// j=num-2
	umulh	x7,x7,x9
	mul	x10,x8,x9		// ap[1]*bp[0]
	umulh	x11,x8,x9

	mul	x15,x6,x4		// "tp[0]"*n0
	mov	sp,x22			// alloca

	// (*)	mul	x12,x13,x15	// np[0]*m1
	umulh	x13,x13,x15
	mul	x16,x14,x15		// np[1]*m1
	// (*)	adds	x12,x12,x6	// discarded
	// (*)	As for removal of first multiplication and addition
	//	instructions. The outcome of first addition is
	//	guaranteed to be zero, which leaves two computationally
	//	significant outcomes: it either carries or not. Then
	//	question is when does it carry? Is there alternative
	//	way to deduce it? If you follow operations, you can
	//	observe that condition for carry is quite simple:
	//	x6 being non-zero. So that carry can be calculated
	//	by adding -1 to x6. That's what next instruction does.
	subs	xzr,x6,#1		// (*) C = (x6 != 0)
	umulh	x17,x14,x15
	adc	x13,x13,xzr
	cbz	x21,L1st_skip		// num == 2: no middle words

	// First pass (bp[0]): tp does not hold anything yet, so nothing is
	// read back from the scratch vector.
L1st:
	ldr	x8,[x1],#8
	adds	x6,x10,x7
	sub	x21,x21,#8		// j--
	adc	x7,x11,xzr

	ldr	x14,[x3],#8
	adds	x12,x16,x13
	mul	x10,x8,x9		// ap[j]*bp[0]
	adc	x13,x17,xzr
	umulh	x11,x8,x9

	adds	x12,x12,x6
	mul	x16,x14,x15		// np[j]*m1
	adc	x13,x13,xzr
	umulh	x17,x14,x15
	str	x12,[x22],#8		// tp[j-1]
	cbnz	x21,L1st

L1st_skip:
	adds	x6,x10,x7
	sub	x1,x1,x5		// rewind x1
	adc	x7,x11,xzr

	adds	x12,x16,x13
	sub	x3,x3,x5		// rewind x3
	adc	x13,x17,xzr

	adds	x12,x12,x6
	sub	x20,x5,#8		// i=num-1
	adcs	x13,x13,x7

	adc	x19,xzr,xzr		// upmost overflow bit
	stp	x12,x13,[x22]

	// Remaining passes (bp[1..num-1]): same column sums, but each column
	// also folds in the tp word left by the previous pass.
Louter:
	ldr	x9,[x2],#8		// bp[i]
	ldp	x7,x8,[x1],#16
	ldr	x23,[sp]		// tp[0]
	add	x22,sp,#8

	mul	x6,x7,x9		// ap[0]*bp[i]
	sub	x21,x5,#16		// j=num-2
	umulh	x7,x7,x9
	ldp	x13,x14,[x3],#16
	mul	x10,x8,x9		// ap[1]*bp[i]
	adds	x6,x6,x23
	umulh	x11,x8,x9
	adc	x7,x7,xzr

	mul	x15,x6,x4
	sub	x20,x20,#8		// i--

	// (*)	mul	x12,x13,x15	// np[0]*m1
	umulh	x13,x13,x15
	mul	x16,x14,x15		// np[1]*m1
	// (*)	adds	x12,x12,x6
	subs	xzr,x6,#1		// (*) C = (x6 != 0), see note in first pass
	umulh	x17,x14,x15
	cbz	x21,Linner_skip

	// The carry consumed by the first adc of each iteration comes from
	// the subs above on entry and from the trailing
	// "adds x12,x12,x6" of the previous iteration afterwards.
Linner:
	ldr	x8,[x1],#8
	adc	x13,x13,xzr
	ldr	x23,[x22],#8		// tp[j]
	adds	x6,x10,x7
	sub	x21,x21,#8		// j--
	adc	x7,x11,xzr

	adds	x12,x16,x13
	ldr	x14,[x3],#8
	adc	x13,x17,xzr

	mul	x10,x8,x9		// ap[j]*bp[i]
	adds	x6,x6,x23
	umulh	x11,x8,x9
	adc	x7,x7,xzr

	mul	x16,x14,x15		// np[j]*m1
	adds	x12,x12,x6
	umulh	x17,x14,x15
	str	x12,[x22,#-16]		// tp[j-1]
	cbnz	x21,Linner

Linner_skip:
	ldr	x23,[x22],#8		// tp[j]
	adc	x13,x13,xzr
	adds	x6,x10,x7
	sub	x1,x1,x5		// rewind x1
	adc	x7,x11,xzr

	adds	x12,x16,x13
	sub	x3,x3,x5		// rewind x3
	adcs	x13,x17,x19		// fold in previous pass's overflow bit
	adc	x19,xzr,xzr

	adds	x6,x6,x23
	adc	x7,x7,xzr

	adds	x12,x12,x6
	adcs	x13,x13,x7
	adc	x19,x19,xzr		// upmost overflow bit
	stp	x12,x13,[x22,#-16]

	cbnz	x20,Louter

	// Final step. We see if result is larger than modulus, and
	// if it is, subtract the modulus. But comparison implies
	// subtraction. So we subtract modulus, see if it borrowed,
	// and conditionally copy original value.
	ldr	x23,[sp]		// tp[0]
	add	x22,sp,#8
	ldr	x14,[x3],#8		// np[0]
	subs	x21,x5,#8		// j=num-1 and clear borrow
	mov	x1,x0			// x1 walks rp; x0 is kept for the copy loop
Lsub:
	sbcs	x8,x23,x14		// tp[j]-np[j]
	ldr	x23,[x22],#8
	sub	x21,x21,#8		// j--
	ldr	x14,[x3],#8
	str	x8,[x1],#8		// rp[j]=tp[j]-np[j]
	cbnz	x21,Lsub

	sbcs	x8,x23,x14
	sbcs	x19,x19,xzr		// did it borrow?
	str	x8,[x1],#8		// rp[num-1]

	// The flags produced by the sbcs above stay live until the last csel
	// below: everything in between is deliberately non-flag-setting.
	// "lo" (carry clear) means the subtraction borrowed, so the original
	// tp word is kept; otherwise the difference already in rp is kept.
	ldr	x23,[sp]		// tp[0]
	add	x22,sp,#8
	ldr	x8,[x0],#8		// rp[0]
	sub	x5,x5,#8		// num--
	nop				// presumably padding; no architectural effect
Lcond_copy:
	sub	x5,x5,#8		// num--
	csel	x14,x23,x8,lo		// did it borrow?
	ldr	x23,[x22],#8
	ldr	x8,[x0],#8
	str	xzr,[x22,#-16]		// wipe tp
	str	x14,[x0,#-16]
	cbnz	x5,Lcond_copy

	csel	x14,x23,x8,lo
	str	xzr,[x22,#-8]		// wipe tp
	str	x14,[x0,#-8]

	// Epilogue: x29 still points at the 64-byte frame, so restoring sp
	// from it also releases the tp scratch area.  x30 is not reloaded;
	// no instruction visible in this path writes it after the prologue.
	ldp	x19,x20,[x29,#16]
	mov	sp,x29
	ldp	x21,x22,[x29,#32]
	mov	x0,#1
	ldp	x23,x24,[x29,#48]
	ldr	x29,[sp],#64
	AARCH64_VALIDATE_LINK_REGISTER
	ret

.def __bn_sqr8x_mont
   .type 32
.endef
.align	5
__bn_sqr8x_mont:
	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
	// only from bn_mul_mont which has already signed the return address.
	cmp	x1,x2
	b.ne	__bn_mul4x_mont
Lsqr8x_mont:
	stp	x29,x30,[sp,#-128]!
230*8fb009dcSAndroid Build Coastguard Worker add x29,sp,#0 231*8fb009dcSAndroid Build Coastguard Worker stp x19,x20,[sp,#16] 232*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[sp,#32] 233*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[sp,#48] 234*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[sp,#64] 235*8fb009dcSAndroid Build Coastguard Worker stp x27,x28,[sp,#80] 236*8fb009dcSAndroid Build Coastguard Worker stp x0,x3,[sp,#96] // offload rp and np 237*8fb009dcSAndroid Build Coastguard Worker 238*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 239*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 240*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x1,#8*4] 241*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x1,#8*6] 242*8fb009dcSAndroid Build Coastguard Worker 243*8fb009dcSAndroid Build Coastguard Worker sub x2,sp,x5,lsl#4 244*8fb009dcSAndroid Build Coastguard Worker lsl x5,x5,#3 245*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x4] // *n0 246*8fb009dcSAndroid Build Coastguard Worker mov sp,x2 // alloca 247*8fb009dcSAndroid Build Coastguard Worker sub x27,x5,#8*8 248*8fb009dcSAndroid Build Coastguard Worker b Lsqr8x_zero_start 249*8fb009dcSAndroid Build Coastguard Worker 250*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_zero: 251*8fb009dcSAndroid Build Coastguard Worker sub x27,x27,#8*8 252*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*0] 253*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*2] 254*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*4] 255*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*6] 256*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_zero_start: 257*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*8] 258*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*10] 259*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*12] 260*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*14] 261*8fb009dcSAndroid Build 
Coastguard Worker add x2,x2,#8*16 262*8fb009dcSAndroid Build Coastguard Worker cbnz x27,Lsqr8x_zero 263*8fb009dcSAndroid Build Coastguard Worker 264*8fb009dcSAndroid Build Coastguard Worker add x3,x1,x5 265*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*8 266*8fb009dcSAndroid Build Coastguard Worker mov x19,xzr 267*8fb009dcSAndroid Build Coastguard Worker mov x20,xzr 268*8fb009dcSAndroid Build Coastguard Worker mov x21,xzr 269*8fb009dcSAndroid Build Coastguard Worker mov x22,xzr 270*8fb009dcSAndroid Build Coastguard Worker mov x23,xzr 271*8fb009dcSAndroid Build Coastguard Worker mov x24,xzr 272*8fb009dcSAndroid Build Coastguard Worker mov x25,xzr 273*8fb009dcSAndroid Build Coastguard Worker mov x26,xzr 274*8fb009dcSAndroid Build Coastguard Worker mov x2,sp 275*8fb009dcSAndroid Build Coastguard Worker str x4,[x29,#112] // offload n0 276*8fb009dcSAndroid Build Coastguard Worker 277*8fb009dcSAndroid Build Coastguard Worker // Multiply everything but a[i]*a[i] 278*8fb009dcSAndroid Build Coastguard Worker.align 4 279*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_outer_loop: 280*8fb009dcSAndroid Build Coastguard Worker // a[1]a[0] (i) 281*8fb009dcSAndroid Build Coastguard Worker // a[2]a[0] 282*8fb009dcSAndroid Build Coastguard Worker // a[3]a[0] 283*8fb009dcSAndroid Build Coastguard Worker // a[4]a[0] 284*8fb009dcSAndroid Build Coastguard Worker // a[5]a[0] 285*8fb009dcSAndroid Build Coastguard Worker // a[6]a[0] 286*8fb009dcSAndroid Build Coastguard Worker // a[7]a[0] 287*8fb009dcSAndroid Build Coastguard Worker // a[2]a[1] (ii) 288*8fb009dcSAndroid Build Coastguard Worker // a[3]a[1] 289*8fb009dcSAndroid Build Coastguard Worker // a[4]a[1] 290*8fb009dcSAndroid Build Coastguard Worker // a[5]a[1] 291*8fb009dcSAndroid Build Coastguard Worker // a[6]a[1] 292*8fb009dcSAndroid Build Coastguard Worker // a[7]a[1] 293*8fb009dcSAndroid Build Coastguard Worker // a[3]a[2] (iii) 294*8fb009dcSAndroid Build Coastguard Worker // a[4]a[2] 295*8fb009dcSAndroid Build 
Coastguard Worker // a[5]a[2] 296*8fb009dcSAndroid Build Coastguard Worker // a[6]a[2] 297*8fb009dcSAndroid Build Coastguard Worker // a[7]a[2] 298*8fb009dcSAndroid Build Coastguard Worker // a[4]a[3] (iv) 299*8fb009dcSAndroid Build Coastguard Worker // a[5]a[3] 300*8fb009dcSAndroid Build Coastguard Worker // a[6]a[3] 301*8fb009dcSAndroid Build Coastguard Worker // a[7]a[3] 302*8fb009dcSAndroid Build Coastguard Worker // a[5]a[4] (v) 303*8fb009dcSAndroid Build Coastguard Worker // a[6]a[4] 304*8fb009dcSAndroid Build Coastguard Worker // a[7]a[4] 305*8fb009dcSAndroid Build Coastguard Worker // a[6]a[5] (vi) 306*8fb009dcSAndroid Build Coastguard Worker // a[7]a[5] 307*8fb009dcSAndroid Build Coastguard Worker // a[7]a[6] (vii) 308*8fb009dcSAndroid Build Coastguard Worker 309*8fb009dcSAndroid Build Coastguard Worker mul x14,x7,x6 // lo(a[1..7]*a[0]) (i) 310*8fb009dcSAndroid Build Coastguard Worker mul x15,x8,x6 311*8fb009dcSAndroid Build Coastguard Worker mul x16,x9,x6 312*8fb009dcSAndroid Build Coastguard Worker mul x17,x10,x6 313*8fb009dcSAndroid Build Coastguard Worker adds x20,x20,x14 // t[1]+lo(a[1]*a[0]) 314*8fb009dcSAndroid Build Coastguard Worker mul x14,x11,x6 315*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x15 316*8fb009dcSAndroid Build Coastguard Worker mul x15,x12,x6 317*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x16 318*8fb009dcSAndroid Build Coastguard Worker mul x16,x13,x6 319*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x17 320*8fb009dcSAndroid Build Coastguard Worker umulh x17,x7,x6 // hi(a[1..7]*a[0]) 321*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x14 322*8fb009dcSAndroid Build Coastguard Worker umulh x14,x8,x6 323*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x15 324*8fb009dcSAndroid Build Coastguard Worker umulh x15,x9,x6 325*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x16 326*8fb009dcSAndroid Build Coastguard Worker umulh x16,x10,x6 327*8fb009dcSAndroid Build Coastguard Worker stp 
x19,x20,[x2],#8*2 // t[0..1] 328*8fb009dcSAndroid Build Coastguard Worker adc x19,xzr,xzr // t[8] 329*8fb009dcSAndroid Build Coastguard Worker adds x21,x21,x17 // t[2]+lo(a[1]*a[0]) 330*8fb009dcSAndroid Build Coastguard Worker umulh x17,x11,x6 331*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x14 332*8fb009dcSAndroid Build Coastguard Worker umulh x14,x12,x6 333*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x15 334*8fb009dcSAndroid Build Coastguard Worker umulh x15,x13,x6 335*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x16 336*8fb009dcSAndroid Build Coastguard Worker mul x16,x8,x7 // lo(a[2..7]*a[1]) (ii) 337*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x17 338*8fb009dcSAndroid Build Coastguard Worker mul x17,x9,x7 339*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x14 340*8fb009dcSAndroid Build Coastguard Worker mul x14,x10,x7 341*8fb009dcSAndroid Build Coastguard Worker adc x19,x19,x15 342*8fb009dcSAndroid Build Coastguard Worker 343*8fb009dcSAndroid Build Coastguard Worker mul x15,x11,x7 344*8fb009dcSAndroid Build Coastguard Worker adds x22,x22,x16 345*8fb009dcSAndroid Build Coastguard Worker mul x16,x12,x7 346*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x17 347*8fb009dcSAndroid Build Coastguard Worker mul x17,x13,x7 348*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x14 349*8fb009dcSAndroid Build Coastguard Worker umulh x14,x8,x7 // hi(a[2..7]*a[1]) 350*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x15 351*8fb009dcSAndroid Build Coastguard Worker umulh x15,x9,x7 352*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x16 353*8fb009dcSAndroid Build Coastguard Worker umulh x16,x10,x7 354*8fb009dcSAndroid Build Coastguard Worker adcs x19,x19,x17 355*8fb009dcSAndroid Build Coastguard Worker umulh x17,x11,x7 356*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[x2],#8*2 // t[2..3] 357*8fb009dcSAndroid Build Coastguard Worker adc x20,xzr,xzr // t[9] 358*8fb009dcSAndroid Build Coastguard Worker 
adds x23,x23,x14 359*8fb009dcSAndroid Build Coastguard Worker umulh x14,x12,x7 360*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x15 361*8fb009dcSAndroid Build Coastguard Worker umulh x15,x13,x7 362*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x16 363*8fb009dcSAndroid Build Coastguard Worker mul x16,x9,x8 // lo(a[3..7]*a[2]) (iii) 364*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x17 365*8fb009dcSAndroid Build Coastguard Worker mul x17,x10,x8 366*8fb009dcSAndroid Build Coastguard Worker adcs x19,x19,x14 367*8fb009dcSAndroid Build Coastguard Worker mul x14,x11,x8 368*8fb009dcSAndroid Build Coastguard Worker adc x20,x20,x15 369*8fb009dcSAndroid Build Coastguard Worker 370*8fb009dcSAndroid Build Coastguard Worker mul x15,x12,x8 371*8fb009dcSAndroid Build Coastguard Worker adds x24,x24,x16 372*8fb009dcSAndroid Build Coastguard Worker mul x16,x13,x8 373*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x17 374*8fb009dcSAndroid Build Coastguard Worker umulh x17,x9,x8 // hi(a[3..7]*a[2]) 375*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x14 376*8fb009dcSAndroid Build Coastguard Worker umulh x14,x10,x8 377*8fb009dcSAndroid Build Coastguard Worker adcs x19,x19,x15 378*8fb009dcSAndroid Build Coastguard Worker umulh x15,x11,x8 379*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x16 380*8fb009dcSAndroid Build Coastguard Worker umulh x16,x12,x8 381*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[x2],#8*2 // t[4..5] 382*8fb009dcSAndroid Build Coastguard Worker adc x21,xzr,xzr // t[10] 383*8fb009dcSAndroid Build Coastguard Worker adds x25,x25,x17 384*8fb009dcSAndroid Build Coastguard Worker umulh x17,x13,x8 385*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x14 386*8fb009dcSAndroid Build Coastguard Worker mul x14,x10,x9 // lo(a[4..7]*a[3]) (iv) 387*8fb009dcSAndroid Build Coastguard Worker adcs x19,x19,x15 388*8fb009dcSAndroid Build Coastguard Worker mul x15,x11,x9 389*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x16 
390*8fb009dcSAndroid Build Coastguard Worker mul x16,x12,x9 391*8fb009dcSAndroid Build Coastguard Worker adc x21,x21,x17 392*8fb009dcSAndroid Build Coastguard Worker 393*8fb009dcSAndroid Build Coastguard Worker mul x17,x13,x9 394*8fb009dcSAndroid Build Coastguard Worker adds x26,x26,x14 395*8fb009dcSAndroid Build Coastguard Worker umulh x14,x10,x9 // hi(a[4..7]*a[3]) 396*8fb009dcSAndroid Build Coastguard Worker adcs x19,x19,x15 397*8fb009dcSAndroid Build Coastguard Worker umulh x15,x11,x9 398*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x16 399*8fb009dcSAndroid Build Coastguard Worker umulh x16,x12,x9 400*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x17 401*8fb009dcSAndroid Build Coastguard Worker umulh x17,x13,x9 402*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[x2],#8*2 // t[6..7] 403*8fb009dcSAndroid Build Coastguard Worker adc x22,xzr,xzr // t[11] 404*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x14 405*8fb009dcSAndroid Build Coastguard Worker mul x14,x11,x10 // lo(a[5..7]*a[4]) (v) 406*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x15 407*8fb009dcSAndroid Build Coastguard Worker mul x15,x12,x10 408*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x16 409*8fb009dcSAndroid Build Coastguard Worker mul x16,x13,x10 410*8fb009dcSAndroid Build Coastguard Worker adc x22,x22,x17 411*8fb009dcSAndroid Build Coastguard Worker 412*8fb009dcSAndroid Build Coastguard Worker umulh x17,x11,x10 // hi(a[5..7]*a[4]) 413*8fb009dcSAndroid Build Coastguard Worker adds x20,x20,x14 414*8fb009dcSAndroid Build Coastguard Worker umulh x14,x12,x10 415*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x15 416*8fb009dcSAndroid Build Coastguard Worker umulh x15,x13,x10 417*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x16 418*8fb009dcSAndroid Build Coastguard Worker mul x16,x12,x11 // lo(a[6..7]*a[5]) (vi) 419*8fb009dcSAndroid Build Coastguard Worker adc x23,xzr,xzr // t[12] 420*8fb009dcSAndroid Build Coastguard Worker adds 
x21,x21,x17 421*8fb009dcSAndroid Build Coastguard Worker mul x17,x13,x11 422*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x14 423*8fb009dcSAndroid Build Coastguard Worker umulh x14,x12,x11 // hi(a[6..7]*a[5]) 424*8fb009dcSAndroid Build Coastguard Worker adc x23,x23,x15 425*8fb009dcSAndroid Build Coastguard Worker 426*8fb009dcSAndroid Build Coastguard Worker umulh x15,x13,x11 427*8fb009dcSAndroid Build Coastguard Worker adds x22,x22,x16 428*8fb009dcSAndroid Build Coastguard Worker mul x16,x13,x12 // lo(a[7]*a[6]) (vii) 429*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x17 430*8fb009dcSAndroid Build Coastguard Worker umulh x17,x13,x12 // hi(a[7]*a[6]) 431*8fb009dcSAndroid Build Coastguard Worker adc x24,xzr,xzr // t[13] 432*8fb009dcSAndroid Build Coastguard Worker adds x23,x23,x14 433*8fb009dcSAndroid Build Coastguard Worker sub x27,x3,x1 // done yet? 434*8fb009dcSAndroid Build Coastguard Worker adc x24,x24,x15 435*8fb009dcSAndroid Build Coastguard Worker 436*8fb009dcSAndroid Build Coastguard Worker adds x24,x24,x16 437*8fb009dcSAndroid Build Coastguard Worker sub x14,x3,x5 // rewinded ap 438*8fb009dcSAndroid Build Coastguard Worker adc x25,xzr,xzr // t[14] 439*8fb009dcSAndroid Build Coastguard Worker add x25,x25,x17 440*8fb009dcSAndroid Build Coastguard Worker 441*8fb009dcSAndroid Build Coastguard Worker cbz x27,Lsqr8x_outer_break 442*8fb009dcSAndroid Build Coastguard Worker 443*8fb009dcSAndroid Build Coastguard Worker mov x4,x6 444*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x2,#8*0] 445*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x2,#8*2] 446*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x2,#8*4] 447*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x2,#8*6] 448*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x6 449*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x7 450*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 451*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x8 
452*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x9 453*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 454*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x10 455*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x11 456*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x1,#8*4] 457*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x12 458*8fb009dcSAndroid Build Coastguard Worker mov x0,x1 459*8fb009dcSAndroid Build Coastguard Worker adcs x26,xzr,x13 460*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x1,#8*6] 461*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*8 462*8fb009dcSAndroid Build Coastguard Worker //adc x28,xzr,xzr // moved below 463*8fb009dcSAndroid Build Coastguard Worker mov x27,#-8*8 464*8fb009dcSAndroid Build Coastguard Worker 465*8fb009dcSAndroid Build Coastguard Worker // a[8]a[0] 466*8fb009dcSAndroid Build Coastguard Worker // a[9]a[0] 467*8fb009dcSAndroid Build Coastguard Worker // a[a]a[0] 468*8fb009dcSAndroid Build Coastguard Worker // a[b]a[0] 469*8fb009dcSAndroid Build Coastguard Worker // a[c]a[0] 470*8fb009dcSAndroid Build Coastguard Worker // a[d]a[0] 471*8fb009dcSAndroid Build Coastguard Worker // a[e]a[0] 472*8fb009dcSAndroid Build Coastguard Worker // a[f]a[0] 473*8fb009dcSAndroid Build Coastguard Worker // a[8]a[1] 474*8fb009dcSAndroid Build Coastguard Worker // a[f]a[1]........................ 475*8fb009dcSAndroid Build Coastguard Worker // a[8]a[2] 476*8fb009dcSAndroid Build Coastguard Worker // a[f]a[2]........................ 477*8fb009dcSAndroid Build Coastguard Worker // a[8]a[3] 478*8fb009dcSAndroid Build Coastguard Worker // a[f]a[3]........................ 479*8fb009dcSAndroid Build Coastguard Worker // a[8]a[4] 480*8fb009dcSAndroid Build Coastguard Worker // a[f]a[4]........................ 481*8fb009dcSAndroid Build Coastguard Worker // a[8]a[5] 482*8fb009dcSAndroid Build Coastguard Worker // a[f]a[5]........................ 
483*8fb009dcSAndroid Build Coastguard Worker // a[8]a[6] 484*8fb009dcSAndroid Build Coastguard Worker // a[f]a[6]........................ 485*8fb009dcSAndroid Build Coastguard Worker // a[8]a[7] 486*8fb009dcSAndroid Build Coastguard Worker // a[f]a[7]........................ 487*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_mul: 488*8fb009dcSAndroid Build Coastguard Worker mul x14,x6,x4 489*8fb009dcSAndroid Build Coastguard Worker adc x28,xzr,xzr // carry bit, modulo-scheduled 490*8fb009dcSAndroid Build Coastguard Worker mul x15,x7,x4 491*8fb009dcSAndroid Build Coastguard Worker add x27,x27,#8 492*8fb009dcSAndroid Build Coastguard Worker mul x16,x8,x4 493*8fb009dcSAndroid Build Coastguard Worker mul x17,x9,x4 494*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x14 495*8fb009dcSAndroid Build Coastguard Worker mul x14,x10,x4 496*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x15 497*8fb009dcSAndroid Build Coastguard Worker mul x15,x11,x4 498*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x16 499*8fb009dcSAndroid Build Coastguard Worker mul x16,x12,x4 500*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x17 501*8fb009dcSAndroid Build Coastguard Worker mul x17,x13,x4 502*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x14 503*8fb009dcSAndroid Build Coastguard Worker umulh x14,x6,x4 504*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x15 505*8fb009dcSAndroid Build Coastguard Worker umulh x15,x7,x4 506*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x16 507*8fb009dcSAndroid Build Coastguard Worker umulh x16,x8,x4 508*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x17 509*8fb009dcSAndroid Build Coastguard Worker umulh x17,x9,x4 510*8fb009dcSAndroid Build Coastguard Worker adc x28,x28,xzr 511*8fb009dcSAndroid Build Coastguard Worker str x19,[x2],#8 512*8fb009dcSAndroid Build Coastguard Worker adds x19,x20,x14 513*8fb009dcSAndroid Build Coastguard Worker umulh x14,x10,x4 514*8fb009dcSAndroid Build Coastguard Worker adcs 
x20,x21,x15 515*8fb009dcSAndroid Build Coastguard Worker umulh x15,x11,x4 516*8fb009dcSAndroid Build Coastguard Worker adcs x21,x22,x16 517*8fb009dcSAndroid Build Coastguard Worker umulh x16,x12,x4 518*8fb009dcSAndroid Build Coastguard Worker adcs x22,x23,x17 519*8fb009dcSAndroid Build Coastguard Worker umulh x17,x13,x4 520*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x0,x27] 521*8fb009dcSAndroid Build Coastguard Worker adcs x23,x24,x14 522*8fb009dcSAndroid Build Coastguard Worker adcs x24,x25,x15 523*8fb009dcSAndroid Build Coastguard Worker adcs x25,x26,x16 524*8fb009dcSAndroid Build Coastguard Worker adcs x26,x28,x17 525*8fb009dcSAndroid Build Coastguard Worker //adc x28,xzr,xzr // moved above 526*8fb009dcSAndroid Build Coastguard Worker cbnz x27,Lsqr8x_mul 527*8fb009dcSAndroid Build Coastguard Worker // note that carry flag is guaranteed 528*8fb009dcSAndroid Build Coastguard Worker // to be zero at this point 529*8fb009dcSAndroid Build Coastguard Worker cmp x1,x3 // done yet? 530*8fb009dcSAndroid Build Coastguard Worker b.eq Lsqr8x_break 531*8fb009dcSAndroid Build Coastguard Worker 532*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x2,#8*0] 533*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x2,#8*2] 534*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x2,#8*4] 535*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x2,#8*6] 536*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x6 537*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x0,#-8*8] 538*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x7 539*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 540*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x8 541*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x9 542*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 543*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x10 544*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x11 545*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x1,#8*4] 
546*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x12 547*8fb009dcSAndroid Build Coastguard Worker mov x27,#-8*8 548*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x13 549*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x1,#8*6] 550*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*8 551*8fb009dcSAndroid Build Coastguard Worker //adc x28,xzr,xzr // moved above 552*8fb009dcSAndroid Build Coastguard Worker b Lsqr8x_mul 553*8fb009dcSAndroid Build Coastguard Worker 554*8fb009dcSAndroid Build Coastguard Worker.align 4 555*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_break: 556*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x0,#8*0] 557*8fb009dcSAndroid Build Coastguard Worker add x1,x0,#8*8 558*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x0,#8*2] 559*8fb009dcSAndroid Build Coastguard Worker sub x14,x3,x1 // is it last iteration? 560*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x0,#8*4] 561*8fb009dcSAndroid Build Coastguard Worker sub x15,x2,x14 562*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x0,#8*6] 563*8fb009dcSAndroid Build Coastguard Worker cbz x14,Lsqr8x_outer_loop 564*8fb009dcSAndroid Build Coastguard Worker 565*8fb009dcSAndroid Build Coastguard Worker stp x19,x20,[x2,#8*0] 566*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x15,#8*0] 567*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[x2,#8*2] 568*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x15,#8*2] 569*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[x2,#8*4] 570*8fb009dcSAndroid Build Coastguard Worker ldp x23,x24,[x15,#8*4] 571*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[x2,#8*6] 572*8fb009dcSAndroid Build Coastguard Worker mov x2,x15 573*8fb009dcSAndroid Build Coastguard Worker ldp x25,x26,[x15,#8*6] 574*8fb009dcSAndroid Build Coastguard Worker b Lsqr8x_outer_loop 575*8fb009dcSAndroid Build Coastguard Worker 576*8fb009dcSAndroid Build Coastguard Worker.align 4 577*8fb009dcSAndroid Build Coastguard 
WorkerLsqr8x_outer_break: 578*8fb009dcSAndroid Build Coastguard Worker // Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0] 579*8fb009dcSAndroid Build Coastguard Worker ldp x7,x9,[x14,#8*0] // recall that x14 is &a[0] 580*8fb009dcSAndroid Build Coastguard Worker ldp x15,x16,[sp,#8*1] 581*8fb009dcSAndroid Build Coastguard Worker ldp x11,x13,[x14,#8*2] 582*8fb009dcSAndroid Build Coastguard Worker add x1,x14,#8*4 583*8fb009dcSAndroid Build Coastguard Worker ldp x17,x14,[sp,#8*3] 584*8fb009dcSAndroid Build Coastguard Worker 585*8fb009dcSAndroid Build Coastguard Worker stp x19,x20,[x2,#8*0] 586*8fb009dcSAndroid Build Coastguard Worker mul x19,x7,x7 587*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[x2,#8*2] 588*8fb009dcSAndroid Build Coastguard Worker umulh x7,x7,x7 589*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[x2,#8*4] 590*8fb009dcSAndroid Build Coastguard Worker mul x8,x9,x9 591*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[x2,#8*6] 592*8fb009dcSAndroid Build Coastguard Worker mov x2,sp 593*8fb009dcSAndroid Build Coastguard Worker umulh x9,x9,x9 594*8fb009dcSAndroid Build Coastguard Worker adds x20,x7,x15,lsl#1 595*8fb009dcSAndroid Build Coastguard Worker extr x15,x16,x15,#63 596*8fb009dcSAndroid Build Coastguard Worker sub x27,x5,#8*4 597*8fb009dcSAndroid Build Coastguard Worker 598*8fb009dcSAndroid Build Coastguard WorkerLsqr4x_shift_n_add: 599*8fb009dcSAndroid Build Coastguard Worker adcs x21,x8,x15 600*8fb009dcSAndroid Build Coastguard Worker extr x16,x17,x16,#63 601*8fb009dcSAndroid Build Coastguard Worker sub x27,x27,#8*4 602*8fb009dcSAndroid Build Coastguard Worker adcs x22,x9,x16 603*8fb009dcSAndroid Build Coastguard Worker ldp x15,x16,[x2,#8*5] 604*8fb009dcSAndroid Build Coastguard Worker mul x10,x11,x11 605*8fb009dcSAndroid Build Coastguard Worker ldp x7,x9,[x1],#8*2 606*8fb009dcSAndroid Build Coastguard Worker umulh x11,x11,x11 607*8fb009dcSAndroid Build Coastguard Worker mul x12,x13,x13 608*8fb009dcSAndroid Build 
Coastguard Worker umulh x13,x13,x13 609*8fb009dcSAndroid Build Coastguard Worker extr x17,x14,x17,#63 610*8fb009dcSAndroid Build Coastguard Worker stp x19,x20,[x2,#8*0] 611*8fb009dcSAndroid Build Coastguard Worker adcs x23,x10,x17 612*8fb009dcSAndroid Build Coastguard Worker extr x14,x15,x14,#63 613*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[x2,#8*2] 614*8fb009dcSAndroid Build Coastguard Worker adcs x24,x11,x14 615*8fb009dcSAndroid Build Coastguard Worker ldp x17,x14,[x2,#8*7] 616*8fb009dcSAndroid Build Coastguard Worker extr x15,x16,x15,#63 617*8fb009dcSAndroid Build Coastguard Worker adcs x25,x12,x15 618*8fb009dcSAndroid Build Coastguard Worker extr x16,x17,x16,#63 619*8fb009dcSAndroid Build Coastguard Worker adcs x26,x13,x16 620*8fb009dcSAndroid Build Coastguard Worker ldp x15,x16,[x2,#8*9] 621*8fb009dcSAndroid Build Coastguard Worker mul x6,x7,x7 622*8fb009dcSAndroid Build Coastguard Worker ldp x11,x13,[x1],#8*2 623*8fb009dcSAndroid Build Coastguard Worker umulh x7,x7,x7 624*8fb009dcSAndroid Build Coastguard Worker mul x8,x9,x9 625*8fb009dcSAndroid Build Coastguard Worker umulh x9,x9,x9 626*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[x2,#8*4] 627*8fb009dcSAndroid Build Coastguard Worker extr x17,x14,x17,#63 628*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[x2,#8*6] 629*8fb009dcSAndroid Build Coastguard Worker add x2,x2,#8*8 630*8fb009dcSAndroid Build Coastguard Worker adcs x19,x6,x17 631*8fb009dcSAndroid Build Coastguard Worker extr x14,x15,x14,#63 632*8fb009dcSAndroid Build Coastguard Worker adcs x20,x7,x14 633*8fb009dcSAndroid Build Coastguard Worker ldp x17,x14,[x2,#8*3] 634*8fb009dcSAndroid Build Coastguard Worker extr x15,x16,x15,#63 635*8fb009dcSAndroid Build Coastguard Worker cbnz x27,Lsqr4x_shift_n_add 636*8fb009dcSAndroid Build Coastguard Worker ldp x1,x4,[x29,#104] // pull np and n0 637*8fb009dcSAndroid Build Coastguard Worker 638*8fb009dcSAndroid Build Coastguard Worker adcs x21,x8,x15 639*8fb009dcSAndroid Build 
Coastguard Worker extr x16,x17,x16,#63 640*8fb009dcSAndroid Build Coastguard Worker adcs x22,x9,x16 641*8fb009dcSAndroid Build Coastguard Worker ldp x15,x16,[x2,#8*5] 642*8fb009dcSAndroid Build Coastguard Worker mul x10,x11,x11 643*8fb009dcSAndroid Build Coastguard Worker umulh x11,x11,x11 644*8fb009dcSAndroid Build Coastguard Worker stp x19,x20,[x2,#8*0] 645*8fb009dcSAndroid Build Coastguard Worker mul x12,x13,x13 646*8fb009dcSAndroid Build Coastguard Worker umulh x13,x13,x13 647*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[x2,#8*2] 648*8fb009dcSAndroid Build Coastguard Worker extr x17,x14,x17,#63 649*8fb009dcSAndroid Build Coastguard Worker adcs x23,x10,x17 650*8fb009dcSAndroid Build Coastguard Worker extr x14,x15,x14,#63 651*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[sp,#8*0] 652*8fb009dcSAndroid Build Coastguard Worker adcs x24,x11,x14 653*8fb009dcSAndroid Build Coastguard Worker extr x15,x16,x15,#63 654*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 655*8fb009dcSAndroid Build Coastguard Worker adcs x25,x12,x15 656*8fb009dcSAndroid Build Coastguard Worker extr x16,xzr,x16,#63 657*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 658*8fb009dcSAndroid Build Coastguard Worker adc x26,x13,x16 659*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x1,#8*4] 660*8fb009dcSAndroid Build Coastguard Worker 661*8fb009dcSAndroid Build Coastguard Worker // Reduce by 512 bits per iteration 662*8fb009dcSAndroid Build Coastguard Worker mul x28,x4,x19 // t[0]*n0 663*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x1,#8*6] 664*8fb009dcSAndroid Build Coastguard Worker add x3,x1,x5 665*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[sp,#8*2] 666*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[x2,#8*4] 667*8fb009dcSAndroid Build Coastguard Worker ldp x23,x24,[sp,#8*4] 668*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[x2,#8*6] 669*8fb009dcSAndroid Build Coastguard Worker ldp x25,x26,[sp,#8*6] 
670*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*8 671*8fb009dcSAndroid Build Coastguard Worker mov x30,xzr // initial top-most carry 672*8fb009dcSAndroid Build Coastguard Worker mov x2,sp 673*8fb009dcSAndroid Build Coastguard Worker mov x27,#8 674*8fb009dcSAndroid Build Coastguard Worker 675*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_reduction: 676*8fb009dcSAndroid Build Coastguard Worker // (*) mul x14,x6,x28 // lo(n[0-7])*lo(t[0]*n0) 677*8fb009dcSAndroid Build Coastguard Worker mul x15,x7,x28 678*8fb009dcSAndroid Build Coastguard Worker sub x27,x27,#1 679*8fb009dcSAndroid Build Coastguard Worker mul x16,x8,x28 680*8fb009dcSAndroid Build Coastguard Worker str x28,[x2],#8 // put aside t[0]*n0 for tail processing 681*8fb009dcSAndroid Build Coastguard Worker mul x17,x9,x28 682*8fb009dcSAndroid Build Coastguard Worker // (*) adds xzr,x19,x14 683*8fb009dcSAndroid Build Coastguard Worker subs xzr,x19,#1 // (*) 684*8fb009dcSAndroid Build Coastguard Worker mul x14,x10,x28 685*8fb009dcSAndroid Build Coastguard Worker adcs x19,x20,x15 686*8fb009dcSAndroid Build Coastguard Worker mul x15,x11,x28 687*8fb009dcSAndroid Build Coastguard Worker adcs x20,x21,x16 688*8fb009dcSAndroid Build Coastguard Worker mul x16,x12,x28 689*8fb009dcSAndroid Build Coastguard Worker adcs x21,x22,x17 690*8fb009dcSAndroid Build Coastguard Worker mul x17,x13,x28 691*8fb009dcSAndroid Build Coastguard Worker adcs x22,x23,x14 692*8fb009dcSAndroid Build Coastguard Worker umulh x14,x6,x28 // hi(n[0-7])*lo(t[0]*n0) 693*8fb009dcSAndroid Build Coastguard Worker adcs x23,x24,x15 694*8fb009dcSAndroid Build Coastguard Worker umulh x15,x7,x28 695*8fb009dcSAndroid Build Coastguard Worker adcs x24,x25,x16 696*8fb009dcSAndroid Build Coastguard Worker umulh x16,x8,x28 697*8fb009dcSAndroid Build Coastguard Worker adcs x25,x26,x17 698*8fb009dcSAndroid Build Coastguard Worker umulh x17,x9,x28 699*8fb009dcSAndroid Build Coastguard Worker adc x26,xzr,xzr 700*8fb009dcSAndroid Build Coastguard Worker adds 
x19,x19,x14 701*8fb009dcSAndroid Build Coastguard Worker umulh x14,x10,x28 702*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x15 703*8fb009dcSAndroid Build Coastguard Worker umulh x15,x11,x28 704*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x16 705*8fb009dcSAndroid Build Coastguard Worker umulh x16,x12,x28 706*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x17 707*8fb009dcSAndroid Build Coastguard Worker umulh x17,x13,x28 708*8fb009dcSAndroid Build Coastguard Worker mul x28,x4,x19 // next t[0]*n0 709*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x14 710*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x15 711*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x16 712*8fb009dcSAndroid Build Coastguard Worker adc x26,x26,x17 713*8fb009dcSAndroid Build Coastguard Worker cbnz x27,Lsqr8x_reduction 714*8fb009dcSAndroid Build Coastguard Worker 715*8fb009dcSAndroid Build Coastguard Worker ldp x14,x15,[x2,#8*0] 716*8fb009dcSAndroid Build Coastguard Worker ldp x16,x17,[x2,#8*2] 717*8fb009dcSAndroid Build Coastguard Worker mov x0,x2 718*8fb009dcSAndroid Build Coastguard Worker sub x27,x3,x1 // done yet? 
719*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x14 720*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x15 721*8fb009dcSAndroid Build Coastguard Worker ldp x14,x15,[x2,#8*4] 722*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x16 723*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x17 724*8fb009dcSAndroid Build Coastguard Worker ldp x16,x17,[x2,#8*6] 725*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x14 726*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x15 727*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x16 728*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x17 729*8fb009dcSAndroid Build Coastguard Worker //adc x28,xzr,xzr // moved below 730*8fb009dcSAndroid Build Coastguard Worker cbz x27,Lsqr8x8_post_condition 731*8fb009dcSAndroid Build Coastguard Worker 732*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x2,#-8*8] 733*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 734*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 735*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x1,#8*4] 736*8fb009dcSAndroid Build Coastguard Worker mov x27,#-8*8 737*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x1,#8*6] 738*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*8 739*8fb009dcSAndroid Build Coastguard Worker 740*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_tail: 741*8fb009dcSAndroid Build Coastguard Worker mul x14,x6,x4 742*8fb009dcSAndroid Build Coastguard Worker adc x28,xzr,xzr // carry bit, modulo-scheduled 743*8fb009dcSAndroid Build Coastguard Worker mul x15,x7,x4 744*8fb009dcSAndroid Build Coastguard Worker add x27,x27,#8 745*8fb009dcSAndroid Build Coastguard Worker mul x16,x8,x4 746*8fb009dcSAndroid Build Coastguard Worker mul x17,x9,x4 747*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x14 748*8fb009dcSAndroid Build Coastguard Worker mul x14,x10,x4 749*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x15 750*8fb009dcSAndroid Build Coastguard Worker mul 
x15,x11,x4 751*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x16 752*8fb009dcSAndroid Build Coastguard Worker mul x16,x12,x4 753*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x17 754*8fb009dcSAndroid Build Coastguard Worker mul x17,x13,x4 755*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x14 756*8fb009dcSAndroid Build Coastguard Worker umulh x14,x6,x4 757*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x15 758*8fb009dcSAndroid Build Coastguard Worker umulh x15,x7,x4 759*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x16 760*8fb009dcSAndroid Build Coastguard Worker umulh x16,x8,x4 761*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x17 762*8fb009dcSAndroid Build Coastguard Worker umulh x17,x9,x4 763*8fb009dcSAndroid Build Coastguard Worker adc x28,x28,xzr 764*8fb009dcSAndroid Build Coastguard Worker str x19,[x2],#8 765*8fb009dcSAndroid Build Coastguard Worker adds x19,x20,x14 766*8fb009dcSAndroid Build Coastguard Worker umulh x14,x10,x4 767*8fb009dcSAndroid Build Coastguard Worker adcs x20,x21,x15 768*8fb009dcSAndroid Build Coastguard Worker umulh x15,x11,x4 769*8fb009dcSAndroid Build Coastguard Worker adcs x21,x22,x16 770*8fb009dcSAndroid Build Coastguard Worker umulh x16,x12,x4 771*8fb009dcSAndroid Build Coastguard Worker adcs x22,x23,x17 772*8fb009dcSAndroid Build Coastguard Worker umulh x17,x13,x4 773*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x0,x27] 774*8fb009dcSAndroid Build Coastguard Worker adcs x23,x24,x14 775*8fb009dcSAndroid Build Coastguard Worker adcs x24,x25,x15 776*8fb009dcSAndroid Build Coastguard Worker adcs x25,x26,x16 777*8fb009dcSAndroid Build Coastguard Worker adcs x26,x28,x17 778*8fb009dcSAndroid Build Coastguard Worker //adc x28,xzr,xzr // moved above 779*8fb009dcSAndroid Build Coastguard Worker cbnz x27,Lsqr8x_tail 780*8fb009dcSAndroid Build Coastguard Worker // note that carry flag is guaranteed 781*8fb009dcSAndroid Build Coastguard Worker // to be zero at this point 782*8fb009dcSAndroid 
Build Coastguard Worker ldp x6,x7,[x2,#8*0] 783*8fb009dcSAndroid Build Coastguard Worker sub x27,x3,x1 // done yet? 784*8fb009dcSAndroid Build Coastguard Worker sub x16,x3,x5 // rewinded np 785*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x2,#8*2] 786*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x2,#8*4] 787*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x2,#8*6] 788*8fb009dcSAndroid Build Coastguard Worker cbz x27,Lsqr8x_tail_break 789*8fb009dcSAndroid Build Coastguard Worker 790*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x0,#-8*8] 791*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x6 792*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x7 793*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 794*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x8 795*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x9 796*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 797*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x10 798*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x11 799*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x1,#8*4] 800*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x12 801*8fb009dcSAndroid Build Coastguard Worker mov x27,#-8*8 802*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x13 803*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x1,#8*6] 804*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*8 805*8fb009dcSAndroid Build Coastguard Worker //adc x28,xzr,xzr // moved above 806*8fb009dcSAndroid Build Coastguard Worker b Lsqr8x_tail 807*8fb009dcSAndroid Build Coastguard Worker 808*8fb009dcSAndroid Build Coastguard Worker.align 4 809*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_tail_break: 810*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x29,#112] // pull n0 811*8fb009dcSAndroid Build Coastguard Worker add x27,x2,#8*8 // end of current t[num] window 812*8fb009dcSAndroid Build Coastguard Worker 813*8fb009dcSAndroid Build Coastguard Worker 
subs xzr,x30,#1 // "move" top-most carry to carry bit 814*8fb009dcSAndroid Build Coastguard Worker adcs x14,x19,x6 815*8fb009dcSAndroid Build Coastguard Worker adcs x15,x20,x7 816*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x0,#8*0] 817*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x8 818*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x16,#8*0] // recall that x16 is &n[0] 819*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x9 820*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x16,#8*2] 821*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x10 822*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x11 823*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x16,#8*4] 824*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x12 825*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x13 826*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x16,#8*6] 827*8fb009dcSAndroid Build Coastguard Worker add x1,x16,#8*8 828*8fb009dcSAndroid Build Coastguard Worker adc x30,xzr,xzr // top-most carry 829*8fb009dcSAndroid Build Coastguard Worker mul x28,x4,x19 830*8fb009dcSAndroid Build Coastguard Worker stp x14,x15,[x2,#8*0] 831*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[x2,#8*2] 832*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x0,#8*2] 833*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[x2,#8*4] 834*8fb009dcSAndroid Build Coastguard Worker ldp x23,x24,[x0,#8*4] 835*8fb009dcSAndroid Build Coastguard Worker cmp x27,x29 // did we hit the bottom? 836*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[x2,#8*6] 837*8fb009dcSAndroid Build Coastguard Worker mov x2,x0 // slide the window 838*8fb009dcSAndroid Build Coastguard Worker ldp x25,x26,[x0,#8*6] 839*8fb009dcSAndroid Build Coastguard Worker mov x27,#8 840*8fb009dcSAndroid Build Coastguard Worker b.ne Lsqr8x_reduction 841*8fb009dcSAndroid Build Coastguard Worker 842*8fb009dcSAndroid Build Coastguard Worker // Final step. 
We see if result is larger than modulus, and 843*8fb009dcSAndroid Build Coastguard Worker // if it is, subtract the modulus. But comparison implies 844*8fb009dcSAndroid Build Coastguard Worker // subtraction. So we subtract modulus, see if it borrowed, 845*8fb009dcSAndroid Build Coastguard Worker // and conditionally copy original value. 846*8fb009dcSAndroid Build Coastguard Worker ldr x0,[x29,#96] // pull rp 847*8fb009dcSAndroid Build Coastguard Worker add x2,x2,#8*8 848*8fb009dcSAndroid Build Coastguard Worker subs x14,x19,x6 849*8fb009dcSAndroid Build Coastguard Worker sbcs x15,x20,x7 850*8fb009dcSAndroid Build Coastguard Worker sub x27,x5,#8*8 851*8fb009dcSAndroid Build Coastguard Worker mov x3,x0 // x0 copy 852*8fb009dcSAndroid Build Coastguard Worker 853*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_sub: 854*8fb009dcSAndroid Build Coastguard Worker sbcs x16,x21,x8 855*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 856*8fb009dcSAndroid Build Coastguard Worker sbcs x17,x22,x9 857*8fb009dcSAndroid Build Coastguard Worker stp x14,x15,[x0,#8*0] 858*8fb009dcSAndroid Build Coastguard Worker sbcs x14,x23,x10 859*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 860*8fb009dcSAndroid Build Coastguard Worker sbcs x15,x24,x11 861*8fb009dcSAndroid Build Coastguard Worker stp x16,x17,[x0,#8*2] 862*8fb009dcSAndroid Build Coastguard Worker sbcs x16,x25,x12 863*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x1,#8*4] 864*8fb009dcSAndroid Build Coastguard Worker sbcs x17,x26,x13 865*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x1,#8*6] 866*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*8 867*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x2,#8*0] 868*8fb009dcSAndroid Build Coastguard Worker sub x27,x27,#8*8 869*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x2,#8*2] 870*8fb009dcSAndroid Build Coastguard Worker ldp x23,x24,[x2,#8*4] 871*8fb009dcSAndroid Build Coastguard Worker ldp x25,x26,[x2,#8*6] 
872*8fb009dcSAndroid Build Coastguard Worker add x2,x2,#8*8 873*8fb009dcSAndroid Build Coastguard Worker stp x14,x15,[x0,#8*4] 874*8fb009dcSAndroid Build Coastguard Worker sbcs x14,x19,x6 875*8fb009dcSAndroid Build Coastguard Worker stp x16,x17,[x0,#8*6] 876*8fb009dcSAndroid Build Coastguard Worker add x0,x0,#8*8 877*8fb009dcSAndroid Build Coastguard Worker sbcs x15,x20,x7 878*8fb009dcSAndroid Build Coastguard Worker cbnz x27,Lsqr8x_sub 879*8fb009dcSAndroid Build Coastguard Worker 880*8fb009dcSAndroid Build Coastguard Worker sbcs x16,x21,x8 881*8fb009dcSAndroid Build Coastguard Worker mov x2,sp 882*8fb009dcSAndroid Build Coastguard Worker add x1,sp,x5 883*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x3,#8*0] 884*8fb009dcSAndroid Build Coastguard Worker sbcs x17,x22,x9 885*8fb009dcSAndroid Build Coastguard Worker stp x14,x15,[x0,#8*0] 886*8fb009dcSAndroid Build Coastguard Worker sbcs x14,x23,x10 887*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x3,#8*2] 888*8fb009dcSAndroid Build Coastguard Worker sbcs x15,x24,x11 889*8fb009dcSAndroid Build Coastguard Worker stp x16,x17,[x0,#8*2] 890*8fb009dcSAndroid Build Coastguard Worker sbcs x16,x25,x12 891*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x1,#8*0] 892*8fb009dcSAndroid Build Coastguard Worker sbcs x17,x26,x13 893*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x1,#8*2] 894*8fb009dcSAndroid Build Coastguard Worker sbcs xzr,x30,xzr // did it borrow? 
895*8fb009dcSAndroid Build Coastguard Worker ldr x30,[x29,#8] // pull return address 896*8fb009dcSAndroid Build Coastguard Worker stp x14,x15,[x0,#8*4] 897*8fb009dcSAndroid Build Coastguard Worker stp x16,x17,[x0,#8*6] 898*8fb009dcSAndroid Build Coastguard Worker 899*8fb009dcSAndroid Build Coastguard Worker sub x27,x5,#8*4 900*8fb009dcSAndroid Build Coastguard WorkerLsqr4x_cond_copy: 901*8fb009dcSAndroid Build Coastguard Worker sub x27,x27,#8*4 902*8fb009dcSAndroid Build Coastguard Worker csel x14,x19,x6,lo 903*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*0] 904*8fb009dcSAndroid Build Coastguard Worker csel x15,x20,x7,lo 905*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x3,#8*4] 906*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x1,#8*4] 907*8fb009dcSAndroid Build Coastguard Worker csel x16,x21,x8,lo 908*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*2] 909*8fb009dcSAndroid Build Coastguard Worker add x2,x2,#8*4 910*8fb009dcSAndroid Build Coastguard Worker csel x17,x22,x9,lo 911*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x3,#8*6] 912*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x1,#8*6] 913*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*4 914*8fb009dcSAndroid Build Coastguard Worker stp x14,x15,[x3,#8*0] 915*8fb009dcSAndroid Build Coastguard Worker stp x16,x17,[x3,#8*2] 916*8fb009dcSAndroid Build Coastguard Worker add x3,x3,#8*4 917*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x1,#8*0] 918*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x1,#8*2] 919*8fb009dcSAndroid Build Coastguard Worker cbnz x27,Lsqr4x_cond_copy 920*8fb009dcSAndroid Build Coastguard Worker 921*8fb009dcSAndroid Build Coastguard Worker csel x14,x19,x6,lo 922*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*0] 923*8fb009dcSAndroid Build Coastguard Worker csel x15,x20,x7,lo 924*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*2] 925*8fb009dcSAndroid Build Coastguard Worker csel x16,x21,x8,lo 
926*8fb009dcSAndroid Build Coastguard Worker csel x17,x22,x9,lo 927*8fb009dcSAndroid Build Coastguard Worker stp x14,x15,[x3,#8*0] 928*8fb009dcSAndroid Build Coastguard Worker stp x16,x17,[x3,#8*2] 929*8fb009dcSAndroid Build Coastguard Worker 930*8fb009dcSAndroid Build Coastguard Worker b Lsqr8x_done 931*8fb009dcSAndroid Build Coastguard Worker 932*8fb009dcSAndroid Build Coastguard Worker.align 4 933*8fb009dcSAndroid Build Coastguard WorkerLsqr8x8_post_condition: 934*8fb009dcSAndroid Build Coastguard Worker adc x28,xzr,xzr 935*8fb009dcSAndroid Build Coastguard Worker ldr x30,[x29,#8] // pull return address 936*8fb009dcSAndroid Build Coastguard Worker // x19-x26,x28 hold result, x6-x13 hold modulus 937*8fb009dcSAndroid Build Coastguard Worker subs x6,x19,x6 938*8fb009dcSAndroid Build Coastguard Worker ldr x1,[x29,#96] // pull rp 939*8fb009dcSAndroid Build Coastguard Worker sbcs x7,x20,x7 940*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*0] 941*8fb009dcSAndroid Build Coastguard Worker sbcs x8,x21,x8 942*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*2] 943*8fb009dcSAndroid Build Coastguard Worker sbcs x9,x22,x9 944*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*4] 945*8fb009dcSAndroid Build Coastguard Worker sbcs x10,x23,x10 946*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*6] 947*8fb009dcSAndroid Build Coastguard Worker sbcs x11,x24,x11 948*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*8] 949*8fb009dcSAndroid Build Coastguard Worker sbcs x12,x25,x12 950*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*10] 951*8fb009dcSAndroid Build Coastguard Worker sbcs x13,x26,x13 952*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*12] 953*8fb009dcSAndroid Build Coastguard Worker sbcs x28,x28,xzr // did it borrow? 
954*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*14] 955*8fb009dcSAndroid Build Coastguard Worker 956*8fb009dcSAndroid Build Coastguard Worker // x6-x13 hold result-modulus 957*8fb009dcSAndroid Build Coastguard Worker csel x6,x19,x6,lo 958*8fb009dcSAndroid Build Coastguard Worker csel x7,x20,x7,lo 959*8fb009dcSAndroid Build Coastguard Worker csel x8,x21,x8,lo 960*8fb009dcSAndroid Build Coastguard Worker csel x9,x22,x9,lo 961*8fb009dcSAndroid Build Coastguard Worker stp x6,x7,[x1,#8*0] 962*8fb009dcSAndroid Build Coastguard Worker csel x10,x23,x10,lo 963*8fb009dcSAndroid Build Coastguard Worker csel x11,x24,x11,lo 964*8fb009dcSAndroid Build Coastguard Worker stp x8,x9,[x1,#8*2] 965*8fb009dcSAndroid Build Coastguard Worker csel x12,x25,x12,lo 966*8fb009dcSAndroid Build Coastguard Worker csel x13,x26,x13,lo 967*8fb009dcSAndroid Build Coastguard Worker stp x10,x11,[x1,#8*4] 968*8fb009dcSAndroid Build Coastguard Worker stp x12,x13,[x1,#8*6] 969*8fb009dcSAndroid Build Coastguard Worker 970*8fb009dcSAndroid Build Coastguard WorkerLsqr8x_done: 971*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x29,#16] 972*8fb009dcSAndroid Build Coastguard Worker mov sp,x29 973*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x29,#32] 974*8fb009dcSAndroid Build Coastguard Worker mov x0,#1 975*8fb009dcSAndroid Build Coastguard Worker ldp x23,x24,[x29,#48] 976*8fb009dcSAndroid Build Coastguard Worker ldp x25,x26,[x29,#64] 977*8fb009dcSAndroid Build Coastguard Worker ldp x27,x28,[x29,#80] 978*8fb009dcSAndroid Build Coastguard Worker ldr x29,[sp],#128 979*8fb009dcSAndroid Build Coastguard Worker // x30 is popped earlier 980*8fb009dcSAndroid Build Coastguard Worker AARCH64_VALIDATE_LINK_REGISTER 981*8fb009dcSAndroid Build Coastguard Worker ret 982*8fb009dcSAndroid Build Coastguard Worker 983*8fb009dcSAndroid Build Coastguard Worker.def __bn_mul4x_mont 984*8fb009dcSAndroid Build Coastguard Worker .type 32 985*8fb009dcSAndroid Build Coastguard Worker.endef 
.align	5
// __bn_mul4x_mont
//
// Montgomery multiplication path that bn_mul_mont branches to when num is a
// multiple of 4 (the "tst x5,#3 / b.eq __bn_mul4x_mont" dispatch).
//
// In (as consumed below):
//   x0 = rp, result pointer		x1 = ap, pointer to a[]
//   x2 = bp, pointer to b[]		x3 = np, pointer to n[] (modulus)
//   x4 = pointer to n0			x5 = num, in 64-bit words
// Out:
//   x0 = 1
//
// Frame: 128 bytes addressed through x29 hold x29/x30 (#0), x19-x28
// (#16..#95) and, at #96, the saved rp and &b[num]. A further num*8+32
// bytes are carved out below that as scratch: the first four words at sp
// keep the t[0]*n0 values put aside by the reduction loops, and the running
// result t[] follows at sp+8*4. The exit paths overwrite the scratch area
// with xzr stores.
//
// x30 is reused as a carry word between passes, which is why the return
// address is reloaded from [x29,#8] before the shared epilogue.
//
// The instruction order is hand-scheduled and the carry flag is live across
// most of each loop body, including the "modulo-scheduled" adc at the top of
// every loop that consumes the carry left by the previous iteration. Do not
// reorder instructions or insert flag-setting ones.
//
// NOTE: this file is generated from a Perl script; comment fixes made here
// should be mirrored in that script.
__bn_mul4x_mont:
	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
	// only from bn_mul_mont or, presumably, __bn_sqr8x_mont (this comment
	// previously said __bn_mul8x_mont, a label that does not appear in the
	// visible file - confirm), which have already signed the return address.
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]

	sub	x26,sp,x5,lsl#3
	lsl	x5,x5,#3		// num is in bytes from here on
	ldr	x4,[x4]			// *n0
	sub	sp,x26,#8*4		// alloca

	add	x10,x2,x5
	add	x27,x1,x5		// &a[num], loop-end sentinel for ap
	stp	x0,x10,[x29,#96]	// offload rp and &b[num]

	ldr	x24,[x2,#8*0]		// b[0]
	ldp	x6,x7,[x1,#8*0]		// a[0..3]
	ldp	x8,x9,[x1,#8*2]
	add	x1,x1,#8*4
	mov	x19,xzr
	mov	x20,xzr
	mov	x21,xzr
	mov	x22,xzr
	ldp	x14,x15,[x3,#8*0]	// n[0..3]
	ldp	x16,x17,[x3,#8*2]
	adds	x3,x3,#8*4		// clear carry bit
	mov	x0,xzr
	mov	x28,#0			// byte offset into the current 4-word group of b[], wraps at 32
	mov	x26,sp

Loop_mul4x_1st_reduction:
	mul	x10,x6,x24		// lo(a[0..3]*b[0])
	adc	x0,x0,xzr		// modulo-scheduled
	mul	x11,x7,x24
	add	x28,x28,#8
	mul	x12,x8,x24
	and	x28,x28,#31
	mul	x13,x9,x24
	adds	x19,x19,x10
	umulh	x10,x6,x24		// hi(a[0..3]*b[0])
	adcs	x20,x20,x11
	mul	x25,x19,x4		// t[0]*n0
	adcs	x21,x21,x12
	umulh	x11,x7,x24
	adcs	x22,x22,x13
	umulh	x12,x8,x24
	adc	x23,xzr,xzr
	umulh	x13,x9,x24
	ldr	x24,[x2,x28]		// next b[i] (or b[0])
	adds	x20,x20,x10
	// (*)	mul	x10,x14,x25	// lo(n[0..3]*t[0]*n0)
	str	x25,[x26],#8		// put aside t[0]*n0 for tail processing
	adcs	x21,x21,x11
	mul	x11,x15,x25
	adcs	x22,x22,x12
	mul	x12,x16,x25
	adc	x23,x23,x13		// can't overflow
	mul	x13,x17,x25
	// (*)	adds	xzr,x19,x10
	subs	xzr,x19,#1		// (*)
	umulh	x10,x14,x25		// hi(n[0..3]*t[0]*n0)
	adcs	x19,x20,x11
	umulh	x11,x15,x25
	adcs	x20,x21,x12
	umulh	x12,x16,x25
	adcs	x21,x22,x13
	umulh	x13,x17,x25
	adcs	x22,x23,x0
	adc	x0,xzr,xzr
	adds	x19,x19,x10
	sub	x10,x27,x1		// zero once ap has reached &a[num]
	adcs	x20,x20,x11
	adcs	x21,x21,x12
	adcs	x22,x22,x13
	//adc	x0,x0,xzr
	cbnz	x28,Loop_mul4x_1st_reduction

	cbz	x10,Lmul4x4_post_condition

	ldp	x6,x7,[x1,#8*0]		// a[4..7]
	ldp	x8,x9,[x1,#8*2]
	add	x1,x1,#8*4
	ldr	x25,[sp]		// first t[0]*n0 put aside by the reduction loop
	ldp	x14,x15,[x3,#8*0]	// n[4..7]
	ldp	x16,x17,[x3,#8*2]
	add	x3,x3,#8*4

Loop_mul4x_1st_tail:
	mul	x10,x6,x24		// lo(a[4..7]*b[i])
	adc	x0,x0,xzr		// modulo-scheduled
	mul	x11,x7,x24
	add	x28,x28,#8
	mul	x12,x8,x24
	and	x28,x28,#31
	mul	x13,x9,x24
	adds	x19,x19,x10
	umulh	x10,x6,x24		// hi(a[4..7]*b[i])
	adcs	x20,x20,x11
	umulh	x11,x7,x24
	adcs	x21,x21,x12
	umulh	x12,x8,x24
	adcs	x22,x22,x13
	umulh	x13,x9,x24
	adc	x23,xzr,xzr
	ldr	x24,[x2,x28]		// next b[i] (or b[0])
	adds	x20,x20,x10
	mul	x10,x14,x25		// lo(n[4..7]*t[0]*n0)
	adcs	x21,x21,x11
	mul	x11,x15,x25
	adcs	x22,x22,x12
	mul	x12,x16,x25
	adc	x23,x23,x13		// can't overflow
	mul	x13,x17,x25
	adds	x19,x19,x10
	umulh	x10,x14,x25		// hi(n[4..7]*t[0]*n0)
	adcs	x20,x20,x11
	umulh	x11,x15,x25
	adcs	x21,x21,x12
	umulh	x12,x16,x25
	adcs	x22,x22,x13
	adcs	x23,x23,x0
	umulh	x13,x17,x25
	adc	x0,xzr,xzr
	ldr	x25,[sp,x28]		// next t[0]*n0
	str	x19,[x26],#8		// result!!!
	adds	x19,x20,x10
	sub	x10,x27,x1		// done yet?
	adcs	x20,x21,x11
	adcs	x21,x22,x12
	adcs	x22,x23,x13
	//adc	x0,x0,xzr
	cbnz	x28,Loop_mul4x_1st_tail

	sub	x11,x27,x5		// rewound ap (&a[num] - num bytes)
	cbz	x10,Lmul4x_proceed

	ldp	x6,x7,[x1,#8*0]
	ldp	x8,x9,[x1,#8*2]
	add	x1,x1,#8*4
	ldp	x14,x15,[x3,#8*0]
	ldp	x16,x17,[x3,#8*2]
	add	x3,x3,#8*4
	b	Loop_mul4x_1st_tail

.align	5
Lmul4x_proceed:
	ldr	x24,[x2,#8*4]!		// *++b
	adc	x30,x0,xzr		// x30 now carries the top word; return address stays at [x29,#8]
	ldp	x6,x7,[x11,#8*0]	// a[0..3]
	sub	x3,x3,x5		// rewind np
	ldp	x8,x9,[x11,#8*2]
	add	x1,x11,#8*4

	stp	x19,x20,[x26,#8*0]	// result!!!
	ldp	x19,x20,[sp,#8*4]	// t[0..3]
	stp	x21,x22,[x26,#8*2]	// result!!!
	ldp	x21,x22,[sp,#8*6]

	ldp	x14,x15,[x3,#8*0]	// n[0..3]
	mov	x26,sp
	ldp	x16,x17,[x3,#8*2]
	adds	x3,x3,#8*4		// clear carry bit
	mov	x0,xzr

.align	4
Loop_mul4x_reduction:
	mul	x10,x6,x24		// lo(a[0..3]*b[4])
	adc	x0,x0,xzr		// modulo-scheduled
	mul	x11,x7,x24
	add	x28,x28,#8
	mul	x12,x8,x24
	and	x28,x28,#31
	mul	x13,x9,x24
	adds	x19,x19,x10
	umulh	x10,x6,x24		// hi(a[0..3]*b[4])
	adcs	x20,x20,x11
	mul	x25,x19,x4		// t[0]*n0
	adcs	x21,x21,x12
	umulh	x11,x7,x24
	adcs	x22,x22,x13
	umulh	x12,x8,x24
	adc	x23,xzr,xzr
	umulh	x13,x9,x24
	ldr	x24,[x2,x28]		// next b[i]
	adds	x20,x20,x10
	// (*)	mul	x10,x14,x25
	str	x25,[x26],#8		// put aside t[0]*n0 for tail processing
	adcs	x21,x21,x11
	mul	x11,x15,x25		// lo(n[0..3]*t[0]*n0)
	adcs	x22,x22,x12
	mul	x12,x16,x25
	adc	x23,x23,x13		// can't overflow
	mul	x13,x17,x25
	// (*)	adds	xzr,x19,x10
	subs	xzr,x19,#1		// (*)
	umulh	x10,x14,x25		// hi(n[0..3]*t[0]*n0)
	adcs	x19,x20,x11
	umulh	x11,x15,x25
	adcs	x20,x21,x12
	umulh	x12,x16,x25
	adcs	x21,x22,x13
	umulh	x13,x17,x25
	adcs	x22,x23,x0
	adc	x0,xzr,xzr
	adds	x19,x19,x10
	adcs	x20,x20,x11
	adcs	x21,x21,x12
	adcs	x22,x22,x13
	//adc	x0,x0,xzr
	cbnz	x28,Loop_mul4x_reduction

	adc	x0,x0,xzr
	ldp	x10,x11,[x26,#8*4]	// t[4..7]
	ldp	x12,x13,[x26,#8*6]
	ldp	x6,x7,[x1,#8*0]		// a[4..7]
	ldp	x8,x9,[x1,#8*2]
	add	x1,x1,#8*4
	adds	x19,x19,x10
	adcs	x20,x20,x11
	adcs	x21,x21,x12
	adcs	x22,x22,x13
	//adc	x0,x0,xzr

	ldr	x25,[sp]		// t[0]*n0
	ldp	x14,x15,[x3,#8*0]	// n[4..7]
	ldp	x16,x17,[x3,#8*2]
	add	x3,x3,#8*4

.align	4
Loop_mul4x_tail:
	mul	x10,x6,x24		// lo(a[4..7]*b[4])
	adc	x0,x0,xzr		// modulo-scheduled
	mul	x11,x7,x24
	add	x28,x28,#8
	mul	x12,x8,x24
	and	x28,x28,#31
	mul	x13,x9,x24
	adds	x19,x19,x10
	umulh	x10,x6,x24		// hi(a[4..7]*b[4])
	adcs	x20,x20,x11
	umulh	x11,x7,x24
	adcs	x21,x21,x12
	umulh	x12,x8,x24
	adcs	x22,x22,x13
	umulh	x13,x9,x24
	adc	x23,xzr,xzr
	ldr	x24,[x2,x28]		// next b[i]
	adds	x20,x20,x10
	mul	x10,x14,x25		// lo(n[4..7]*t[0]*n0)
	adcs	x21,x21,x11
	mul	x11,x15,x25
	adcs	x22,x22,x12
	mul	x12,x16,x25
	adc	x23,x23,x13		// can't overflow
	mul	x13,x17,x25
	adds	x19,x19,x10
	umulh	x10,x14,x25		// hi(n[4..7]*t[0]*n0)
	adcs	x20,x20,x11
	umulh	x11,x15,x25
	adcs	x21,x21,x12
	umulh	x12,x16,x25
	adcs	x22,x22,x13
	umulh	x13,x17,x25
	adcs	x23,x23,x0
	ldr	x25,[sp,x28]		// next t[0]*n0
	adc	x0,xzr,xzr
	str	x19,[x26],#8		// result!!!
	adds	x19,x20,x10
	sub	x10,x27,x1		// done yet?
	adcs	x20,x21,x11
	adcs	x21,x22,x12
	adcs	x22,x23,x13
	//adc	x0,x0,xzr
	cbnz	x28,Loop_mul4x_tail

	sub	x11,x3,x5		// rewound np; only consumed at Loop_mul4x_break
	adc	x0,x0,xzr
	cbz	x10,Loop_mul4x_break

	ldp	x10,x11,[x26,#8*4]
	ldp	x12,x13,[x26,#8*6]
	ldp	x6,x7,[x1,#8*0]
	ldp	x8,x9,[x1,#8*2]
	add	x1,x1,#8*4
	adds	x19,x19,x10
	adcs	x20,x20,x11
	adcs	x21,x21,x12
	adcs	x22,x22,x13
	//adc	x0,x0,xzr
	ldp	x14,x15,[x3,#8*0]
	ldp	x16,x17,[x3,#8*2]
	add	x3,x3,#8*4
	b	Loop_mul4x_tail

.align	4
Loop_mul4x_break:
	ldp	x12,x13,[x29,#96]	// pull rp and &b[num]
	adds	x19,x19,x30		// fold in the carry word kept in x30
	add	x2,x2,#8*4		// bp++
	adcs	x20,x20,xzr
	sub	x1,x1,x5		// rewind ap
	adcs	x21,x21,xzr
	stp	x19,x20,[x26,#8*0]	// result!!!
	adcs	x22,x22,xzr
	ldp	x19,x20,[sp,#8*4]	// t[0..3]
	adc	x30,x0,xzr
	stp	x21,x22,[x26,#8*2]	// result!!!
	cmp	x2,x13			// done yet?
	ldp	x21,x22,[sp,#8*6]
	ldp	x14,x15,[x11,#8*0]	// n[0..3]
	ldp	x16,x17,[x11,#8*2]
	add	x3,x11,#8*4
	b.eq	Lmul4x_post

	ldr	x24,[x2]
	ldp	x6,x7,[x1,#8*0]		// a[0..3]
	ldp	x8,x9,[x1,#8*2]
	adds	x1,x1,#8*4		// clear carry bit
	mov	x0,xzr
	mov	x26,sp
	b	Loop_mul4x_reduction

.align	4
Lmul4x_post:
	// Final step. We see if result is larger than modulus, and
	// if it is, subtract the modulus. But comparison implies
	// subtraction. So we subtract modulus, see if it borrowed,
	// and conditionally copy original value.
	mov	x0,x12
	mov	x27,x12			// x0 copy
	subs	x10,x19,x14
	add	x26,sp,#8*8
	sbcs	x11,x20,x15
	sub	x28,x5,#8*4		// bytes left after the first four words

Lmul4x_sub:
	sbcs	x12,x21,x16
	ldp	x14,x15,[x3,#8*0]
	sub	x28,x28,#8*4
	ldp	x19,x20,[x26,#8*0]
	sbcs	x13,x22,x17
	ldp	x16,x17,[x3,#8*2]
	add	x3,x3,#8*4
	ldp	x21,x22,[x26,#8*2]
	add	x26,x26,#8*4
	stp	x10,x11,[x0,#8*0]
	sbcs	x10,x19,x14
	stp	x12,x13,[x0,#8*2]
	add	x0,x0,#8*4
	sbcs	x11,x20,x15
	cbnz	x28,Lmul4x_sub

	sbcs	x12,x21,x16
	mov	x26,sp
	add	x1,sp,#8*4
	ldp	x6,x7,[x27,#8*0]
	sbcs	x13,x22,x17
	stp	x10,x11,[x0,#8*0]
	ldp	x8,x9,[x27,#8*2]
	stp	x12,x13,[x0,#8*2]
	ldp	x19,x20,[x1,#8*0]
	ldp	x21,x22,[x1,#8*2]
	sbcs	xzr,x30,xzr		// did it borrow?
	ldr	x30,[x29,#8]		// pull return address

	sub	x28,x5,#8*4
Lmul4x_cond_copy:
	// Per four words: keep the value read back from the stack (x19-x22) when
	// the subtraction borrowed ("lo"), otherwise keep the difference already
	// written to rp (x6-x9); the stack words are overwritten with zeros.
	sub	x28,x28,#8*4
	csel	x10,x19,x6,lo
	stp	xzr,xzr,[x26,#8*0]
	csel	x11,x20,x7,lo
	ldp	x6,x7,[x27,#8*4]
	ldp	x19,x20,[x1,#8*4]
	csel	x12,x21,x8,lo
	stp	xzr,xzr,[x26,#8*2]
	add	x26,x26,#8*4
	csel	x13,x22,x9,lo
	ldp	x8,x9,[x27,#8*6]
	ldp	x21,x22,[x1,#8*6]
	add	x1,x1,#8*4
	stp	x10,x11,[x27,#8*0]
	stp	x12,x13,[x27,#8*2]
	add	x27,x27,#8*4
	cbnz	x28,Lmul4x_cond_copy

	// NOTE(review): the four xzr stores below cover [x26, x26+8*6); the ones
	// at #8*2, #8*3 and #8*4 overlap. Confirm this reaches the end of the
	// t[] scratch area.
	csel	x10,x19,x6,lo
	stp	xzr,xzr,[x26,#8*0]
	csel	x11,x20,x7,lo
	stp	xzr,xzr,[x26,#8*2]
	csel	x12,x21,x8,lo
	stp	xzr,xzr,[x26,#8*3]
	csel	x13,x22,x9,lo
	stp	xzr,xzr,[x26,#8*4]
	stp	x10,x11,[x27,#8*0]
	stp	x12,x13,[x27,#8*2]

	b	Lmul4x_done

.align	4
Lmul4x4_post_condition:
	// Reached straight from the first reduction loop when ap already equals
	// &a[num], i.e. the whole operand fit in the four words just processed.
	adc	x0,x0,xzr
	ldr	x1,[x29,#96]		// pull rp
	// x19-x22,x0 hold result, x14-x17 hold modulus
	subs	x6,x19,x14
	ldr	x30,[x29,#8]		// pull return address
	sbcs	x7,x20,x15
	stp	xzr,xzr,[sp,#8*0]
	sbcs	x8,x21,x16
	stp	xzr,xzr,[sp,#8*2]
	sbcs	x9,x22,x17
	stp	xzr,xzr,[sp,#8*4]
	sbcs	xzr,x0,xzr		// did it borrow?
	stp	xzr,xzr,[sp,#8*6]

	// x6-x9 hold result-modulus
	csel	x6,x19,x6,lo
	csel	x7,x20,x7,lo
	csel	x8,x21,x8,lo
	csel	x9,x22,x9,lo
	stp	x6,x7,[x1,#8*0]
	stp	x8,x9,[x1,#8*2]

Lmul4x_done:
	ldp	x19,x20,[x29,#16]
	mov	sp,x29
	ldp	x21,x22,[x29,#32]
	mov	x0,#1
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldr	x29,[sp],#128
	// x30 is popped earlier
	AARCH64_VALIDATE_LINK_REGISTER
	ret

// NUL-terminated ASCII banner:
// "Montgomery Multiplication for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	4
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32)