// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
#include <openssl/arm_arch.h>

.text

//-----------------------------------------------------------------------
// bn_mul_mont -- word-serial Montgomery multiplication (64-bit words).
//
// Register contract, as used by the code below:
//   In:   x0 = rp   result vector, num words (written)
//         x1 = ap   first operand, num words
//         x2 = bp   second operand, num words
//         x3 = np   modulus, num words
//         x4 = &n0  pointer to the per-modulus constant; only [x4] is read
//         x5 = num  length in 64-bit words
//   Out:  x0 = 1
//
// Dispatch on num:
//   num % 8 == 0  -> __bn_sqr8x_mont (which itself falls through to
//                    __bn_mul4x_mont unless ap == bp)
//   num % 4 == 0  -> __bn_mul4x_mont
//   otherwise     -> generic path at .Lmul_mont
//
// Generic path:
//   Frame:   64 bytes holding x29/x30 and x19-x24; x29 is the frame pointer.
//   Scratch: tp[], num words carved out below the frame (16-byte aligned)
//            and overwritten with zeros before returning.
//   Clobbers x1-x17 and the flags; x19-x24 are saved and restored.
//   NOTE(review): ap[0..1] and np[0..1] are loaded unconditionally, so this
//   path appears to require num >= 2 -- confirm that callers guarantee it.
//-----------------------------------------------------------------------
.globl	bn_mul_mont
.hidden	bn_mul_mont
.type	bn_mul_mont,%function
.align	5
bn_mul_mont:
	AARCH64_SIGN_LINK_REGISTER
	tst	x5,#7			// num a multiple of 8?
	b.eq	__bn_sqr8x_mont
	tst	x5,#3			// num a multiple of 4?
	b.eq	__bn_mul4x_mont
.Lmul_mont:
	stp	x29,x30,[sp,#-64]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]

	ldr	x9,[x2],#8		// bp[0]
	sub	x22,sp,x5,lsl#3		// room for tp[num] below the frame
	ldp	x7,x8,[x1],#16		// ap[0..1]
	lsl	x5,x5,#3		// num is counted in bytes from here on
	ldr	x4,[x4]			// *n0
	and	x22,x22,#-16		// ABI says so
	ldp	x13,x14,[x3],#16	// np[0..1]

	mul	x6,x7,x9		// ap[0]*bp[0]
	sub	x21,x5,#16		// j=num-2
	umulh	x7,x7,x9
	mul	x10,x8,x9		// ap[1]*bp[0]
	umulh	x11,x8,x9

	mul	x15,x6,x4		// "tp[0]"*n0
	mov	sp,x22			// alloca

	// (*)	mul	x12,x13,x15	// np[0]*m1
	umulh	x13,x13,x15
	mul	x16,x14,x15		// np[1]*m1
	// (*)	adds	x12,x12,x6	// discarded
	// (*)	As for removal of first multiplication and addition
	//	instructions. The outcome of first addition is
	//	guaranteed to be zero, which leaves two computationally
	//	significant outcomes: it either carries or not. Then
	//	question is when does it carry? Is there alternative
	//	way to deduce it? If you follow operations, you can
	//	observe that condition for carry is quite simple:
	//	x6 being non-zero. So that carry can be calculated
	//	by adding -1 to x6. That's what next instruction does.
	subs	xzr,x6,#1		// (*)
	umulh	x17,x14,x15
	adc	x13,x13,xzr
	cbz	x21,.L1st_skip

	// First pass (i=0): tp = (ap*bp[0] + np*m1) with the low word dropped.
.L1st:
	ldr	x8,[x1],#8
	adds	x6,x10,x7
	sub	x21,x21,#8		// j--
	adc	x7,x11,xzr

	ldr	x14,[x3],#8
	adds	x12,x16,x13
	mul	x10,x8,x9		// ap[j]*bp[0]
	adc	x13,x17,xzr
	umulh	x11,x8,x9

	adds	x12,x12,x6
	mul	x16,x14,x15		// np[j]*m1
	adc	x13,x13,xzr
	umulh	x17,x14,x15
	str	x12,[x22],#8		// tp[j-1]
	cbnz	x21,.L1st

.L1st_skip:
	adds	x6,x10,x7
	sub	x1,x1,x5		// rewind x1
	adc	x7,x11,xzr

	adds	x12,x16,x13
	sub	x3,x3,x5		// rewind x3
	adc	x13,x17,xzr

	adds	x12,x12,x6
	sub	x20,x5,#8		// i=num-1
	adcs	x13,x13,x7

	adc	x19,xzr,xzr		// upmost overflow bit
	stp	x12,x13,[x22]

	// Remaining passes: tp = (tp + ap*bp[i] + np*m1) with the low word
	// dropped; x19 carries the overflow bit from one pass to the next.
.Louter:
	ldr	x9,[x2],#8		// bp[i]
	ldp	x7,x8,[x1],#16
	ldr	x23,[sp]		// tp[0]
	add	x22,sp,#8

	mul	x6,x7,x9		// ap[0]*bp[i]
	sub	x21,x5,#16		// j=num-2
	umulh	x7,x7,x9
	ldp	x13,x14,[x3],#16
	mul	x10,x8,x9		// ap[1]*bp[i]
	adds	x6,x6,x23
	umulh	x11,x8,x9
	adc	x7,x7,xzr

	mul	x15,x6,x4
	sub	x20,x20,#8		// i--

	// (*)	mul	x12,x13,x15	// np[0]*m1
	umulh	x13,x13,x15
	mul	x16,x14,x15		// np[1]*m1
	// (*)	adds	x12,x12,x6
	subs	xzr,x6,#1		// (*)
	umulh	x17,x14,x15
	cbz	x21,.Linner_skip

.Linner:
	ldr	x8,[x1],#8
	adc	x13,x13,xzr		// consumes the carry left by the (*) subs or by the previous iteration
	ldr	x23,[x22],#8		// tp[j]
	adds	x6,x10,x7
	sub	x21,x21,#8		// j--
	adc	x7,x11,xzr

	adds	x12,x16,x13
	ldr	x14,[x3],#8
	adc	x13,x17,xzr

	mul	x10,x8,x9		// ap[j]*bp[i]
	adds	x6,x6,x23
	umulh	x11,x8,x9
	adc	x7,x7,xzr

	mul	x16,x14,x15		// np[j]*m1
	adds	x12,x12,x6
	umulh	x17,x14,x15
	str	x12,[x22,#-16]		// tp[j-1]
	cbnz	x21,.Linner

.Linner_skip:
	ldr	x23,[x22],#8		// tp[j]
	adc	x13,x13,xzr
	adds	x6,x10,x7
	sub	x1,x1,x5		// rewind x1
	adc	x7,x11,xzr

	adds	x12,x16,x13
	sub	x3,x3,x5		// rewind x3
	adcs	x13,x17,x19
	adc	x19,xzr,xzr

	adds	x6,x6,x23
	adc	x7,x7,xzr

	adds	x12,x12,x6
	adcs	x13,x13,x7
	adc	x19,x19,xzr		// upmost overflow bit
	stp	x12,x13,[x22,#-16]

	cbnz	x20,.Louter

	// Final step. We see if result is larger than modulus, and
	// if it is, subtract the modulus. But comparison implies
	// subtraction. So we subtract modulus, see if it borrowed,
	// and conditionally copy original value.
	ldr	x23,[sp]		// tp[0]
	add	x22,sp,#8
	ldr	x14,[x3],#8		// np[0]
	subs	x21,x5,#8		// j=num-1 and clear borrow
	mov	x1,x0			// x1 = write cursor; x0 keeps rp for .Lcond_copy
.Lsub:
	sbcs	x8,x23,x14		// tp[j]-np[j]
	ldr	x23,[x22],#8
	sub	x21,x21,#8		// j--
	ldr	x14,[x3],#8
	str	x8,[x1],#8		// rp[j]=tp[j]-np[j]
	cbnz	x21,.Lsub

	sbcs	x8,x23,x14
	sbcs	x19,x19,xzr		// did it borrow?
	str	x8,[x1],#8		// rp[num-1]

	ldr	x23,[sp]		// tp[0]
	add	x22,sp,#8
	ldr	x8,[x0],#8		// rp[0]
	sub	x5,x5,#8		// num--
	nop
	// "lo" (carry clear) means the subtraction borrowed: keep tp[j].
	// Otherwise keep the difference already stored in rp[j]. Both values
	// are loaded and csel picks one, so there is no branch on the result.
.Lcond_copy:
	sub	x5,x5,#8		// num--
	csel	x14,x23,x8,lo		// did it borrow?
	ldr	x23,[x22],#8
	ldr	x8,[x0],#8
	str	xzr,[x22,#-16]		// wipe tp
	str	x14,[x0,#-16]
	cbnz	x5,.Lcond_copy

	csel	x14,x23,x8,lo
	str	xzr,[x22,#-8]		// wipe tp
	str	x14,[x0,#-8]

	ldp	x19,x20,[x29,#16]
	mov	sp,x29			// drop tp[]
	ldp	x21,x22,[x29,#32]
	mov	x0,#1			// return value
	ldp	x23,x24,[x29,#48]
	ldr	x29,[sp],#64		// only x29 is reloaded; this path never writes x30
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	bn_mul_mont,.-bn_mul_mont
.type	__bn_sqr8x_mont,%function
.align	5
__bn_sqr8x_mont:
	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
	// only from bn_mul_mont which has already signed the return address.
	cmp	x1,x2			// ap == bp?
	b.ne	__bn_mul4x_mont		// not a squaring: use the 4x multiply instead
.Lsqr8x_mont:
	stp	x29,x30,[sp,#-128]!
226*8fb009dcSAndroid Build Coastguard Worker add x29,sp,#0 227*8fb009dcSAndroid Build Coastguard Worker stp x19,x20,[sp,#16] 228*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[sp,#32] 229*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[sp,#48] 230*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[sp,#64] 231*8fb009dcSAndroid Build Coastguard Worker stp x27,x28,[sp,#80] 232*8fb009dcSAndroid Build Coastguard Worker stp x0,x3,[sp,#96] // offload rp and np 233*8fb009dcSAndroid Build Coastguard Worker 234*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 235*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 236*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x1,#8*4] 237*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x1,#8*6] 238*8fb009dcSAndroid Build Coastguard Worker 239*8fb009dcSAndroid Build Coastguard Worker sub x2,sp,x5,lsl#4 240*8fb009dcSAndroid Build Coastguard Worker lsl x5,x5,#3 241*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x4] // *n0 242*8fb009dcSAndroid Build Coastguard Worker mov sp,x2 // alloca 243*8fb009dcSAndroid Build Coastguard Worker sub x27,x5,#8*8 244*8fb009dcSAndroid Build Coastguard Worker b .Lsqr8x_zero_start 245*8fb009dcSAndroid Build Coastguard Worker 246*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_zero: 247*8fb009dcSAndroid Build Coastguard Worker sub x27,x27,#8*8 248*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*0] 249*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*2] 250*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*4] 251*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*6] 252*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_zero_start: 253*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*8] 254*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*10] 255*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*12] 256*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*14] 257*8fb009dcSAndroid 
Build Coastguard Worker add x2,x2,#8*16 258*8fb009dcSAndroid Build Coastguard Worker cbnz x27,.Lsqr8x_zero 259*8fb009dcSAndroid Build Coastguard Worker 260*8fb009dcSAndroid Build Coastguard Worker add x3,x1,x5 261*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*8 262*8fb009dcSAndroid Build Coastguard Worker mov x19,xzr 263*8fb009dcSAndroid Build Coastguard Worker mov x20,xzr 264*8fb009dcSAndroid Build Coastguard Worker mov x21,xzr 265*8fb009dcSAndroid Build Coastguard Worker mov x22,xzr 266*8fb009dcSAndroid Build Coastguard Worker mov x23,xzr 267*8fb009dcSAndroid Build Coastguard Worker mov x24,xzr 268*8fb009dcSAndroid Build Coastguard Worker mov x25,xzr 269*8fb009dcSAndroid Build Coastguard Worker mov x26,xzr 270*8fb009dcSAndroid Build Coastguard Worker mov x2,sp 271*8fb009dcSAndroid Build Coastguard Worker str x4,[x29,#112] // offload n0 272*8fb009dcSAndroid Build Coastguard Worker 273*8fb009dcSAndroid Build Coastguard Worker // Multiply everything but a[i]*a[i] 274*8fb009dcSAndroid Build Coastguard Worker.align 4 275*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_outer_loop: 276*8fb009dcSAndroid Build Coastguard Worker // a[1]a[0] (i) 277*8fb009dcSAndroid Build Coastguard Worker // a[2]a[0] 278*8fb009dcSAndroid Build Coastguard Worker // a[3]a[0] 279*8fb009dcSAndroid Build Coastguard Worker // a[4]a[0] 280*8fb009dcSAndroid Build Coastguard Worker // a[5]a[0] 281*8fb009dcSAndroid Build Coastguard Worker // a[6]a[0] 282*8fb009dcSAndroid Build Coastguard Worker // a[7]a[0] 283*8fb009dcSAndroid Build Coastguard Worker // a[2]a[1] (ii) 284*8fb009dcSAndroid Build Coastguard Worker // a[3]a[1] 285*8fb009dcSAndroid Build Coastguard Worker // a[4]a[1] 286*8fb009dcSAndroid Build Coastguard Worker // a[5]a[1] 287*8fb009dcSAndroid Build Coastguard Worker // a[6]a[1] 288*8fb009dcSAndroid Build Coastguard Worker // a[7]a[1] 289*8fb009dcSAndroid Build Coastguard Worker // a[3]a[2] (iii) 290*8fb009dcSAndroid Build Coastguard Worker // a[4]a[2] 291*8fb009dcSAndroid Build 
Coastguard Worker // a[5]a[2] 292*8fb009dcSAndroid Build Coastguard Worker // a[6]a[2] 293*8fb009dcSAndroid Build Coastguard Worker // a[7]a[2] 294*8fb009dcSAndroid Build Coastguard Worker // a[4]a[3] (iv) 295*8fb009dcSAndroid Build Coastguard Worker // a[5]a[3] 296*8fb009dcSAndroid Build Coastguard Worker // a[6]a[3] 297*8fb009dcSAndroid Build Coastguard Worker // a[7]a[3] 298*8fb009dcSAndroid Build Coastguard Worker // a[5]a[4] (v) 299*8fb009dcSAndroid Build Coastguard Worker // a[6]a[4] 300*8fb009dcSAndroid Build Coastguard Worker // a[7]a[4] 301*8fb009dcSAndroid Build Coastguard Worker // a[6]a[5] (vi) 302*8fb009dcSAndroid Build Coastguard Worker // a[7]a[5] 303*8fb009dcSAndroid Build Coastguard Worker // a[7]a[6] (vii) 304*8fb009dcSAndroid Build Coastguard Worker 305*8fb009dcSAndroid Build Coastguard Worker mul x14,x7,x6 // lo(a[1..7]*a[0]) (i) 306*8fb009dcSAndroid Build Coastguard Worker mul x15,x8,x6 307*8fb009dcSAndroid Build Coastguard Worker mul x16,x9,x6 308*8fb009dcSAndroid Build Coastguard Worker mul x17,x10,x6 309*8fb009dcSAndroid Build Coastguard Worker adds x20,x20,x14 // t[1]+lo(a[1]*a[0]) 310*8fb009dcSAndroid Build Coastguard Worker mul x14,x11,x6 311*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x15 312*8fb009dcSAndroid Build Coastguard Worker mul x15,x12,x6 313*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x16 314*8fb009dcSAndroid Build Coastguard Worker mul x16,x13,x6 315*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x17 316*8fb009dcSAndroid Build Coastguard Worker umulh x17,x7,x6 // hi(a[1..7]*a[0]) 317*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x14 318*8fb009dcSAndroid Build Coastguard Worker umulh x14,x8,x6 319*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x15 320*8fb009dcSAndroid Build Coastguard Worker umulh x15,x9,x6 321*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x16 322*8fb009dcSAndroid Build Coastguard Worker umulh x16,x10,x6 323*8fb009dcSAndroid Build Coastguard Worker stp 
x19,x20,[x2],#8*2 // t[0..1] 324*8fb009dcSAndroid Build Coastguard Worker adc x19,xzr,xzr // t[8] 325*8fb009dcSAndroid Build Coastguard Worker adds x21,x21,x17 // t[2]+lo(a[1]*a[0]) 326*8fb009dcSAndroid Build Coastguard Worker umulh x17,x11,x6 327*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x14 328*8fb009dcSAndroid Build Coastguard Worker umulh x14,x12,x6 329*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x15 330*8fb009dcSAndroid Build Coastguard Worker umulh x15,x13,x6 331*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x16 332*8fb009dcSAndroid Build Coastguard Worker mul x16,x8,x7 // lo(a[2..7]*a[1]) (ii) 333*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x17 334*8fb009dcSAndroid Build Coastguard Worker mul x17,x9,x7 335*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x14 336*8fb009dcSAndroid Build Coastguard Worker mul x14,x10,x7 337*8fb009dcSAndroid Build Coastguard Worker adc x19,x19,x15 338*8fb009dcSAndroid Build Coastguard Worker 339*8fb009dcSAndroid Build Coastguard Worker mul x15,x11,x7 340*8fb009dcSAndroid Build Coastguard Worker adds x22,x22,x16 341*8fb009dcSAndroid Build Coastguard Worker mul x16,x12,x7 342*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x17 343*8fb009dcSAndroid Build Coastguard Worker mul x17,x13,x7 344*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x14 345*8fb009dcSAndroid Build Coastguard Worker umulh x14,x8,x7 // hi(a[2..7]*a[1]) 346*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x15 347*8fb009dcSAndroid Build Coastguard Worker umulh x15,x9,x7 348*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x16 349*8fb009dcSAndroid Build Coastguard Worker umulh x16,x10,x7 350*8fb009dcSAndroid Build Coastguard Worker adcs x19,x19,x17 351*8fb009dcSAndroid Build Coastguard Worker umulh x17,x11,x7 352*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[x2],#8*2 // t[2..3] 353*8fb009dcSAndroid Build Coastguard Worker adc x20,xzr,xzr // t[9] 354*8fb009dcSAndroid Build Coastguard Worker 
adds x23,x23,x14 355*8fb009dcSAndroid Build Coastguard Worker umulh x14,x12,x7 356*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x15 357*8fb009dcSAndroid Build Coastguard Worker umulh x15,x13,x7 358*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x16 359*8fb009dcSAndroid Build Coastguard Worker mul x16,x9,x8 // lo(a[3..7]*a[2]) (iii) 360*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x17 361*8fb009dcSAndroid Build Coastguard Worker mul x17,x10,x8 362*8fb009dcSAndroid Build Coastguard Worker adcs x19,x19,x14 363*8fb009dcSAndroid Build Coastguard Worker mul x14,x11,x8 364*8fb009dcSAndroid Build Coastguard Worker adc x20,x20,x15 365*8fb009dcSAndroid Build Coastguard Worker 366*8fb009dcSAndroid Build Coastguard Worker mul x15,x12,x8 367*8fb009dcSAndroid Build Coastguard Worker adds x24,x24,x16 368*8fb009dcSAndroid Build Coastguard Worker mul x16,x13,x8 369*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x17 370*8fb009dcSAndroid Build Coastguard Worker umulh x17,x9,x8 // hi(a[3..7]*a[2]) 371*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x14 372*8fb009dcSAndroid Build Coastguard Worker umulh x14,x10,x8 373*8fb009dcSAndroid Build Coastguard Worker adcs x19,x19,x15 374*8fb009dcSAndroid Build Coastguard Worker umulh x15,x11,x8 375*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x16 376*8fb009dcSAndroid Build Coastguard Worker umulh x16,x12,x8 377*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[x2],#8*2 // t[4..5] 378*8fb009dcSAndroid Build Coastguard Worker adc x21,xzr,xzr // t[10] 379*8fb009dcSAndroid Build Coastguard Worker adds x25,x25,x17 380*8fb009dcSAndroid Build Coastguard Worker umulh x17,x13,x8 381*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x14 382*8fb009dcSAndroid Build Coastguard Worker mul x14,x10,x9 // lo(a[4..7]*a[3]) (iv) 383*8fb009dcSAndroid Build Coastguard Worker adcs x19,x19,x15 384*8fb009dcSAndroid Build Coastguard Worker mul x15,x11,x9 385*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x16 
386*8fb009dcSAndroid Build Coastguard Worker mul x16,x12,x9 387*8fb009dcSAndroid Build Coastguard Worker adc x21,x21,x17 388*8fb009dcSAndroid Build Coastguard Worker 389*8fb009dcSAndroid Build Coastguard Worker mul x17,x13,x9 390*8fb009dcSAndroid Build Coastguard Worker adds x26,x26,x14 391*8fb009dcSAndroid Build Coastguard Worker umulh x14,x10,x9 // hi(a[4..7]*a[3]) 392*8fb009dcSAndroid Build Coastguard Worker adcs x19,x19,x15 393*8fb009dcSAndroid Build Coastguard Worker umulh x15,x11,x9 394*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x16 395*8fb009dcSAndroid Build Coastguard Worker umulh x16,x12,x9 396*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x17 397*8fb009dcSAndroid Build Coastguard Worker umulh x17,x13,x9 398*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[x2],#8*2 // t[6..7] 399*8fb009dcSAndroid Build Coastguard Worker adc x22,xzr,xzr // t[11] 400*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x14 401*8fb009dcSAndroid Build Coastguard Worker mul x14,x11,x10 // lo(a[5..7]*a[4]) (v) 402*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x15 403*8fb009dcSAndroid Build Coastguard Worker mul x15,x12,x10 404*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x16 405*8fb009dcSAndroid Build Coastguard Worker mul x16,x13,x10 406*8fb009dcSAndroid Build Coastguard Worker adc x22,x22,x17 407*8fb009dcSAndroid Build Coastguard Worker 408*8fb009dcSAndroid Build Coastguard Worker umulh x17,x11,x10 // hi(a[5..7]*a[4]) 409*8fb009dcSAndroid Build Coastguard Worker adds x20,x20,x14 410*8fb009dcSAndroid Build Coastguard Worker umulh x14,x12,x10 411*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x15 412*8fb009dcSAndroid Build Coastguard Worker umulh x15,x13,x10 413*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x16 414*8fb009dcSAndroid Build Coastguard Worker mul x16,x12,x11 // lo(a[6..7]*a[5]) (vi) 415*8fb009dcSAndroid Build Coastguard Worker adc x23,xzr,xzr // t[12] 416*8fb009dcSAndroid Build Coastguard Worker adds 
x21,x21,x17 417*8fb009dcSAndroid Build Coastguard Worker mul x17,x13,x11 418*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x14 419*8fb009dcSAndroid Build Coastguard Worker umulh x14,x12,x11 // hi(a[6..7]*a[5]) 420*8fb009dcSAndroid Build Coastguard Worker adc x23,x23,x15 421*8fb009dcSAndroid Build Coastguard Worker 422*8fb009dcSAndroid Build Coastguard Worker umulh x15,x13,x11 423*8fb009dcSAndroid Build Coastguard Worker adds x22,x22,x16 424*8fb009dcSAndroid Build Coastguard Worker mul x16,x13,x12 // lo(a[7]*a[6]) (vii) 425*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x17 426*8fb009dcSAndroid Build Coastguard Worker umulh x17,x13,x12 // hi(a[7]*a[6]) 427*8fb009dcSAndroid Build Coastguard Worker adc x24,xzr,xzr // t[13] 428*8fb009dcSAndroid Build Coastguard Worker adds x23,x23,x14 429*8fb009dcSAndroid Build Coastguard Worker sub x27,x3,x1 // done yet? 430*8fb009dcSAndroid Build Coastguard Worker adc x24,x24,x15 431*8fb009dcSAndroid Build Coastguard Worker 432*8fb009dcSAndroid Build Coastguard Worker adds x24,x24,x16 433*8fb009dcSAndroid Build Coastguard Worker sub x14,x3,x5 // rewinded ap 434*8fb009dcSAndroid Build Coastguard Worker adc x25,xzr,xzr // t[14] 435*8fb009dcSAndroid Build Coastguard Worker add x25,x25,x17 436*8fb009dcSAndroid Build Coastguard Worker 437*8fb009dcSAndroid Build Coastguard Worker cbz x27,.Lsqr8x_outer_break 438*8fb009dcSAndroid Build Coastguard Worker 439*8fb009dcSAndroid Build Coastguard Worker mov x4,x6 440*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x2,#8*0] 441*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x2,#8*2] 442*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x2,#8*4] 443*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x2,#8*6] 444*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x6 445*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x7 446*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 447*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x8 
448*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x9 449*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 450*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x10 451*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x11 452*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x1,#8*4] 453*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x12 454*8fb009dcSAndroid Build Coastguard Worker mov x0,x1 455*8fb009dcSAndroid Build Coastguard Worker adcs x26,xzr,x13 456*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x1,#8*6] 457*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*8 458*8fb009dcSAndroid Build Coastguard Worker //adc x28,xzr,xzr // moved below 459*8fb009dcSAndroid Build Coastguard Worker mov x27,#-8*8 460*8fb009dcSAndroid Build Coastguard Worker 461*8fb009dcSAndroid Build Coastguard Worker // a[8]a[0] 462*8fb009dcSAndroid Build Coastguard Worker // a[9]a[0] 463*8fb009dcSAndroid Build Coastguard Worker // a[a]a[0] 464*8fb009dcSAndroid Build Coastguard Worker // a[b]a[0] 465*8fb009dcSAndroid Build Coastguard Worker // a[c]a[0] 466*8fb009dcSAndroid Build Coastguard Worker // a[d]a[0] 467*8fb009dcSAndroid Build Coastguard Worker // a[e]a[0] 468*8fb009dcSAndroid Build Coastguard Worker // a[f]a[0] 469*8fb009dcSAndroid Build Coastguard Worker // a[8]a[1] 470*8fb009dcSAndroid Build Coastguard Worker // a[f]a[1]........................ 471*8fb009dcSAndroid Build Coastguard Worker // a[8]a[2] 472*8fb009dcSAndroid Build Coastguard Worker // a[f]a[2]........................ 473*8fb009dcSAndroid Build Coastguard Worker // a[8]a[3] 474*8fb009dcSAndroid Build Coastguard Worker // a[f]a[3]........................ 475*8fb009dcSAndroid Build Coastguard Worker // a[8]a[4] 476*8fb009dcSAndroid Build Coastguard Worker // a[f]a[4]........................ 477*8fb009dcSAndroid Build Coastguard Worker // a[8]a[5] 478*8fb009dcSAndroid Build Coastguard Worker // a[f]a[5]........................ 
479*8fb009dcSAndroid Build Coastguard Worker // a[8]a[6] 480*8fb009dcSAndroid Build Coastguard Worker // a[f]a[6]........................ 481*8fb009dcSAndroid Build Coastguard Worker // a[8]a[7] 482*8fb009dcSAndroid Build Coastguard Worker // a[f]a[7]........................ 483*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_mul: 484*8fb009dcSAndroid Build Coastguard Worker mul x14,x6,x4 485*8fb009dcSAndroid Build Coastguard Worker adc x28,xzr,xzr // carry bit, modulo-scheduled 486*8fb009dcSAndroid Build Coastguard Worker mul x15,x7,x4 487*8fb009dcSAndroid Build Coastguard Worker add x27,x27,#8 488*8fb009dcSAndroid Build Coastguard Worker mul x16,x8,x4 489*8fb009dcSAndroid Build Coastguard Worker mul x17,x9,x4 490*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x14 491*8fb009dcSAndroid Build Coastguard Worker mul x14,x10,x4 492*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x15 493*8fb009dcSAndroid Build Coastguard Worker mul x15,x11,x4 494*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x16 495*8fb009dcSAndroid Build Coastguard Worker mul x16,x12,x4 496*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x17 497*8fb009dcSAndroid Build Coastguard Worker mul x17,x13,x4 498*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x14 499*8fb009dcSAndroid Build Coastguard Worker umulh x14,x6,x4 500*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x15 501*8fb009dcSAndroid Build Coastguard Worker umulh x15,x7,x4 502*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x16 503*8fb009dcSAndroid Build Coastguard Worker umulh x16,x8,x4 504*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x17 505*8fb009dcSAndroid Build Coastguard Worker umulh x17,x9,x4 506*8fb009dcSAndroid Build Coastguard Worker adc x28,x28,xzr 507*8fb009dcSAndroid Build Coastguard Worker str x19,[x2],#8 508*8fb009dcSAndroid Build Coastguard Worker adds x19,x20,x14 509*8fb009dcSAndroid Build Coastguard Worker umulh x14,x10,x4 510*8fb009dcSAndroid Build Coastguard Worker adcs 
x20,x21,x15 511*8fb009dcSAndroid Build Coastguard Worker umulh x15,x11,x4 512*8fb009dcSAndroid Build Coastguard Worker adcs x21,x22,x16 513*8fb009dcSAndroid Build Coastguard Worker umulh x16,x12,x4 514*8fb009dcSAndroid Build Coastguard Worker adcs x22,x23,x17 515*8fb009dcSAndroid Build Coastguard Worker umulh x17,x13,x4 516*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x0,x27] 517*8fb009dcSAndroid Build Coastguard Worker adcs x23,x24,x14 518*8fb009dcSAndroid Build Coastguard Worker adcs x24,x25,x15 519*8fb009dcSAndroid Build Coastguard Worker adcs x25,x26,x16 520*8fb009dcSAndroid Build Coastguard Worker adcs x26,x28,x17 521*8fb009dcSAndroid Build Coastguard Worker //adc x28,xzr,xzr // moved above 522*8fb009dcSAndroid Build Coastguard Worker cbnz x27,.Lsqr8x_mul 523*8fb009dcSAndroid Build Coastguard Worker // note that carry flag is guaranteed 524*8fb009dcSAndroid Build Coastguard Worker // to be zero at this point 525*8fb009dcSAndroid Build Coastguard Worker cmp x1,x3 // done yet? 526*8fb009dcSAndroid Build Coastguard Worker b.eq .Lsqr8x_break 527*8fb009dcSAndroid Build Coastguard Worker 528*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x2,#8*0] 529*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x2,#8*2] 530*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x2,#8*4] 531*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x2,#8*6] 532*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x6 533*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x0,#-8*8] 534*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x7 535*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 536*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x8 537*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x9 538*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 539*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x10 540*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x11 541*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x1,#8*4] 
542*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x12 543*8fb009dcSAndroid Build Coastguard Worker mov x27,#-8*8 544*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x13 545*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x1,#8*6] 546*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*8 547*8fb009dcSAndroid Build Coastguard Worker //adc x28,xzr,xzr // moved above 548*8fb009dcSAndroid Build Coastguard Worker b .Lsqr8x_mul 549*8fb009dcSAndroid Build Coastguard Worker 550*8fb009dcSAndroid Build Coastguard Worker.align 4 551*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_break: 552*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x0,#8*0] 553*8fb009dcSAndroid Build Coastguard Worker add x1,x0,#8*8 554*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x0,#8*2] 555*8fb009dcSAndroid Build Coastguard Worker sub x14,x3,x1 // is it last iteration? 556*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x0,#8*4] 557*8fb009dcSAndroid Build Coastguard Worker sub x15,x2,x14 558*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x0,#8*6] 559*8fb009dcSAndroid Build Coastguard Worker cbz x14,.Lsqr8x_outer_loop 560*8fb009dcSAndroid Build Coastguard Worker 561*8fb009dcSAndroid Build Coastguard Worker stp x19,x20,[x2,#8*0] 562*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x15,#8*0] 563*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[x2,#8*2] 564*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x15,#8*2] 565*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[x2,#8*4] 566*8fb009dcSAndroid Build Coastguard Worker ldp x23,x24,[x15,#8*4] 567*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[x2,#8*6] 568*8fb009dcSAndroid Build Coastguard Worker mov x2,x15 569*8fb009dcSAndroid Build Coastguard Worker ldp x25,x26,[x15,#8*6] 570*8fb009dcSAndroid Build Coastguard Worker b .Lsqr8x_outer_loop 571*8fb009dcSAndroid Build Coastguard Worker 572*8fb009dcSAndroid Build Coastguard Worker.align 4 573*8fb009dcSAndroid Build Coastguard 
Worker.Lsqr8x_outer_break: 574*8fb009dcSAndroid Build Coastguard Worker // Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0] 575*8fb009dcSAndroid Build Coastguard Worker ldp x7,x9,[x14,#8*0] // recall that x14 is &a[0] 576*8fb009dcSAndroid Build Coastguard Worker ldp x15,x16,[sp,#8*1] 577*8fb009dcSAndroid Build Coastguard Worker ldp x11,x13,[x14,#8*2] 578*8fb009dcSAndroid Build Coastguard Worker add x1,x14,#8*4 579*8fb009dcSAndroid Build Coastguard Worker ldp x17,x14,[sp,#8*3] 580*8fb009dcSAndroid Build Coastguard Worker 581*8fb009dcSAndroid Build Coastguard Worker stp x19,x20,[x2,#8*0] 582*8fb009dcSAndroid Build Coastguard Worker mul x19,x7,x7 583*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[x2,#8*2] 584*8fb009dcSAndroid Build Coastguard Worker umulh x7,x7,x7 585*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[x2,#8*4] 586*8fb009dcSAndroid Build Coastguard Worker mul x8,x9,x9 587*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[x2,#8*6] 588*8fb009dcSAndroid Build Coastguard Worker mov x2,sp 589*8fb009dcSAndroid Build Coastguard Worker umulh x9,x9,x9 590*8fb009dcSAndroid Build Coastguard Worker adds x20,x7,x15,lsl#1 591*8fb009dcSAndroid Build Coastguard Worker extr x15,x16,x15,#63 592*8fb009dcSAndroid Build Coastguard Worker sub x27,x5,#8*4 593*8fb009dcSAndroid Build Coastguard Worker 594*8fb009dcSAndroid Build Coastguard Worker.Lsqr4x_shift_n_add: 595*8fb009dcSAndroid Build Coastguard Worker adcs x21,x8,x15 596*8fb009dcSAndroid Build Coastguard Worker extr x16,x17,x16,#63 597*8fb009dcSAndroid Build Coastguard Worker sub x27,x27,#8*4 598*8fb009dcSAndroid Build Coastguard Worker adcs x22,x9,x16 599*8fb009dcSAndroid Build Coastguard Worker ldp x15,x16,[x2,#8*5] 600*8fb009dcSAndroid Build Coastguard Worker mul x10,x11,x11 601*8fb009dcSAndroid Build Coastguard Worker ldp x7,x9,[x1],#8*2 602*8fb009dcSAndroid Build Coastguard Worker umulh x11,x11,x11 603*8fb009dcSAndroid Build Coastguard Worker mul x12,x13,x13 604*8fb009dcSAndroid 
Build Coastguard Worker umulh x13,x13,x13 605*8fb009dcSAndroid Build Coastguard Worker extr x17,x14,x17,#63 606*8fb009dcSAndroid Build Coastguard Worker stp x19,x20,[x2,#8*0] 607*8fb009dcSAndroid Build Coastguard Worker adcs x23,x10,x17 608*8fb009dcSAndroid Build Coastguard Worker extr x14,x15,x14,#63 609*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[x2,#8*2] 610*8fb009dcSAndroid Build Coastguard Worker adcs x24,x11,x14 611*8fb009dcSAndroid Build Coastguard Worker ldp x17,x14,[x2,#8*7] 612*8fb009dcSAndroid Build Coastguard Worker extr x15,x16,x15,#63 613*8fb009dcSAndroid Build Coastguard Worker adcs x25,x12,x15 614*8fb009dcSAndroid Build Coastguard Worker extr x16,x17,x16,#63 615*8fb009dcSAndroid Build Coastguard Worker adcs x26,x13,x16 616*8fb009dcSAndroid Build Coastguard Worker ldp x15,x16,[x2,#8*9] 617*8fb009dcSAndroid Build Coastguard Worker mul x6,x7,x7 618*8fb009dcSAndroid Build Coastguard Worker ldp x11,x13,[x1],#8*2 619*8fb009dcSAndroid Build Coastguard Worker umulh x7,x7,x7 620*8fb009dcSAndroid Build Coastguard Worker mul x8,x9,x9 621*8fb009dcSAndroid Build Coastguard Worker umulh x9,x9,x9 622*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[x2,#8*4] 623*8fb009dcSAndroid Build Coastguard Worker extr x17,x14,x17,#63 624*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[x2,#8*6] 625*8fb009dcSAndroid Build Coastguard Worker add x2,x2,#8*8 626*8fb009dcSAndroid Build Coastguard Worker adcs x19,x6,x17 627*8fb009dcSAndroid Build Coastguard Worker extr x14,x15,x14,#63 628*8fb009dcSAndroid Build Coastguard Worker adcs x20,x7,x14 629*8fb009dcSAndroid Build Coastguard Worker ldp x17,x14,[x2,#8*3] 630*8fb009dcSAndroid Build Coastguard Worker extr x15,x16,x15,#63 631*8fb009dcSAndroid Build Coastguard Worker cbnz x27,.Lsqr4x_shift_n_add 632*8fb009dcSAndroid Build Coastguard Worker ldp x1,x4,[x29,#104] // pull np and n0 633*8fb009dcSAndroid Build Coastguard Worker 634*8fb009dcSAndroid Build Coastguard Worker adcs x21,x8,x15 635*8fb009dcSAndroid Build 
Coastguard Worker extr x16,x17,x16,#63 636*8fb009dcSAndroid Build Coastguard Worker adcs x22,x9,x16 637*8fb009dcSAndroid Build Coastguard Worker ldp x15,x16,[x2,#8*5] 638*8fb009dcSAndroid Build Coastguard Worker mul x10,x11,x11 639*8fb009dcSAndroid Build Coastguard Worker umulh x11,x11,x11 640*8fb009dcSAndroid Build Coastguard Worker stp x19,x20,[x2,#8*0] 641*8fb009dcSAndroid Build Coastguard Worker mul x12,x13,x13 642*8fb009dcSAndroid Build Coastguard Worker umulh x13,x13,x13 643*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[x2,#8*2] 644*8fb009dcSAndroid Build Coastguard Worker extr x17,x14,x17,#63 645*8fb009dcSAndroid Build Coastguard Worker adcs x23,x10,x17 646*8fb009dcSAndroid Build Coastguard Worker extr x14,x15,x14,#63 647*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[sp,#8*0] 648*8fb009dcSAndroid Build Coastguard Worker adcs x24,x11,x14 649*8fb009dcSAndroid Build Coastguard Worker extr x15,x16,x15,#63 650*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 651*8fb009dcSAndroid Build Coastguard Worker adcs x25,x12,x15 652*8fb009dcSAndroid Build Coastguard Worker extr x16,xzr,x16,#63 653*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 654*8fb009dcSAndroid Build Coastguard Worker adc x26,x13,x16 655*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x1,#8*4] 656*8fb009dcSAndroid Build Coastguard Worker 657*8fb009dcSAndroid Build Coastguard Worker // Reduce by 512 bits per iteration 658*8fb009dcSAndroid Build Coastguard Worker mul x28,x4,x19 // t[0]*n0 659*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x1,#8*6] 660*8fb009dcSAndroid Build Coastguard Worker add x3,x1,x5 661*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[sp,#8*2] 662*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[x2,#8*4] 663*8fb009dcSAndroid Build Coastguard Worker ldp x23,x24,[sp,#8*4] 664*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[x2,#8*6] 665*8fb009dcSAndroid Build Coastguard Worker ldp x25,x26,[sp,#8*6] 
666*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*8 667*8fb009dcSAndroid Build Coastguard Worker mov x30,xzr // initial top-most carry 668*8fb009dcSAndroid Build Coastguard Worker mov x2,sp 669*8fb009dcSAndroid Build Coastguard Worker mov x27,#8 670*8fb009dcSAndroid Build Coastguard Worker 671*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_reduction: 672*8fb009dcSAndroid Build Coastguard Worker // (*) mul x14,x6,x28 // lo(n[0-7])*lo(t[0]*n0) 673*8fb009dcSAndroid Build Coastguard Worker mul x15,x7,x28 674*8fb009dcSAndroid Build Coastguard Worker sub x27,x27,#1 675*8fb009dcSAndroid Build Coastguard Worker mul x16,x8,x28 676*8fb009dcSAndroid Build Coastguard Worker str x28,[x2],#8 // put aside t[0]*n0 for tail processing 677*8fb009dcSAndroid Build Coastguard Worker mul x17,x9,x28 678*8fb009dcSAndroid Build Coastguard Worker // (*) adds xzr,x19,x14 679*8fb009dcSAndroid Build Coastguard Worker subs xzr,x19,#1 // (*) 680*8fb009dcSAndroid Build Coastguard Worker mul x14,x10,x28 681*8fb009dcSAndroid Build Coastguard Worker adcs x19,x20,x15 682*8fb009dcSAndroid Build Coastguard Worker mul x15,x11,x28 683*8fb009dcSAndroid Build Coastguard Worker adcs x20,x21,x16 684*8fb009dcSAndroid Build Coastguard Worker mul x16,x12,x28 685*8fb009dcSAndroid Build Coastguard Worker adcs x21,x22,x17 686*8fb009dcSAndroid Build Coastguard Worker mul x17,x13,x28 687*8fb009dcSAndroid Build Coastguard Worker adcs x22,x23,x14 688*8fb009dcSAndroid Build Coastguard Worker umulh x14,x6,x28 // hi(n[0-7])*lo(t[0]*n0) 689*8fb009dcSAndroid Build Coastguard Worker adcs x23,x24,x15 690*8fb009dcSAndroid Build Coastguard Worker umulh x15,x7,x28 691*8fb009dcSAndroid Build Coastguard Worker adcs x24,x25,x16 692*8fb009dcSAndroid Build Coastguard Worker umulh x16,x8,x28 693*8fb009dcSAndroid Build Coastguard Worker adcs x25,x26,x17 694*8fb009dcSAndroid Build Coastguard Worker umulh x17,x9,x28 695*8fb009dcSAndroid Build Coastguard Worker adc x26,xzr,xzr 696*8fb009dcSAndroid Build Coastguard Worker adds 
x19,x19,x14 697*8fb009dcSAndroid Build Coastguard Worker umulh x14,x10,x28 698*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x15 699*8fb009dcSAndroid Build Coastguard Worker umulh x15,x11,x28 700*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x16 701*8fb009dcSAndroid Build Coastguard Worker umulh x16,x12,x28 702*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x17 703*8fb009dcSAndroid Build Coastguard Worker umulh x17,x13,x28 704*8fb009dcSAndroid Build Coastguard Worker mul x28,x4,x19 // next t[0]*n0 705*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x14 706*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x15 707*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x16 708*8fb009dcSAndroid Build Coastguard Worker adc x26,x26,x17 709*8fb009dcSAndroid Build Coastguard Worker cbnz x27,.Lsqr8x_reduction 710*8fb009dcSAndroid Build Coastguard Worker 711*8fb009dcSAndroid Build Coastguard Worker ldp x14,x15,[x2,#8*0] 712*8fb009dcSAndroid Build Coastguard Worker ldp x16,x17,[x2,#8*2] 713*8fb009dcSAndroid Build Coastguard Worker mov x0,x2 714*8fb009dcSAndroid Build Coastguard Worker sub x27,x3,x1 // done yet? 
715*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x14 716*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x15 717*8fb009dcSAndroid Build Coastguard Worker ldp x14,x15,[x2,#8*4] 718*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x16 719*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x17 720*8fb009dcSAndroid Build Coastguard Worker ldp x16,x17,[x2,#8*6] 721*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x14 722*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x15 723*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x16 724*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x17 725*8fb009dcSAndroid Build Coastguard Worker //adc x28,xzr,xzr // moved below 726*8fb009dcSAndroid Build Coastguard Worker cbz x27,.Lsqr8x8_post_condition 727*8fb009dcSAndroid Build Coastguard Worker 728*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x2,#-8*8] 729*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 730*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 731*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x1,#8*4] 732*8fb009dcSAndroid Build Coastguard Worker mov x27,#-8*8 733*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x1,#8*6] 734*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*8 735*8fb009dcSAndroid Build Coastguard Worker 736*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_tail: 737*8fb009dcSAndroid Build Coastguard Worker mul x14,x6,x4 738*8fb009dcSAndroid Build Coastguard Worker adc x28,xzr,xzr // carry bit, modulo-scheduled 739*8fb009dcSAndroid Build Coastguard Worker mul x15,x7,x4 740*8fb009dcSAndroid Build Coastguard Worker add x27,x27,#8 741*8fb009dcSAndroid Build Coastguard Worker mul x16,x8,x4 742*8fb009dcSAndroid Build Coastguard Worker mul x17,x9,x4 743*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x14 744*8fb009dcSAndroid Build Coastguard Worker mul x14,x10,x4 745*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x15 746*8fb009dcSAndroid Build Coastguard Worker mul 
x15,x11,x4 747*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x16 748*8fb009dcSAndroid Build Coastguard Worker mul x16,x12,x4 749*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x17 750*8fb009dcSAndroid Build Coastguard Worker mul x17,x13,x4 751*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x14 752*8fb009dcSAndroid Build Coastguard Worker umulh x14,x6,x4 753*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x15 754*8fb009dcSAndroid Build Coastguard Worker umulh x15,x7,x4 755*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x16 756*8fb009dcSAndroid Build Coastguard Worker umulh x16,x8,x4 757*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x17 758*8fb009dcSAndroid Build Coastguard Worker umulh x17,x9,x4 759*8fb009dcSAndroid Build Coastguard Worker adc x28,x28,xzr 760*8fb009dcSAndroid Build Coastguard Worker str x19,[x2],#8 761*8fb009dcSAndroid Build Coastguard Worker adds x19,x20,x14 762*8fb009dcSAndroid Build Coastguard Worker umulh x14,x10,x4 763*8fb009dcSAndroid Build Coastguard Worker adcs x20,x21,x15 764*8fb009dcSAndroid Build Coastguard Worker umulh x15,x11,x4 765*8fb009dcSAndroid Build Coastguard Worker adcs x21,x22,x16 766*8fb009dcSAndroid Build Coastguard Worker umulh x16,x12,x4 767*8fb009dcSAndroid Build Coastguard Worker adcs x22,x23,x17 768*8fb009dcSAndroid Build Coastguard Worker umulh x17,x13,x4 769*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x0,x27] 770*8fb009dcSAndroid Build Coastguard Worker adcs x23,x24,x14 771*8fb009dcSAndroid Build Coastguard Worker adcs x24,x25,x15 772*8fb009dcSAndroid Build Coastguard Worker adcs x25,x26,x16 773*8fb009dcSAndroid Build Coastguard Worker adcs x26,x28,x17 774*8fb009dcSAndroid Build Coastguard Worker //adc x28,xzr,xzr // moved above 775*8fb009dcSAndroid Build Coastguard Worker cbnz x27,.Lsqr8x_tail 776*8fb009dcSAndroid Build Coastguard Worker // note that carry flag is guaranteed 777*8fb009dcSAndroid Build Coastguard Worker // to be zero at this point 778*8fb009dcSAndroid 
Build Coastguard Worker ldp x6,x7,[x2,#8*0] 779*8fb009dcSAndroid Build Coastguard Worker sub x27,x3,x1 // done yet? 780*8fb009dcSAndroid Build Coastguard Worker sub x16,x3,x5 // rewinded np 781*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x2,#8*2] 782*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x2,#8*4] 783*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x2,#8*6] 784*8fb009dcSAndroid Build Coastguard Worker cbz x27,.Lsqr8x_tail_break 785*8fb009dcSAndroid Build Coastguard Worker 786*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x0,#-8*8] 787*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x6 788*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x7 789*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 790*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x8 791*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x9 792*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 793*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x10 794*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x11 795*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x1,#8*4] 796*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x12 797*8fb009dcSAndroid Build Coastguard Worker mov x27,#-8*8 798*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x13 799*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x1,#8*6] 800*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*8 801*8fb009dcSAndroid Build Coastguard Worker //adc x28,xzr,xzr // moved above 802*8fb009dcSAndroid Build Coastguard Worker b .Lsqr8x_tail 803*8fb009dcSAndroid Build Coastguard Worker 804*8fb009dcSAndroid Build Coastguard Worker.align 4 805*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_tail_break: 806*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x29,#112] // pull n0 807*8fb009dcSAndroid Build Coastguard Worker add x27,x2,#8*8 // end of current t[num] window 808*8fb009dcSAndroid Build Coastguard Worker 809*8fb009dcSAndroid Build Coastguard 
Worker subs xzr,x30,#1 // "move" top-most carry to carry bit 810*8fb009dcSAndroid Build Coastguard Worker adcs x14,x19,x6 811*8fb009dcSAndroid Build Coastguard Worker adcs x15,x20,x7 812*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x0,#8*0] 813*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x8 814*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x16,#8*0] // recall that x16 is &n[0] 815*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x9 816*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x16,#8*2] 817*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x10 818*8fb009dcSAndroid Build Coastguard Worker adcs x24,x24,x11 819*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x16,#8*4] 820*8fb009dcSAndroid Build Coastguard Worker adcs x25,x25,x12 821*8fb009dcSAndroid Build Coastguard Worker adcs x26,x26,x13 822*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x16,#8*6] 823*8fb009dcSAndroid Build Coastguard Worker add x1,x16,#8*8 824*8fb009dcSAndroid Build Coastguard Worker adc x30,xzr,xzr // top-most carry 825*8fb009dcSAndroid Build Coastguard Worker mul x28,x4,x19 826*8fb009dcSAndroid Build Coastguard Worker stp x14,x15,[x2,#8*0] 827*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[x2,#8*2] 828*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x0,#8*2] 829*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[x2,#8*4] 830*8fb009dcSAndroid Build Coastguard Worker ldp x23,x24,[x0,#8*4] 831*8fb009dcSAndroid Build Coastguard Worker cmp x27,x29 // did we hit the bottom? 832*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[x2,#8*6] 833*8fb009dcSAndroid Build Coastguard Worker mov x2,x0 // slide the window 834*8fb009dcSAndroid Build Coastguard Worker ldp x25,x26,[x0,#8*6] 835*8fb009dcSAndroid Build Coastguard Worker mov x27,#8 836*8fb009dcSAndroid Build Coastguard Worker b.ne .Lsqr8x_reduction 837*8fb009dcSAndroid Build Coastguard Worker 838*8fb009dcSAndroid Build Coastguard Worker // Final step. 
We see if result is larger than modulus, and 839*8fb009dcSAndroid Build Coastguard Worker // if it is, subtract the modulus. But comparison implies 840*8fb009dcSAndroid Build Coastguard Worker // subtraction. So we subtract modulus, see if it borrowed, 841*8fb009dcSAndroid Build Coastguard Worker // and conditionally copy original value. 842*8fb009dcSAndroid Build Coastguard Worker ldr x0,[x29,#96] // pull rp 843*8fb009dcSAndroid Build Coastguard Worker add x2,x2,#8*8 844*8fb009dcSAndroid Build Coastguard Worker subs x14,x19,x6 845*8fb009dcSAndroid Build Coastguard Worker sbcs x15,x20,x7 846*8fb009dcSAndroid Build Coastguard Worker sub x27,x5,#8*8 847*8fb009dcSAndroid Build Coastguard Worker mov x3,x0 // x0 copy 848*8fb009dcSAndroid Build Coastguard Worker 849*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_sub: 850*8fb009dcSAndroid Build Coastguard Worker sbcs x16,x21,x8 851*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 852*8fb009dcSAndroid Build Coastguard Worker sbcs x17,x22,x9 853*8fb009dcSAndroid Build Coastguard Worker stp x14,x15,[x0,#8*0] 854*8fb009dcSAndroid Build Coastguard Worker sbcs x14,x23,x10 855*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 856*8fb009dcSAndroid Build Coastguard Worker sbcs x15,x24,x11 857*8fb009dcSAndroid Build Coastguard Worker stp x16,x17,[x0,#8*2] 858*8fb009dcSAndroid Build Coastguard Worker sbcs x16,x25,x12 859*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x1,#8*4] 860*8fb009dcSAndroid Build Coastguard Worker sbcs x17,x26,x13 861*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x1,#8*6] 862*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*8 863*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x2,#8*0] 864*8fb009dcSAndroid Build Coastguard Worker sub x27,x27,#8*8 865*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x2,#8*2] 866*8fb009dcSAndroid Build Coastguard Worker ldp x23,x24,[x2,#8*4] 867*8fb009dcSAndroid Build Coastguard Worker ldp x25,x26,[x2,#8*6] 
868*8fb009dcSAndroid Build Coastguard Worker add x2,x2,#8*8 869*8fb009dcSAndroid Build Coastguard Worker stp x14,x15,[x0,#8*4] 870*8fb009dcSAndroid Build Coastguard Worker sbcs x14,x19,x6 871*8fb009dcSAndroid Build Coastguard Worker stp x16,x17,[x0,#8*6] 872*8fb009dcSAndroid Build Coastguard Worker add x0,x0,#8*8 873*8fb009dcSAndroid Build Coastguard Worker sbcs x15,x20,x7 874*8fb009dcSAndroid Build Coastguard Worker cbnz x27,.Lsqr8x_sub 875*8fb009dcSAndroid Build Coastguard Worker 876*8fb009dcSAndroid Build Coastguard Worker sbcs x16,x21,x8 877*8fb009dcSAndroid Build Coastguard Worker mov x2,sp 878*8fb009dcSAndroid Build Coastguard Worker add x1,sp,x5 879*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x3,#8*0] 880*8fb009dcSAndroid Build Coastguard Worker sbcs x17,x22,x9 881*8fb009dcSAndroid Build Coastguard Worker stp x14,x15,[x0,#8*0] 882*8fb009dcSAndroid Build Coastguard Worker sbcs x14,x23,x10 883*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x3,#8*2] 884*8fb009dcSAndroid Build Coastguard Worker sbcs x15,x24,x11 885*8fb009dcSAndroid Build Coastguard Worker stp x16,x17,[x0,#8*2] 886*8fb009dcSAndroid Build Coastguard Worker sbcs x16,x25,x12 887*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x1,#8*0] 888*8fb009dcSAndroid Build Coastguard Worker sbcs x17,x26,x13 889*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x1,#8*2] 890*8fb009dcSAndroid Build Coastguard Worker sbcs xzr,x30,xzr // did it borrow? 
891*8fb009dcSAndroid Build Coastguard Worker ldr x30,[x29,#8] // pull return address 892*8fb009dcSAndroid Build Coastguard Worker stp x14,x15,[x0,#8*4] 893*8fb009dcSAndroid Build Coastguard Worker stp x16,x17,[x0,#8*6] 894*8fb009dcSAndroid Build Coastguard Worker 895*8fb009dcSAndroid Build Coastguard Worker sub x27,x5,#8*4 896*8fb009dcSAndroid Build Coastguard Worker.Lsqr4x_cond_copy: 897*8fb009dcSAndroid Build Coastguard Worker sub x27,x27,#8*4 898*8fb009dcSAndroid Build Coastguard Worker csel x14,x19,x6,lo 899*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*0] 900*8fb009dcSAndroid Build Coastguard Worker csel x15,x20,x7,lo 901*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x3,#8*4] 902*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x1,#8*4] 903*8fb009dcSAndroid Build Coastguard Worker csel x16,x21,x8,lo 904*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*2] 905*8fb009dcSAndroid Build Coastguard Worker add x2,x2,#8*4 906*8fb009dcSAndroid Build Coastguard Worker csel x17,x22,x9,lo 907*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x3,#8*6] 908*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x1,#8*6] 909*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*4 910*8fb009dcSAndroid Build Coastguard Worker stp x14,x15,[x3,#8*0] 911*8fb009dcSAndroid Build Coastguard Worker stp x16,x17,[x3,#8*2] 912*8fb009dcSAndroid Build Coastguard Worker add x3,x3,#8*4 913*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x1,#8*0] 914*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x1,#8*2] 915*8fb009dcSAndroid Build Coastguard Worker cbnz x27,.Lsqr4x_cond_copy 916*8fb009dcSAndroid Build Coastguard Worker 917*8fb009dcSAndroid Build Coastguard Worker csel x14,x19,x6,lo 918*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*0] 919*8fb009dcSAndroid Build Coastguard Worker csel x15,x20,x7,lo 920*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x2,#8*2] 921*8fb009dcSAndroid Build Coastguard Worker csel x16,x21,x8,lo 
922*8fb009dcSAndroid Build Coastguard Worker csel x17,x22,x9,lo 923*8fb009dcSAndroid Build Coastguard Worker stp x14,x15,[x3,#8*0] 924*8fb009dcSAndroid Build Coastguard Worker stp x16,x17,[x3,#8*2] 925*8fb009dcSAndroid Build Coastguard Worker 926*8fb009dcSAndroid Build Coastguard Worker b .Lsqr8x_done 927*8fb009dcSAndroid Build Coastguard Worker 928*8fb009dcSAndroid Build Coastguard Worker.align 4 929*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x8_post_condition: 930*8fb009dcSAndroid Build Coastguard Worker adc x28,xzr,xzr 931*8fb009dcSAndroid Build Coastguard Worker ldr x30,[x29,#8] // pull return address 932*8fb009dcSAndroid Build Coastguard Worker // x19-x26,x28 hold result, x6-x13 hold modulus 933*8fb009dcSAndroid Build Coastguard Worker subs x6,x19,x6 934*8fb009dcSAndroid Build Coastguard Worker ldr x1,[x29,#96] // pull rp 935*8fb009dcSAndroid Build Coastguard Worker sbcs x7,x20,x7 936*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*0] 937*8fb009dcSAndroid Build Coastguard Worker sbcs x8,x21,x8 938*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*2] 939*8fb009dcSAndroid Build Coastguard Worker sbcs x9,x22,x9 940*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*4] 941*8fb009dcSAndroid Build Coastguard Worker sbcs x10,x23,x10 942*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*6] 943*8fb009dcSAndroid Build Coastguard Worker sbcs x11,x24,x11 944*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*8] 945*8fb009dcSAndroid Build Coastguard Worker sbcs x12,x25,x12 946*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*10] 947*8fb009dcSAndroid Build Coastguard Worker sbcs x13,x26,x13 948*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*12] 949*8fb009dcSAndroid Build Coastguard Worker sbcs x28,x28,xzr // did it borrow? 
950*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*14] 951*8fb009dcSAndroid Build Coastguard Worker 952*8fb009dcSAndroid Build Coastguard Worker // x6-x13 hold result-modulus 953*8fb009dcSAndroid Build Coastguard Worker csel x6,x19,x6,lo 954*8fb009dcSAndroid Build Coastguard Worker csel x7,x20,x7,lo 955*8fb009dcSAndroid Build Coastguard Worker csel x8,x21,x8,lo 956*8fb009dcSAndroid Build Coastguard Worker csel x9,x22,x9,lo 957*8fb009dcSAndroid Build Coastguard Worker stp x6,x7,[x1,#8*0] 958*8fb009dcSAndroid Build Coastguard Worker csel x10,x23,x10,lo 959*8fb009dcSAndroid Build Coastguard Worker csel x11,x24,x11,lo 960*8fb009dcSAndroid Build Coastguard Worker stp x8,x9,[x1,#8*2] 961*8fb009dcSAndroid Build Coastguard Worker csel x12,x25,x12,lo 962*8fb009dcSAndroid Build Coastguard Worker csel x13,x26,x13,lo 963*8fb009dcSAndroid Build Coastguard Worker stp x10,x11,[x1,#8*4] 964*8fb009dcSAndroid Build Coastguard Worker stp x12,x13,[x1,#8*6] 965*8fb009dcSAndroid Build Coastguard Worker 966*8fb009dcSAndroid Build Coastguard Worker.Lsqr8x_done: 967*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x29,#16] 968*8fb009dcSAndroid Build Coastguard Worker mov sp,x29 969*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x29,#32] 970*8fb009dcSAndroid Build Coastguard Worker mov x0,#1 971*8fb009dcSAndroid Build Coastguard Worker ldp x23,x24,[x29,#48] 972*8fb009dcSAndroid Build Coastguard Worker ldp x25,x26,[x29,#64] 973*8fb009dcSAndroid Build Coastguard Worker ldp x27,x28,[x29,#80] 974*8fb009dcSAndroid Build Coastguard Worker ldr x29,[sp],#128 975*8fb009dcSAndroid Build Coastguard Worker // x30 is popped earlier 976*8fb009dcSAndroid Build Coastguard Worker AARCH64_VALIDATE_LINK_REGISTER 977*8fb009dcSAndroid Build Coastguard Worker ret 978*8fb009dcSAndroid Build Coastguard Worker.size __bn_sqr8x_mont,.-__bn_sqr8x_mont 979*8fb009dcSAndroid Build Coastguard Worker.type __bn_mul4x_mont,%function 980*8fb009dcSAndroid Build Coastguard Worker.align 5 
981*8fb009dcSAndroid Build Coastguard Worker__bn_mul4x_mont: 982*8fb009dcSAndroid Build Coastguard Worker // Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to 983*8fb009dcSAndroid Build Coastguard Worker // only from bn_mul_mont or __bn_sqr8x_mont which have already signed the 984*8fb009dcSAndroid Build Coastguard Worker // return address. 985*8fb009dcSAndroid Build Coastguard Worker stp x29,x30,[sp,#-128]! 986*8fb009dcSAndroid Build Coastguard Worker add x29,sp,#0 987*8fb009dcSAndroid Build Coastguard Worker stp x19,x20,[sp,#16] 988*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[sp,#32] 989*8fb009dcSAndroid Build Coastguard Worker stp x23,x24,[sp,#48] 990*8fb009dcSAndroid Build Coastguard Worker stp x25,x26,[sp,#64] 991*8fb009dcSAndroid Build Coastguard Worker stp x27,x28,[sp,#80] 992*8fb009dcSAndroid Build Coastguard Worker 993*8fb009dcSAndroid Build Coastguard Worker sub x26,sp,x5,lsl#3 994*8fb009dcSAndroid Build Coastguard Worker lsl x5,x5,#3 995*8fb009dcSAndroid Build Coastguard Worker ldr x4,[x4] // *n0 996*8fb009dcSAndroid Build Coastguard Worker sub sp,x26,#8*4 // alloca 997*8fb009dcSAndroid Build Coastguard Worker 998*8fb009dcSAndroid Build Coastguard Worker add x10,x2,x5 999*8fb009dcSAndroid Build Coastguard Worker add x27,x1,x5 1000*8fb009dcSAndroid Build Coastguard Worker stp x0,x10,[x29,#96] // offload rp and &b[num] 1001*8fb009dcSAndroid Build Coastguard Worker 1002*8fb009dcSAndroid Build Coastguard Worker ldr x24,[x2,#8*0] // b[0] 1003*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] // a[0..3] 1004*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 1005*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*4 1006*8fb009dcSAndroid Build Coastguard Worker mov x19,xzr 1007*8fb009dcSAndroid Build Coastguard Worker mov x20,xzr 1008*8fb009dcSAndroid Build Coastguard Worker mov x21,xzr 1009*8fb009dcSAndroid Build Coastguard Worker mov x22,xzr 1010*8fb009dcSAndroid Build Coastguard Worker ldp 
x14,x15,[x3,#8*0] // n[0..3] 1011*8fb009dcSAndroid Build Coastguard Worker ldp x16,x17,[x3,#8*2] 1012*8fb009dcSAndroid Build Coastguard Worker adds x3,x3,#8*4 // clear carry bit 1013*8fb009dcSAndroid Build Coastguard Worker mov x0,xzr 1014*8fb009dcSAndroid Build Coastguard Worker mov x28,#0 1015*8fb009dcSAndroid Build Coastguard Worker mov x26,sp 1016*8fb009dcSAndroid Build Coastguard Worker 1017*8fb009dcSAndroid Build Coastguard Worker.Loop_mul4x_1st_reduction: 1018*8fb009dcSAndroid Build Coastguard Worker mul x10,x6,x24 // lo(a[0..3]*b[0]) 1019*8fb009dcSAndroid Build Coastguard Worker adc x0,x0,xzr // modulo-scheduled 1020*8fb009dcSAndroid Build Coastguard Worker mul x11,x7,x24 1021*8fb009dcSAndroid Build Coastguard Worker add x28,x28,#8 1022*8fb009dcSAndroid Build Coastguard Worker mul x12,x8,x24 1023*8fb009dcSAndroid Build Coastguard Worker and x28,x28,#31 1024*8fb009dcSAndroid Build Coastguard Worker mul x13,x9,x24 1025*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x10 1026*8fb009dcSAndroid Build Coastguard Worker umulh x10,x6,x24 // hi(a[0..3]*b[0]) 1027*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x11 1028*8fb009dcSAndroid Build Coastguard Worker mul x25,x19,x4 // t[0]*n0 1029*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x12 1030*8fb009dcSAndroid Build Coastguard Worker umulh x11,x7,x24 1031*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x13 1032*8fb009dcSAndroid Build Coastguard Worker umulh x12,x8,x24 1033*8fb009dcSAndroid Build Coastguard Worker adc x23,xzr,xzr 1034*8fb009dcSAndroid Build Coastguard Worker umulh x13,x9,x24 1035*8fb009dcSAndroid Build Coastguard Worker ldr x24,[x2,x28] // next b[i] (or b[0]) 1036*8fb009dcSAndroid Build Coastguard Worker adds x20,x20,x10 1037*8fb009dcSAndroid Build Coastguard Worker // (*) mul x10,x14,x25 // lo(n[0..3]*t[0]*n0) 1038*8fb009dcSAndroid Build Coastguard Worker str x25,[x26],#8 // put aside t[0]*n0 for tail processing 1039*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x11 
1040*8fb009dcSAndroid Build Coastguard Worker mul x11,x15,x25 1041*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x12 1042*8fb009dcSAndroid Build Coastguard Worker mul x12,x16,x25 1043*8fb009dcSAndroid Build Coastguard Worker adc x23,x23,x13 // can't overflow 1044*8fb009dcSAndroid Build Coastguard Worker mul x13,x17,x25 1045*8fb009dcSAndroid Build Coastguard Worker // (*) adds xzr,x19,x10 1046*8fb009dcSAndroid Build Coastguard Worker subs xzr,x19,#1 // (*) 1047*8fb009dcSAndroid Build Coastguard Worker umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0) 1048*8fb009dcSAndroid Build Coastguard Worker adcs x19,x20,x11 1049*8fb009dcSAndroid Build Coastguard Worker umulh x11,x15,x25 1050*8fb009dcSAndroid Build Coastguard Worker adcs x20,x21,x12 1051*8fb009dcSAndroid Build Coastguard Worker umulh x12,x16,x25 1052*8fb009dcSAndroid Build Coastguard Worker adcs x21,x22,x13 1053*8fb009dcSAndroid Build Coastguard Worker umulh x13,x17,x25 1054*8fb009dcSAndroid Build Coastguard Worker adcs x22,x23,x0 1055*8fb009dcSAndroid Build Coastguard Worker adc x0,xzr,xzr 1056*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x10 1057*8fb009dcSAndroid Build Coastguard Worker sub x10,x27,x1 1058*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x11 1059*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x12 1060*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x13 1061*8fb009dcSAndroid Build Coastguard Worker //adc x0,x0,xzr 1062*8fb009dcSAndroid Build Coastguard Worker cbnz x28,.Loop_mul4x_1st_reduction 1063*8fb009dcSAndroid Build Coastguard Worker 1064*8fb009dcSAndroid Build Coastguard Worker cbz x10,.Lmul4x4_post_condition 1065*8fb009dcSAndroid Build Coastguard Worker 1066*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] // a[4..7] 1067*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 1068*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*4 1069*8fb009dcSAndroid Build Coastguard Worker ldr x25,[sp] // a[0]*n0 1070*8fb009dcSAndroid Build Coastguard 
Worker ldp x14,x15,[x3,#8*0] // n[4..7] 1071*8fb009dcSAndroid Build Coastguard Worker ldp x16,x17,[x3,#8*2] 1072*8fb009dcSAndroid Build Coastguard Worker add x3,x3,#8*4 1073*8fb009dcSAndroid Build Coastguard Worker 1074*8fb009dcSAndroid Build Coastguard Worker.Loop_mul4x_1st_tail: 1075*8fb009dcSAndroid Build Coastguard Worker mul x10,x6,x24 // lo(a[4..7]*b[i]) 1076*8fb009dcSAndroid Build Coastguard Worker adc x0,x0,xzr // modulo-scheduled 1077*8fb009dcSAndroid Build Coastguard Worker mul x11,x7,x24 1078*8fb009dcSAndroid Build Coastguard Worker add x28,x28,#8 1079*8fb009dcSAndroid Build Coastguard Worker mul x12,x8,x24 1080*8fb009dcSAndroid Build Coastguard Worker and x28,x28,#31 1081*8fb009dcSAndroid Build Coastguard Worker mul x13,x9,x24 1082*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x10 1083*8fb009dcSAndroid Build Coastguard Worker umulh x10,x6,x24 // hi(a[4..7]*b[i]) 1084*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x11 1085*8fb009dcSAndroid Build Coastguard Worker umulh x11,x7,x24 1086*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x12 1087*8fb009dcSAndroid Build Coastguard Worker umulh x12,x8,x24 1088*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x13 1089*8fb009dcSAndroid Build Coastguard Worker umulh x13,x9,x24 1090*8fb009dcSAndroid Build Coastguard Worker adc x23,xzr,xzr 1091*8fb009dcSAndroid Build Coastguard Worker ldr x24,[x2,x28] // next b[i] (or b[0]) 1092*8fb009dcSAndroid Build Coastguard Worker adds x20,x20,x10 1093*8fb009dcSAndroid Build Coastguard Worker mul x10,x14,x25 // lo(n[4..7]*a[0]*n0) 1094*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x11 1095*8fb009dcSAndroid Build Coastguard Worker mul x11,x15,x25 1096*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x12 1097*8fb009dcSAndroid Build Coastguard Worker mul x12,x16,x25 1098*8fb009dcSAndroid Build Coastguard Worker adc x23,x23,x13 // can't overflow 1099*8fb009dcSAndroid Build Coastguard Worker mul x13,x17,x25 1100*8fb009dcSAndroid Build Coastguard 
Worker adds x19,x19,x10 1101*8fb009dcSAndroid Build Coastguard Worker umulh x10,x14,x25 // hi(n[4..7]*a[0]*n0) 1102*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x11 1103*8fb009dcSAndroid Build Coastguard Worker umulh x11,x15,x25 1104*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x12 1105*8fb009dcSAndroid Build Coastguard Worker umulh x12,x16,x25 1106*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x13 1107*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x0 1108*8fb009dcSAndroid Build Coastguard Worker umulh x13,x17,x25 1109*8fb009dcSAndroid Build Coastguard Worker adc x0,xzr,xzr 1110*8fb009dcSAndroid Build Coastguard Worker ldr x25,[sp,x28] // next t[0]*n0 1111*8fb009dcSAndroid Build Coastguard Worker str x19,[x26],#8 // result!!! 1112*8fb009dcSAndroid Build Coastguard Worker adds x19,x20,x10 1113*8fb009dcSAndroid Build Coastguard Worker sub x10,x27,x1 // done yet? 1114*8fb009dcSAndroid Build Coastguard Worker adcs x20,x21,x11 1115*8fb009dcSAndroid Build Coastguard Worker adcs x21,x22,x12 1116*8fb009dcSAndroid Build Coastguard Worker adcs x22,x23,x13 1117*8fb009dcSAndroid Build Coastguard Worker //adc x0,x0,xzr 1118*8fb009dcSAndroid Build Coastguard Worker cbnz x28,.Loop_mul4x_1st_tail 1119*8fb009dcSAndroid Build Coastguard Worker 1120*8fb009dcSAndroid Build Coastguard Worker sub x11,x27,x5 // rewinded x1 1121*8fb009dcSAndroid Build Coastguard Worker cbz x10,.Lmul4x_proceed 1122*8fb009dcSAndroid Build Coastguard Worker 1123*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 1124*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 1125*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*4 1126*8fb009dcSAndroid Build Coastguard Worker ldp x14,x15,[x3,#8*0] 1127*8fb009dcSAndroid Build Coastguard Worker ldp x16,x17,[x3,#8*2] 1128*8fb009dcSAndroid Build Coastguard Worker add x3,x3,#8*4 1129*8fb009dcSAndroid Build Coastguard Worker b .Loop_mul4x_1st_tail 1130*8fb009dcSAndroid Build Coastguard Worker 1131*8fb009dcSAndroid 
Build Coastguard Worker.align 5 1132*8fb009dcSAndroid Build Coastguard Worker.Lmul4x_proceed: 1133*8fb009dcSAndroid Build Coastguard Worker ldr x24,[x2,#8*4]! // *++b 1134*8fb009dcSAndroid Build Coastguard Worker adc x30,x0,xzr 1135*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x11,#8*0] // a[0..3] 1136*8fb009dcSAndroid Build Coastguard Worker sub x3,x3,x5 // rewind np 1137*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x11,#8*2] 1138*8fb009dcSAndroid Build Coastguard Worker add x1,x11,#8*4 1139*8fb009dcSAndroid Build Coastguard Worker 1140*8fb009dcSAndroid Build Coastguard Worker stp x19,x20,[x26,#8*0] // result!!! 1141*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[sp,#8*4] // t[0..3] 1142*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[x26,#8*2] // result!!! 1143*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[sp,#8*6] 1144*8fb009dcSAndroid Build Coastguard Worker 1145*8fb009dcSAndroid Build Coastguard Worker ldp x14,x15,[x3,#8*0] // n[0..3] 1146*8fb009dcSAndroid Build Coastguard Worker mov x26,sp 1147*8fb009dcSAndroid Build Coastguard Worker ldp x16,x17,[x3,#8*2] 1148*8fb009dcSAndroid Build Coastguard Worker adds x3,x3,#8*4 // clear carry bit 1149*8fb009dcSAndroid Build Coastguard Worker mov x0,xzr 1150*8fb009dcSAndroid Build Coastguard Worker 1151*8fb009dcSAndroid Build Coastguard Worker.align 4 1152*8fb009dcSAndroid Build Coastguard Worker.Loop_mul4x_reduction: 1153*8fb009dcSAndroid Build Coastguard Worker mul x10,x6,x24 // lo(a[0..3]*b[4]) 1154*8fb009dcSAndroid Build Coastguard Worker adc x0,x0,xzr // modulo-scheduled 1155*8fb009dcSAndroid Build Coastguard Worker mul x11,x7,x24 1156*8fb009dcSAndroid Build Coastguard Worker add x28,x28,#8 1157*8fb009dcSAndroid Build Coastguard Worker mul x12,x8,x24 1158*8fb009dcSAndroid Build Coastguard Worker and x28,x28,#31 1159*8fb009dcSAndroid Build Coastguard Worker mul x13,x9,x24 1160*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x10 1161*8fb009dcSAndroid Build Coastguard Worker 
umulh x10,x6,x24 // hi(a[0..3]*b[4]) 1162*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x11 1163*8fb009dcSAndroid Build Coastguard Worker mul x25,x19,x4 // t[0]*n0 1164*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x12 1165*8fb009dcSAndroid Build Coastguard Worker umulh x11,x7,x24 1166*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x13 1167*8fb009dcSAndroid Build Coastguard Worker umulh x12,x8,x24 1168*8fb009dcSAndroid Build Coastguard Worker adc x23,xzr,xzr 1169*8fb009dcSAndroid Build Coastguard Worker umulh x13,x9,x24 1170*8fb009dcSAndroid Build Coastguard Worker ldr x24,[x2,x28] // next b[i] 1171*8fb009dcSAndroid Build Coastguard Worker adds x20,x20,x10 1172*8fb009dcSAndroid Build Coastguard Worker // (*) mul x10,x14,x25 1173*8fb009dcSAndroid Build Coastguard Worker str x25,[x26],#8 // put aside t[0]*n0 for tail processing 1174*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x11 1175*8fb009dcSAndroid Build Coastguard Worker mul x11,x15,x25 // lo(n[0..3]*t[0]*n0 1176*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x12 1177*8fb009dcSAndroid Build Coastguard Worker mul x12,x16,x25 1178*8fb009dcSAndroid Build Coastguard Worker adc x23,x23,x13 // can't overflow 1179*8fb009dcSAndroid Build Coastguard Worker mul x13,x17,x25 1180*8fb009dcSAndroid Build Coastguard Worker // (*) adds xzr,x19,x10 1181*8fb009dcSAndroid Build Coastguard Worker subs xzr,x19,#1 // (*) 1182*8fb009dcSAndroid Build Coastguard Worker umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0 1183*8fb009dcSAndroid Build Coastguard Worker adcs x19,x20,x11 1184*8fb009dcSAndroid Build Coastguard Worker umulh x11,x15,x25 1185*8fb009dcSAndroid Build Coastguard Worker adcs x20,x21,x12 1186*8fb009dcSAndroid Build Coastguard Worker umulh x12,x16,x25 1187*8fb009dcSAndroid Build Coastguard Worker adcs x21,x22,x13 1188*8fb009dcSAndroid Build Coastguard Worker umulh x13,x17,x25 1189*8fb009dcSAndroid Build Coastguard Worker adcs x22,x23,x0 1190*8fb009dcSAndroid Build Coastguard Worker adc 
x0,xzr,xzr 1191*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x10 1192*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x11 1193*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x12 1194*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x13 1195*8fb009dcSAndroid Build Coastguard Worker //adc x0,x0,xzr 1196*8fb009dcSAndroid Build Coastguard Worker cbnz x28,.Loop_mul4x_reduction 1197*8fb009dcSAndroid Build Coastguard Worker 1198*8fb009dcSAndroid Build Coastguard Worker adc x0,x0,xzr 1199*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x26,#8*4] // t[4..7] 1200*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x26,#8*6] 1201*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] // a[4..7] 1202*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 1203*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*4 1204*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x10 1205*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x11 1206*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x12 1207*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x13 1208*8fb009dcSAndroid Build Coastguard Worker //adc x0,x0,xzr 1209*8fb009dcSAndroid Build Coastguard Worker 1210*8fb009dcSAndroid Build Coastguard Worker ldr x25,[sp] // t[0]*n0 1211*8fb009dcSAndroid Build Coastguard Worker ldp x14,x15,[x3,#8*0] // n[4..7] 1212*8fb009dcSAndroid Build Coastguard Worker ldp x16,x17,[x3,#8*2] 1213*8fb009dcSAndroid Build Coastguard Worker add x3,x3,#8*4 1214*8fb009dcSAndroid Build Coastguard Worker 1215*8fb009dcSAndroid Build Coastguard Worker.align 4 1216*8fb009dcSAndroid Build Coastguard Worker.Loop_mul4x_tail: 1217*8fb009dcSAndroid Build Coastguard Worker mul x10,x6,x24 // lo(a[4..7]*b[4]) 1218*8fb009dcSAndroid Build Coastguard Worker adc x0,x0,xzr // modulo-scheduled 1219*8fb009dcSAndroid Build Coastguard Worker mul x11,x7,x24 1220*8fb009dcSAndroid Build Coastguard Worker add x28,x28,#8 1221*8fb009dcSAndroid Build Coastguard Worker mul 
x12,x8,x24 1222*8fb009dcSAndroid Build Coastguard Worker and x28,x28,#31 1223*8fb009dcSAndroid Build Coastguard Worker mul x13,x9,x24 1224*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x10 1225*8fb009dcSAndroid Build Coastguard Worker umulh x10,x6,x24 // hi(a[4..7]*b[4]) 1226*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x11 1227*8fb009dcSAndroid Build Coastguard Worker umulh x11,x7,x24 1228*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x12 1229*8fb009dcSAndroid Build Coastguard Worker umulh x12,x8,x24 1230*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x13 1231*8fb009dcSAndroid Build Coastguard Worker umulh x13,x9,x24 1232*8fb009dcSAndroid Build Coastguard Worker adc x23,xzr,xzr 1233*8fb009dcSAndroid Build Coastguard Worker ldr x24,[x2,x28] // next b[i] 1234*8fb009dcSAndroid Build Coastguard Worker adds x20,x20,x10 1235*8fb009dcSAndroid Build Coastguard Worker mul x10,x14,x25 // lo(n[4..7]*t[0]*n0) 1236*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x11 1237*8fb009dcSAndroid Build Coastguard Worker mul x11,x15,x25 1238*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x12 1239*8fb009dcSAndroid Build Coastguard Worker mul x12,x16,x25 1240*8fb009dcSAndroid Build Coastguard Worker adc x23,x23,x13 // can't overflow 1241*8fb009dcSAndroid Build Coastguard Worker mul x13,x17,x25 1242*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x10 1243*8fb009dcSAndroid Build Coastguard Worker umulh x10,x14,x25 // hi(n[4..7]*t[0]*n0) 1244*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x11 1245*8fb009dcSAndroid Build Coastguard Worker umulh x11,x15,x25 1246*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x12 1247*8fb009dcSAndroid Build Coastguard Worker umulh x12,x16,x25 1248*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x13 1249*8fb009dcSAndroid Build Coastguard Worker umulh x13,x17,x25 1250*8fb009dcSAndroid Build Coastguard Worker adcs x23,x23,x0 1251*8fb009dcSAndroid Build Coastguard Worker ldr x25,[sp,x28] // next 
a[0]*n0 1252*8fb009dcSAndroid Build Coastguard Worker adc x0,xzr,xzr 1253*8fb009dcSAndroid Build Coastguard Worker str x19,[x26],#8 // result!!! 1254*8fb009dcSAndroid Build Coastguard Worker adds x19,x20,x10 1255*8fb009dcSAndroid Build Coastguard Worker sub x10,x27,x1 // done yet? 1256*8fb009dcSAndroid Build Coastguard Worker adcs x20,x21,x11 1257*8fb009dcSAndroid Build Coastguard Worker adcs x21,x22,x12 1258*8fb009dcSAndroid Build Coastguard Worker adcs x22,x23,x13 1259*8fb009dcSAndroid Build Coastguard Worker //adc x0,x0,xzr 1260*8fb009dcSAndroid Build Coastguard Worker cbnz x28,.Loop_mul4x_tail 1261*8fb009dcSAndroid Build Coastguard Worker 1262*8fb009dcSAndroid Build Coastguard Worker sub x11,x3,x5 // rewinded np? 1263*8fb009dcSAndroid Build Coastguard Worker adc x0,x0,xzr 1264*8fb009dcSAndroid Build Coastguard Worker cbz x10,.Loop_mul4x_break 1265*8fb009dcSAndroid Build Coastguard Worker 1266*8fb009dcSAndroid Build Coastguard Worker ldp x10,x11,[x26,#8*4] 1267*8fb009dcSAndroid Build Coastguard Worker ldp x12,x13,[x26,#8*6] 1268*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] 1269*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 1270*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*4 1271*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x10 1272*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,x11 1273*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,x12 1274*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,x13 1275*8fb009dcSAndroid Build Coastguard Worker //adc x0,x0,xzr 1276*8fb009dcSAndroid Build Coastguard Worker ldp x14,x15,[x3,#8*0] 1277*8fb009dcSAndroid Build Coastguard Worker ldp x16,x17,[x3,#8*2] 1278*8fb009dcSAndroid Build Coastguard Worker add x3,x3,#8*4 1279*8fb009dcSAndroid Build Coastguard Worker b .Loop_mul4x_tail 1280*8fb009dcSAndroid Build Coastguard Worker 1281*8fb009dcSAndroid Build Coastguard Worker.align 4 1282*8fb009dcSAndroid Build Coastguard Worker.Loop_mul4x_break: 1283*8fb009dcSAndroid 
Build Coastguard Worker ldp x12,x13,[x29,#96] // pull rp and &b[num] 1284*8fb009dcSAndroid Build Coastguard Worker adds x19,x19,x30 1285*8fb009dcSAndroid Build Coastguard Worker add x2,x2,#8*4 // bp++ 1286*8fb009dcSAndroid Build Coastguard Worker adcs x20,x20,xzr 1287*8fb009dcSAndroid Build Coastguard Worker sub x1,x1,x5 // rewind ap 1288*8fb009dcSAndroid Build Coastguard Worker adcs x21,x21,xzr 1289*8fb009dcSAndroid Build Coastguard Worker stp x19,x20,[x26,#8*0] // result!!! 1290*8fb009dcSAndroid Build Coastguard Worker adcs x22,x22,xzr 1291*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[sp,#8*4] // t[0..3] 1292*8fb009dcSAndroid Build Coastguard Worker adc x30,x0,xzr 1293*8fb009dcSAndroid Build Coastguard Worker stp x21,x22,[x26,#8*2] // result!!! 1294*8fb009dcSAndroid Build Coastguard Worker cmp x2,x13 // done yet? 1295*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[sp,#8*6] 1296*8fb009dcSAndroid Build Coastguard Worker ldp x14,x15,[x11,#8*0] // n[0..3] 1297*8fb009dcSAndroid Build Coastguard Worker ldp x16,x17,[x11,#8*2] 1298*8fb009dcSAndroid Build Coastguard Worker add x3,x11,#8*4 1299*8fb009dcSAndroid Build Coastguard Worker b.eq .Lmul4x_post 1300*8fb009dcSAndroid Build Coastguard Worker 1301*8fb009dcSAndroid Build Coastguard Worker ldr x24,[x2] 1302*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x1,#8*0] // a[0..3] 1303*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x1,#8*2] 1304*8fb009dcSAndroid Build Coastguard Worker adds x1,x1,#8*4 // clear carry bit 1305*8fb009dcSAndroid Build Coastguard Worker mov x0,xzr 1306*8fb009dcSAndroid Build Coastguard Worker mov x26,sp 1307*8fb009dcSAndroid Build Coastguard Worker b .Loop_mul4x_reduction 1308*8fb009dcSAndroid Build Coastguard Worker 1309*8fb009dcSAndroid Build Coastguard Worker.align 4 1310*8fb009dcSAndroid Build Coastguard Worker.Lmul4x_post: 1311*8fb009dcSAndroid Build Coastguard Worker // Final step. 
We see if result is larger than modulus, and 1312*8fb009dcSAndroid Build Coastguard Worker // if it is, subtract the modulus. But comparison implies 1313*8fb009dcSAndroid Build Coastguard Worker // subtraction. So we subtract modulus, see if it borrowed, 1314*8fb009dcSAndroid Build Coastguard Worker // and conditionally copy original value. 1315*8fb009dcSAndroid Build Coastguard Worker mov x0,x12 1316*8fb009dcSAndroid Build Coastguard Worker mov x27,x12 // x0 copy 1317*8fb009dcSAndroid Build Coastguard Worker subs x10,x19,x14 1318*8fb009dcSAndroid Build Coastguard Worker add x26,sp,#8*8 1319*8fb009dcSAndroid Build Coastguard Worker sbcs x11,x20,x15 1320*8fb009dcSAndroid Build Coastguard Worker sub x28,x5,#8*4 1321*8fb009dcSAndroid Build Coastguard Worker 1322*8fb009dcSAndroid Build Coastguard Worker.Lmul4x_sub: 1323*8fb009dcSAndroid Build Coastguard Worker sbcs x12,x21,x16 1324*8fb009dcSAndroid Build Coastguard Worker ldp x14,x15,[x3,#8*0] 1325*8fb009dcSAndroid Build Coastguard Worker sub x28,x28,#8*4 1326*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x26,#8*0] 1327*8fb009dcSAndroid Build Coastguard Worker sbcs x13,x22,x17 1328*8fb009dcSAndroid Build Coastguard Worker ldp x16,x17,[x3,#8*2] 1329*8fb009dcSAndroid Build Coastguard Worker add x3,x3,#8*4 1330*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x26,#8*2] 1331*8fb009dcSAndroid Build Coastguard Worker add x26,x26,#8*4 1332*8fb009dcSAndroid Build Coastguard Worker stp x10,x11,[x0,#8*0] 1333*8fb009dcSAndroid Build Coastguard Worker sbcs x10,x19,x14 1334*8fb009dcSAndroid Build Coastguard Worker stp x12,x13,[x0,#8*2] 1335*8fb009dcSAndroid Build Coastguard Worker add x0,x0,#8*4 1336*8fb009dcSAndroid Build Coastguard Worker sbcs x11,x20,x15 1337*8fb009dcSAndroid Build Coastguard Worker cbnz x28,.Lmul4x_sub 1338*8fb009dcSAndroid Build Coastguard Worker 1339*8fb009dcSAndroid Build Coastguard Worker sbcs x12,x21,x16 1340*8fb009dcSAndroid Build Coastguard Worker mov x26,sp 1341*8fb009dcSAndroid Build 
Coastguard Worker add x1,sp,#8*4 1342*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x27,#8*0] 1343*8fb009dcSAndroid Build Coastguard Worker sbcs x13,x22,x17 1344*8fb009dcSAndroid Build Coastguard Worker stp x10,x11,[x0,#8*0] 1345*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x27,#8*2] 1346*8fb009dcSAndroid Build Coastguard Worker stp x12,x13,[x0,#8*2] 1347*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x1,#8*0] 1348*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x1,#8*2] 1349*8fb009dcSAndroid Build Coastguard Worker sbcs xzr,x30,xzr // did it borrow? 1350*8fb009dcSAndroid Build Coastguard Worker ldr x30,[x29,#8] // pull return address 1351*8fb009dcSAndroid Build Coastguard Worker 1352*8fb009dcSAndroid Build Coastguard Worker sub x28,x5,#8*4 1353*8fb009dcSAndroid Build Coastguard Worker.Lmul4x_cond_copy: 1354*8fb009dcSAndroid Build Coastguard Worker sub x28,x28,#8*4 1355*8fb009dcSAndroid Build Coastguard Worker csel x10,x19,x6,lo 1356*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x26,#8*0] 1357*8fb009dcSAndroid Build Coastguard Worker csel x11,x20,x7,lo 1358*8fb009dcSAndroid Build Coastguard Worker ldp x6,x7,[x27,#8*4] 1359*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x1,#8*4] 1360*8fb009dcSAndroid Build Coastguard Worker csel x12,x21,x8,lo 1361*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x26,#8*2] 1362*8fb009dcSAndroid Build Coastguard Worker add x26,x26,#8*4 1363*8fb009dcSAndroid Build Coastguard Worker csel x13,x22,x9,lo 1364*8fb009dcSAndroid Build Coastguard Worker ldp x8,x9,[x27,#8*6] 1365*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x1,#8*6] 1366*8fb009dcSAndroid Build Coastguard Worker add x1,x1,#8*4 1367*8fb009dcSAndroid Build Coastguard Worker stp x10,x11,[x27,#8*0] 1368*8fb009dcSAndroid Build Coastguard Worker stp x12,x13,[x27,#8*2] 1369*8fb009dcSAndroid Build Coastguard Worker add x27,x27,#8*4 1370*8fb009dcSAndroid Build Coastguard Worker cbnz x28,.Lmul4x_cond_copy 1371*8fb009dcSAndroid Build 
Coastguard Worker 1372*8fb009dcSAndroid Build Coastguard Worker csel x10,x19,x6,lo 1373*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x26,#8*0] 1374*8fb009dcSAndroid Build Coastguard Worker csel x11,x20,x7,lo 1375*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x26,#8*2] 1376*8fb009dcSAndroid Build Coastguard Worker csel x12,x21,x8,lo 1377*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x26,#8*3] 1378*8fb009dcSAndroid Build Coastguard Worker csel x13,x22,x9,lo 1379*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[x26,#8*4] 1380*8fb009dcSAndroid Build Coastguard Worker stp x10,x11,[x27,#8*0] 1381*8fb009dcSAndroid Build Coastguard Worker stp x12,x13,[x27,#8*2] 1382*8fb009dcSAndroid Build Coastguard Worker 1383*8fb009dcSAndroid Build Coastguard Worker b .Lmul4x_done 1384*8fb009dcSAndroid Build Coastguard Worker 1385*8fb009dcSAndroid Build Coastguard Worker.align 4 1386*8fb009dcSAndroid Build Coastguard Worker.Lmul4x4_post_condition: 1387*8fb009dcSAndroid Build Coastguard Worker adc x0,x0,xzr 1388*8fb009dcSAndroid Build Coastguard Worker ldr x1,[x29,#96] // pull rp 1389*8fb009dcSAndroid Build Coastguard Worker // x19-3,x0 hold result, x14-7 hold modulus 1390*8fb009dcSAndroid Build Coastguard Worker subs x6,x19,x14 1391*8fb009dcSAndroid Build Coastguard Worker ldr x30,[x29,#8] // pull return address 1392*8fb009dcSAndroid Build Coastguard Worker sbcs x7,x20,x15 1393*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*0] 1394*8fb009dcSAndroid Build Coastguard Worker sbcs x8,x21,x16 1395*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*2] 1396*8fb009dcSAndroid Build Coastguard Worker sbcs x9,x22,x17 1397*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*4] 1398*8fb009dcSAndroid Build Coastguard Worker sbcs xzr,x0,xzr // did it borrow? 
1399*8fb009dcSAndroid Build Coastguard Worker stp xzr,xzr,[sp,#8*6] 1400*8fb009dcSAndroid Build Coastguard Worker 1401*8fb009dcSAndroid Build Coastguard Worker // x6-3 hold result-modulus 1402*8fb009dcSAndroid Build Coastguard Worker csel x6,x19,x6,lo 1403*8fb009dcSAndroid Build Coastguard Worker csel x7,x20,x7,lo 1404*8fb009dcSAndroid Build Coastguard Worker csel x8,x21,x8,lo 1405*8fb009dcSAndroid Build Coastguard Worker csel x9,x22,x9,lo 1406*8fb009dcSAndroid Build Coastguard Worker stp x6,x7,[x1,#8*0] 1407*8fb009dcSAndroid Build Coastguard Worker stp x8,x9,[x1,#8*2] 1408*8fb009dcSAndroid Build Coastguard Worker 1409*8fb009dcSAndroid Build Coastguard Worker.Lmul4x_done: 1410*8fb009dcSAndroid Build Coastguard Worker ldp x19,x20,[x29,#16] 1411*8fb009dcSAndroid Build Coastguard Worker mov sp,x29 1412*8fb009dcSAndroid Build Coastguard Worker ldp x21,x22,[x29,#32] 1413*8fb009dcSAndroid Build Coastguard Worker mov x0,#1 1414*8fb009dcSAndroid Build Coastguard Worker ldp x23,x24,[x29,#48] 1415*8fb009dcSAndroid Build Coastguard Worker ldp x25,x26,[x29,#64] 1416*8fb009dcSAndroid Build Coastguard Worker ldp x27,x28,[x29,#80] 1417*8fb009dcSAndroid Build Coastguard Worker ldr x29,[sp],#128 1418*8fb009dcSAndroid Build Coastguard Worker // x30 is popped earlier 1419*8fb009dcSAndroid Build Coastguard Worker AARCH64_VALIDATE_LINK_REGISTER 1420*8fb009dcSAndroid Build Coastguard Worker ret 1421*8fb009dcSAndroid Build Coastguard Worker.size __bn_mul4x_mont,.-__bn_mul4x_mont 1422*8fb009dcSAndroid Build Coastguard Worker.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 1423*8fb009dcSAndroid Build Coastguard Worker.align 2 1424*8fb009dcSAndroid Build Coastguard Worker.align 4 1425*8fb009dcSAndroid Build Coastguard Worker#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && 
defined(__ELF__) 1426