; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

; Syntax: NASM (Intel operand order). Target: x86-64, win64 object format.
; Everything between %ifidn and %else is assembled only for win64 output.
%ifidn __OUTPUT_FORMAT__, win64
default	rel
; The size keywords and the CET marker are defined as empty here, so
; "XMMWORD[rsi]" assembles as "[rsi]" and "_CET_ENDBR" emits nothing.
%define XMMWORD
%define YMMWORD
%define ZMMWORD
%define _CET_ENDBR

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section	.text code align=64

;-----------------------------------------------------------------------
; beeu_mod_inverse_vartime
;
; Binary extended Euclidean style modular inversion on 256-bit values
; (four 64-bit limbs, least-significant limb first).
;
; Presumed C prototype (not visible in this file -- confirm against the
; C header):
;   int beeu_mod_inverse_vartime(uint64_t out[4], const uint64_t a[4],
;                                const uint64_t n[4]);
;
; ABI:   Microsoft x64 on entry (rcx, rdx, r8 = out, a, n). The prologue
;        moves the arguments into rdi, rsi, rdx so the body can use the
;        System V register assignment.
; Out:   rax = 1 and out[0..3] written on success;
;        rax = 0 and out untouched when the final A is not 1.
; Time:  variable -- branches and loop counts depend on the operand
;        values.
; NOTE(review): the "halve X mod n" step adds n to an odd X before
;        shifting, which only yields an exact halving when n is odd;
;        presumably callers guarantee an odd n -- confirm.
;
; Register roles inside the main loop:
;   r8,r9,r10,r11,rdi    X  (five limbs, starts at 1)
;   r12,r13,r14,r15,rbp  Y  (five limbs, starts at 0)
;   rdx                  pointer to n (read throughout, never modified)
;   rcx                  single-bit probe mask / shift count
;   rax,rbx,rsi          scratch (rsi stops being the a pointer once a
;                        has been copied to the stack)
;
; Stack frame after "sub rsp,80":
;   [rsp+0]       saved out pointer
;   [rsp+8]       unused
;   [rsp+16..47]  A (starts as n); reused for Y limbs 0-3 while reducing
;   [rsp+48..79]  B (starts as a); [rsp+48] reused for Y limb 4
;
; Loop invariants that follow from the updates below:
;   X*a == B (mod n)   and   -Y*a == A (mod n)
;-----------------------------------------------------------------------
global	beeu_mod_inverse_vartime
ALIGN	32
beeu_mod_inverse_vartime:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_beeu_mod_inverse_vartime:
	; Remap Win64 argument registers onto the System V ones.
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	; Emitted generically by the ABI translation; r8 and r9 are
	; overwritten below before they are ever read.
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

_CET_ENDBR
	; Save the registers the body clobbers. The pops at
	; $L$beeu_finish mirror this order exactly.
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	push	rbx
	push	rsi

	sub	rsp,80

	; Keep the out pointer; rdi is reused as the top limb of X.
	mov	QWORD[rsp],rdi

	; X = 1
	mov	r8,1
	xor	r9,r9
	xor	r10,r10
	xor	r11,r11
	xor	rdi,rdi

	; Y = 0
	xor	r12,r12
	xor	r13,r13
	xor	r14,r14
	xor	r15,r15
	xor	rbp,rbp

	; B = a (copied from [rsi] to the stack)
	vmovdqu	xmm0,XMMWORD[rsi]
	vmovdqu	xmm1,XMMWORD[16+rsi]
	vmovdqu	XMMWORD[48+rsp],xmm0
	vmovdqu	XMMWORD[64+rsp],xmm1

	; A = n (copied from [rdx] to the stack)
	vmovdqu	xmm0,XMMWORD[rdx]
	vmovdqu	xmm1,XMMWORD[16+rdx]
	vmovdqu	XMMWORD[16+rsp],xmm0
	vmovdqu	XMMWORD[32+rsp],xmm1

$L$beeu_loop:
	; Leave the loop once B == 0 (OR of all four limbs is zero).
	xor	rbx,rbx
	or	rbx,QWORD[48+rsp]
	or	rbx,QWORD[56+rsp]
	or	rbx,QWORD[64+rsp]
	or	rbx,QWORD[72+rsp]
	jz	NEAR $L$beeu_loop_end

	; Strip trailing zero bits from B, halving X modulo n once per
	; stripped bit so that X*a == B (mod n) keeps holding.
	; rcx is a one-bit mask walking up from bit 0 of B's low limb.
	mov	rcx,1

$L$beeu_shift_loop_XB:
	mov	rbx,rcx
	and	rbx,QWORD[48+rsp]
	jnz	NEAR $L$beeu_shift_loop_end_XB

	; X = X / 2 mod n: if X is odd, add n first so the low bit
	; that gets shifted out is zero.
	mov	rbx,1
	and	rbx,r8
	jz	NEAR $L$shift1_0
	add	r8,QWORD[rdx]
	adc	r9,QWORD[8+rdx]
	adc	r10,QWORD[16+rdx]
	adc	r11,QWORD[24+rdx]
	adc	rdi,0

$L$shift1_0:
	; Five-limb logical right shift of X by one bit.
	shrd	r8,r9,1
	shrd	r9,r10,1
	shrd	r10,r11,1
	shrd	r11,rdi,1
	shr	rdi,1

	shl	rcx,1

	; Cap a single pass at 27 stripped bits (mask reaches bit 27).
	; Any remaining zero bits are handled on the next trip around
	; $L$beeu_loop.
	cmp	rcx,0x8000000
	jne	NEAR $L$beeu_shift_loop_XB

$L$beeu_shift_loop_end_XB:
	; rcx holds exactly one set bit, so bsf turns the mask into the
	; number of bits to shift B by. Zero means B was already odd.
	bsf	rcx,rcx
	test	rcx,rcx
	jz	NEAR $L$beeu_no_shift_XB

	; B >>= cl across all four limbs. rsi is free to clobber here:
	; the a pointer it held was only needed for the initial copy.
	mov	rax,QWORD[((8+48))+rsp]
	mov	rbx,QWORD[((16+48))+rsp]
	mov	rsi,QWORD[((24+48))+rsp]

	shrd	QWORD[((0+48))+rsp],rax,cl
	shrd	QWORD[((8+48))+rsp],rbx,cl
	shrd	QWORD[((16+48))+rsp],rsi,cl

	shr	rsi,cl
	mov	QWORD[((24+48))+rsp],rsi

$L$beeu_no_shift_XB:
	; Same procedure for the (A, Y) pair: strip trailing zero bits
	; from A while halving Y modulo n.
	mov	rcx,1

$L$beeu_shift_loop_YA:
	mov	rbx,rcx
	and	rbx,QWORD[16+rsp]
	jnz	NEAR $L$beeu_shift_loop_end_YA

	; Y = Y / 2 mod n (add n first when Y is odd).
	mov	rbx,1
	and	rbx,r12
	jz	NEAR $L$shift1_1
	add	r12,QWORD[rdx]
	adc	r13,QWORD[8+rdx]
	adc	r14,QWORD[16+rdx]
	adc	r15,QWORD[24+rdx]
	adc	rbp,0

$L$shift1_1:
	; Five-limb logical right shift of Y by one bit.
	shrd	r12,r13,1
	shrd	r13,r14,1
	shrd	r14,r15,1
	shrd	r15,rbp,1
	shr	rbp,1

	shl	rcx,1

	; Same 27-bit cap per pass as in the XB loop.
	cmp	rcx,0x8000000
	jne	NEAR $L$beeu_shift_loop_YA

$L$beeu_shift_loop_end_YA:
	; Convert the one-bit mask into a shift count; zero => A is odd.
	bsf	rcx,rcx
	test	rcx,rcx
	jz	NEAR $L$beeu_no_shift_YA

	; A >>= cl across all four limbs.
	mov	rax,QWORD[((8+16))+rsp]
	mov	rbx,QWORD[((16+16))+rsp]
	mov	rsi,QWORD[((24+16))+rsp]

	shrd	QWORD[((0+16))+rsp],rax,cl
	shrd	QWORD[((8+16))+rsp],rbx,cl
	shrd	QWORD[((16+16))+rsp],rsi,cl

	shr	rsi,cl
	mov	QWORD[((24+16))+rsp],rsi

$L$beeu_no_shift_YA:
	; T = B - A, kept in rax,rbx,rsi,rcx. No borrow means B >= A.
	mov	rax,QWORD[48+rsp]
	mov	rbx,QWORD[56+rsp]
	mov	rsi,QWORD[64+rsp]
	mov	rcx,QWORD[72+rsp]
	sub	rax,QWORD[16+rsp]
	sbb	rbx,QWORD[24+rsp]
	sbb	rsi,QWORD[32+rsp]
	sbb	rcx,QWORD[40+rsp]
	jnc	NEAR $L$beeu_B_bigger_than_A

	; B < A:  A = A - B
	mov	rax,QWORD[16+rsp]
	mov	rbx,QWORD[24+rsp]
	mov	rsi,QWORD[32+rsp]
	mov	rcx,QWORD[40+rsp]
	sub	rax,QWORD[48+rsp]
	sbb	rbx,QWORD[56+rsp]
	sbb	rsi,QWORD[64+rsp]
	sbb	rcx,QWORD[72+rsp]
	mov	QWORD[16+rsp],rax
	mov	QWORD[24+rsp],rbx
	mov	QWORD[32+rsp],rsi
	mov	QWORD[40+rsp],rcx

	; ... and Y = Y + X to keep -Y*a == A (mod n).
	add	r12,r8
	adc	r13,r9
	adc	r14,r10
	adc	r15,r11
	adc	rbp,rdi
	jmp	NEAR $L$beeu_loop

$L$beeu_B_bigger_than_A:
	; B >= A:  B = T = B - A
	mov	QWORD[48+rsp],rax
	mov	QWORD[56+rsp],rbx
	mov	QWORD[64+rsp],rsi
	mov	QWORD[72+rsp],rcx

	; ... and X = X + Y to keep X*a == B (mod n).
	add	r8,r12
	adc	r9,r13
	adc	r10,r14
	adc	r11,r15
	adc	rdi,rbp

	jmp	NEAR $L$beeu_loop

$L$beeu_loop_end:
	; B == 0 here. Succeed only if A == 1, tested as
	; ((A0 - 1) | A1 | A2 | A3) == 0; otherwise return 0.
	mov	rbx,QWORD[16+rsp]
	sub	rbx,1
	or	rbx,QWORD[24+rsp]
	or	rbx,QWORD[32+rsp]
	or	rbx,QWORD[40+rsp]

	jnz	NEAR $L$beeu_err

	; X is no longer needed; reuse its registers to hold n.
	mov	r8,QWORD[rdx]
	mov	r9,QWORD[8+rdx]
	mov	r10,QWORD[16+rdx]
	mov	r11,QWORD[24+rdx]
	xor	rdi,rdi

$L$beeu_reduction_loop:
	; Snapshot Y so the subtraction below can be undone. The A and
	; B stack slots are dead by now and serve as the save area.
	mov	QWORD[16+rsp],r12
	mov	QWORD[24+rsp],r13
	mov	QWORD[32+rsp],r14
	mov	QWORD[40+rsp],r15
	mov	QWORD[48+rsp],rbp

	; Y = Y - n over five limbs (n's fifth limb is zero).
	sub	r12,r8
	sbb	r13,r9
	sbb	r14,r10
	sbb	r15,r11
	sbb	rbp,0

	; On borrow, restore the low four limbs from the snapshot and
	; fall out of the loop. cmovc leaves the flags untouched, so
	; jnc still tests the borrow from the final sbb. rbp is not
	; restored, and it is not read again after the loop exits.
	cmovc	r12,QWORD[16+rsp]
	cmovc	r13,QWORD[24+rsp]
	cmovc	r14,QWORD[32+rsp]
	cmovc	r15,QWORD[40+rsp]
	jnc	NEAR $L$beeu_reduction_loop

	; result = n - Y, which cancels the minus sign in
	; -Y*a == 1 (mod n).
	sub	r8,r12
	sbb	r9,r13
	sbb	r10,r14
	sbb	r11,r15

$L$beeu_save:
	; Reload the out pointer saved at entry and store the result.
	mov	rdi,QWORD[rsp]

	mov	QWORD[rdi],r8
	mov	QWORD[8+rdi],r9
	mov	QWORD[16+rdi],r10
	mov	QWORD[24+rdi],r11

	; Return 1 (success).
	mov	rax,1
	jmp	NEAR $L$beeu_finish

$L$beeu_err:
	; Return 0 (final A was not 1); out is left unwritten.
	xor	rax,rax

$L$beeu_finish:
	add	rsp,80

	; Reverse of the push sequence in the prologue.
	pop	rsi
	pop	rbx
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp

	; Restore the caller's rdi/rsi from the slots written by the
	; WIN64 prologue.
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	ret

$L$SEH_end_beeu_mod_inverse_vartime:
%else
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
ret
%endif