; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
; 2024-06-16 : Igor Pavlov : Public domain
;
; Overview (derived from the code in this file):
;   Defines one procedure, Sha256_UpdateBlocks_HW, declared with 3 parameters
;   (referred to below as rState, rData, rNum). For each of rNum input blocks
;   of 64 bytes at rData it updates the 32-byte (8 x dword) state at rState.
;   The SHA instructions are emitted as raw opcode bytes through MY_SHA_INSTR
;   instead of mnemonics.
;
;   The following names are NOT defined in this file and are expected to come
;   from 7zAsm.asm: MY_ASM_START, MY_PROC, MY_ENDP, REG_ABI_PARAM_0..2,
;   REG_SIZE, IS_LINUX, IS_CDECL, x64 and the register aliases r0, r3, r4, r5.
;   NOTE(review): from its use in MY_PROLOG / MY_EPILOG, r4 acts as the stack
;   pointer and r5 as a frame register -- confirm against 7zAsm.asm.

include 7zAsm.asm

MY_ASM_START

; .data
; public K

; we can use external SHA256_K_ARRAY defined in Sha256.c
; but we must guarantee that SHA256_K_ARRAY is aligned for 16-bytes
; (RND4 reads the table with movdqa, which needs a 16-byte aligned address)

; The block below is disabled: MASM ignores everything from the COMMENT
; directive up to the line that contains the closing delimiter character.
COMMENT @
ifdef x64
K_CONST equ SHA256_K_ARRAY
else
K_CONST equ _SHA256_K_ARRAY
endif
EXTRN   K_CONST:xmmword
@

CONST   SEGMENT READONLY

; pshufb control mask: byte indices 3,2,1,0 / 7,6,5,4 / ... reverse the byte
; order inside each of the four dwords of a 16-byte vector (used by LOAD_W).
align 16
Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12

; Local copy of the round-constant table: 16 rows of 4 dwords = 64 dwords.
; RND4 reads row (k) as one 16-byte vector at [rTable + (k) * 16].
; COMMENT @
align 16
K_CONST \
DD 0428a2f98H, 071374491H, 0b5c0fbcfH, 0e9b5dba5H
DD 03956c25bH, 059f111f1H, 0923f82a4H, 0ab1c5ed5H
DD 0d807aa98H, 012835b01H, 0243185beH, 0550c7dc3H
DD 072be5d74H, 080deb1feH, 09bdc06a7H, 0c19bf174H
DD 0e49b69c1H, 0efbe4786H, 00fc19dc6H, 0240ca1ccH
DD 02de92c6fH, 04a7484aaH, 05cb0a9dcH, 076f988daH
DD 0983e5152H, 0a831c66dH, 0b00327c8H, 0bf597fc7H
DD 0c6e00bf3H, 0d5a79147H, 006ca6351H, 014292967H
DD 027b70a85H, 02e1b2138H, 04d2c6dfcH, 053380d13H
DD 0650a7354H, 0766a0abbH, 081c2c92eH, 092722c85H
DD 0a2bfe8a1H, 0a81a664bH, 0c24b8b70H, 0c76c51a3H
DD 0d192e819H, 0d6990624H, 0f40e3585H, 0106aa070H
DD 019a4c116H, 01e376c08H, 02748774cH, 034b0bcb5H
DD 0391c0cb3H, 04ed8aa4aH, 05b9cca4fH, 0682e6ff3H
DD 0748f82eeH, 078a5636fH, 084c87814H, 08cc70208H
DD 090befffaH, 0a4506cebH, 0bef9a3f7H, 0c67178f2H
; @

CONST   ENDS

; _TEXT$SHA256OPT SEGMENT 'CODE'

; 32-bit build only: enable the instruction sets needed for the SSE code below.
ifndef x64
    .686
    .xmm
endif

; jwasm-based assemblers for linux and linker from new versions of binutils
; can generate incorrect code for load [ARRAY + offset] instructions.
; 22.00: we load K_CONST offset to (rTable) register to avoid jwasm+binutils problem
        rTable  equ r0
        ; rTable  equ K_CONST

; rNum: block counter register.
;   x64: taken directly from the third ABI parameter register.
;   x86: r3 is used; MY_PROLOG saves r3 and loads the count from the stack.
; LOCAL_SIZE: bytes of stack reserved by MY_PROLOG.
;   x64 with IS_LINUX eq 0: two 16-byte slots (xmm8, xmm9 are stored there).
;   x64 with IS_LINUX ne 0: not defined and not used.
;   x86: one 16-byte slot (used as state0_save).
ifdef x64
        rNum    equ REG_ABI_PARAM_2
    if (IS_LINUX eq 0)
        LOCAL_SIZE equ (16 * 2)
    endif
else
        rNum    equ r3
        LOCAL_SIZE equ (16 * 1)
endif

; Pointer to the 32-byte state and pointer to the input data.
rState equ REG_ABI_PARAM_0
rData  equ REG_ABI_PARAM_1






; MY_SHA_INSTR cmd, a1, a2
;   Emits a 4-byte instruction: 0F 38 (cmd) (ModRM).
;   ModRM = 0C0H + a1 * 8 + a2, i.e. register-direct form with a1 in the
;   reg field and a2 in the r/m field.
;   a1 and a2 are xmm register NUMBERS, not names. No REX prefix is emitted,
;   so both must be in the range 0..7 for the encoding to be correct.
MY_SHA_INSTR macro cmd, a1, a2
        db 0fH, 038H, cmd, (0c0H + a1 * 8 + a2)
endm

; Third opcode byte for each SHA-256 instruction emitted through MY_SHA_INSTR.
cmd_sha256rnds2 equ 0cbH
cmd_sha256msg1  equ 0ccH
cmd_sha256msg2  equ 0cdH

; Thin wrappers: a1 = destination xmm number, a2 = source xmm number.
; Per the Intel instruction reference, sha256rnds2 also reads xmm0 implicitly;
; that is why msg (below) is bound to xmm0.
MY_sha256rnds2 macro a1, a2
        MY_SHA_INSTR  cmd_sha256rnds2, a1, a2
endm

MY_sha256msg1 macro a1, a2
        MY_SHA_INSTR  cmd_sha256msg1, a1, a2
endm

MY_sha256msg2 macro a1, a2
        MY_SHA_INSTR  cmd_sha256msg2, a1, a2
endm

; MY_PROLOG
;   x64, IS_LINUX eq 0:
;     Saves xmm6 and xmm7 at [r4 + 8] and [r4 + 24] BEFORE r4 is adjusted,
;     then lowers r4 by LOCAL_SIZE + 8 and saves xmm8 and xmm9 in the new area.
;     NOTE(review): presumably this is the Windows x64 ABI, where xmm6 and up
;     are callee-saved and the 32 bytes above the return address are the
;     caller-provided home area -- confirm. movdqa needs 16-byte alignment, so
;     the code assumes r4 + 8 is 16-byte aligned on entry.
;   x64, IS_LINUX ne 0:
;     Emits nothing.
;   x86:
;     Pushes r3 and r5, keeps the pre-alignment r4 in r5, loads the parameters
;     that are passed on the stack, then aligns r4 down to 16 bytes and
;     reserves LOCAL_SIZE bytes (the slot used as state0_save).
;     PARAM_OFFSET = REG_SIZE * (1 + NUM_PUSH_REGS); the extra 1 presumably
;     skips the return address -- confirm.
;     With IS_CDECL gt 0 all three parameters are read from the stack;
;     otherwise only rNum is read from the stack.
MY_PROLOG macro
    ifdef x64
      if (IS_LINUX eq 0)
        movdqa  [r4 + 8], xmm6
        movdqa  [r4 + 8 + 16], xmm7
        sub     r4, LOCAL_SIZE + 8
        movdqa  [r4     ], xmm8
        movdqa  [r4 + 16], xmm9
      endif
    else ; x86
        push    r3
        push    r5
        mov     r5, r4
        NUM_PUSH_REGS   equ 2
        PARAM_OFFSET    equ (REG_SIZE * (1 + NUM_PUSH_REGS))
      if (IS_CDECL gt 0)
        mov     rState, [r4 + PARAM_OFFSET]
        mov     rData,  [r4 + PARAM_OFFSET + REG_SIZE * 1]
        mov     rNum,   [r4 + PARAM_OFFSET + REG_SIZE * 2]
      else ; fastcall
        mov     rNum,   [r4 + PARAM_OFFSET]
      endif
        and     r4, -16
        sub     r4, LOCAL_SIZE
    endif
endm

; MY_EPILOG
;   Mirror image of MY_PROLOG: restores xmm8/xmm9, releases the local area and
;   restores xmm6/xmm7 (x64, IS_LINUX eq 0), or restores r4, r5 and r3 (x86).
;   Ends with MY_ENDP, which is defined outside this file (presumably it emits
;   the return and closes the procedure -- confirm in 7zAsm.asm).
MY_EPILOG macro
    ifdef x64
      if (IS_LINUX eq 0)
        movdqa  xmm8, [r4]
        movdqa  xmm9, [r4 + 16]
        add     r4, LOCAL_SIZE + 8
        movdqa  xmm6, [r4 + 8]
        movdqa  xmm7, [r4 + 8 + 16]
      endif
    else ; x86
        mov     r4, r5
        pop     r5
        pop     r3
    endif
    MY_ENDP
endm


; Register assignment.
;   msg and tmp are two names for the SAME register (xmm0).
;   state0_N / state1_N are register numbers (for MY_SHA_INSTR);
;   state0 / state1 are the matching register names (xmm2 / xmm3).
;   w_regs is the number of the first of four message registers (xmm4..xmm7).
msg        equ xmm0
tmp        equ xmm0
state0_N   equ 2
state1_N   equ 3
w_regs     equ 4


state1_save equ xmm1
state0  equ @CatStr(xmm, %state0_N)
state1  equ @CatStr(xmm, %state1_N)


; x64: saved state0 and the byte-swap mask live in xmm8 / xmm9.
; x86: saved state0 lives in the aligned stack slot at [r4], and the mask
;      shares xmm0 with msg/tmp, so it is overwritten by RND4 and has to be
;      reloaded for every block (see LOAD_MASK placement in the procedure).
ifdef x64
        state0_save  equ  xmm8
        mask2        equ  xmm9
else
        state0_save  equ  [r4]
        mask2        equ  xmm0
endif

; Loads the 16-byte byte-swap mask into mask2 (aligned load).
LOAD_MASK macro
        movdqa  mask2, XMMWORD PTR Reverse_Endian_Mask
endm

; LOAD_W k
;   Loads 16 input bytes from [rData + 16 * k] (unaligned load) into message
;   register number (w_regs + k) and reverses the bytes of each dword.
LOAD_W macro k:req
        movdqu  @CatStr(xmm, %(w_regs + k)), [rData + (16 * (k))]
        pshufb  @CatStr(xmm, %(w_regs + k)), mask2
endm


; pre1 / pre2 control how far ahead of its use the message schedule is
; computed inside RND4. Constraint stated by the author:
; pre1 <= 4 && pre2 >= 1 && pre1 > pre2 && (pre1 - pre2) <= 1
; With the values below, RND4 emits the msg1 step for k = 1..12 and the
; palignr / paddd / msg2 step for k = 2..13.
pre1 equ 3
pre2 equ 2



; RND4 k   (k = 0..15, one invocation per group of four rounds)
;   1. msg = table row k + message register number (w_regs + (k mod 4)).
;   2. sha256rnds2 with destination state0, source state1.
;   3. pshufd 0eH copies dwords 2,3 of msg into dwords 0,1.
;   4. optional msg1 step (gated by pre1).
;   5. sha256rnds2 with destination state1, source state0.
;   6. optional palignr / paddd / msg2 step (gated by pre2); this step uses
;      tmp, which is the same register as msg, so msg is dead at that point.
;   Message registers are used as a ring of four, indexed modulo 4.
RND4 macro k
        movdqa  msg, xmmword ptr [rTable + (k) * 16]
        paddd   msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4)))
        MY_sha256rnds2 state0_N, state1_N
        pshufd   msg, msg, 0eH

    if (k GE (4 - pre1)) AND (k LT (16 - pre1))
        ; w4[0] = msg1(w4[-4], w4[-3])
        MY_sha256msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))
    endif

        MY_sha256rnds2 state1_N, state0_N

    if (k GE (4 - pre2)) AND (k LT (16 - pre2))
        movdqa  tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 1) mod 4)))
        palignr tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4))), 4
        paddd   @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), tmp
        ; w4[0] = msg2(w4[0], w4[-1])
        MY_sha256msg2 %(w_regs + ((k + pre2) mod 4)), %(w_regs + ((k + pre2 - 1) mod 4))
    endif
endm





; REVERSE_STATE
;   Shuffles the two state registers between the layout stored at rState and
;   the layout used by the round code. Lane comments list dwords from the
;   highest lane to the lowest and describe the first use (after loading).
;   The shuffle is its own inverse, so the same macro is used again before
;   the state is stored back. Clobbers tmp (xmm0).
REVERSE_STATE macro
                               ; state0 ; dcba
                               ; state1 ; hgfe
        pshufd      tmp, state0, 01bH   ; abcd
        pshufd   state0, state1, 01bH   ; efgh
        movdqa   state1, state0         ; efgh
        punpcklqdq  state0, tmp         ; cdgh
        punpckhqdq  state1, tmp         ; abef
endm


; Sha256_UpdateBlocks_HW (3 parameters)
;   rState : pointer to 32 bytes of state, read and written with unaligned moves
;   rData  : pointer to the input; 64 bytes are consumed per block
;   rNum   : number of 64-byte blocks; if 0 the state is left untouched
;   Register / stack handling is done by MY_PROLOG and MY_EPILOG.
MY_PROC Sha256_UpdateBlocks_HW, 3
    MY_PROLOG

        ; keep the table address in a register (see the jwasm note above)
        lea     rTable, [K_CONST]

        ; nothing to do for zero blocks; still goes through the epilog
        cmp     rNum, 0
        je      end_c

        movdqu   state0, [rState]       ; dcba
        movdqu   state1, [rState + 16]  ; hgfe

        REVERSE_STATE

        ; x64: mask2 has its own register, so load it once outside the loop
        ifdef x64
        LOAD_MASK
        endif

    align 16
    nextBlock:
        ; remember the state at the start of this block
        movdqa  state0_save, state0
        movdqa  state1_save, state1

        ; x86: mask2 shares xmm0 with msg/tmp, so reload it for every block
        ifndef x64
        LOAD_MASK
        endif

        ; load the 64-byte block into the four message registers
        LOAD_W 0
        LOAD_W 1
        LOAD_W 2
        LOAD_W 3


        k = 0
        rept 16
          RND4 k
          k = k + 1
        endm

        ; add the state saved at the start of the block
        paddd   state0, state0_save
        paddd   state1, state1_save

        ; advance to the next block; loop while the counter is non-zero
        add     rData, 64
        sub     rNum, 1
        jnz     nextBlock

        ; back to the stored layout before writing the state out
        REVERSE_STATE

        movdqu  [rState], state0
        movdqu  [rState + 16], state1

  end_c:
MY_EPILOG

; _TEXT$SHA256OPT ENDS

end