1*62c56f98SSadaf Ebrahimi /** 2*62c56f98SSadaf Ebrahimi * \file bn_mul.h 3*62c56f98SSadaf Ebrahimi * 4*62c56f98SSadaf Ebrahimi * \brief Multi-precision integer library 5*62c56f98SSadaf Ebrahimi */ 6*62c56f98SSadaf Ebrahimi /* 7*62c56f98SSadaf Ebrahimi * Copyright The Mbed TLS Contributors 8*62c56f98SSadaf Ebrahimi * SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 9*62c56f98SSadaf Ebrahimi */ 10*62c56f98SSadaf Ebrahimi /* 11*62c56f98SSadaf Ebrahimi * Multiply source vector [s] with b, add result 12*62c56f98SSadaf Ebrahimi * to destination vector [d] and set carry c. 13*62c56f98SSadaf Ebrahimi * 14*62c56f98SSadaf Ebrahimi * Currently supports: 15*62c56f98SSadaf Ebrahimi * 16*62c56f98SSadaf Ebrahimi * . IA-32 (386+) . AMD64 / EM64T 17*62c56f98SSadaf Ebrahimi * . IA-32 (SSE2) . Motorola 68000 18*62c56f98SSadaf Ebrahimi * . PowerPC, 32-bit . MicroBlaze 19*62c56f98SSadaf Ebrahimi * . PowerPC, 64-bit . TriCore 20*62c56f98SSadaf Ebrahimi * . SPARC v8 . ARM v3+ 21*62c56f98SSadaf Ebrahimi * . Alpha . MIPS32 22*62c56f98SSadaf Ebrahimi * . C, longlong . C, generic 23*62c56f98SSadaf Ebrahimi */ 24*62c56f98SSadaf Ebrahimi #ifndef MBEDTLS_BN_MUL_H 25*62c56f98SSadaf Ebrahimi #define MBEDTLS_BN_MUL_H 26*62c56f98SSadaf Ebrahimi 27*62c56f98SSadaf Ebrahimi #include "mbedtls/build_info.h" 28*62c56f98SSadaf Ebrahimi 29*62c56f98SSadaf Ebrahimi #include "mbedtls/bignum.h" 30*62c56f98SSadaf Ebrahimi 31*62c56f98SSadaf Ebrahimi 32*62c56f98SSadaf Ebrahimi /* 33*62c56f98SSadaf Ebrahimi * Conversion macros for embedded constants: 34*62c56f98SSadaf Ebrahimi * build lists of mbedtls_mpi_uint's from lists of unsigned char's grouped by 8, 4 or 2 35*62c56f98SSadaf Ebrahimi */ 36*62c56f98SSadaf Ebrahimi #if defined(MBEDTLS_HAVE_INT32) 37*62c56f98SSadaf Ebrahimi 38*62c56f98SSadaf Ebrahimi #define MBEDTLS_BYTES_TO_T_UINT_4(a, b, c, d) \ 39*62c56f98SSadaf Ebrahimi ((mbedtls_mpi_uint) (a) << 0) | \ 40*62c56f98SSadaf Ebrahimi ((mbedtls_mpi_uint) (b) << 8) | \ 41*62c56f98SSadaf Ebrahimi ((mbedtls_mpi_uint) (c) << 16) | \ 42*62c56f98SSadaf Ebrahimi ((mbedtls_mpi_uint) (d) << 24) 43*62c56f98SSadaf Ebrahimi 44*62c56f98SSadaf Ebrahimi #define MBEDTLS_BYTES_TO_T_UINT_2(a, b) \ 45*62c56f98SSadaf Ebrahimi MBEDTLS_BYTES_TO_T_UINT_4(a, b, 0, 0) 46*62c56f98SSadaf Ebrahimi 47*62c56f98SSadaf Ebrahimi #define MBEDTLS_BYTES_TO_T_UINT_8(a, b, c, d, e, f, g, h) \ 48*62c56f98SSadaf Ebrahimi MBEDTLS_BYTES_TO_T_UINT_4(a, b, c, d), \ 49*62c56f98SSadaf Ebrahimi MBEDTLS_BYTES_TO_T_UINT_4(e, f, g, h) 50*62c56f98SSadaf Ebrahimi 51*62c56f98SSadaf Ebrahimi #else /* 64-bits */ 52*62c56f98SSadaf Ebrahimi 53*62c56f98SSadaf Ebrahimi #define MBEDTLS_BYTES_TO_T_UINT_8(a, b, c, d, e, f, g, h) \ 54*62c56f98SSadaf Ebrahimi ((mbedtls_mpi_uint) (a) << 0) | \ 55*62c56f98SSadaf Ebrahimi ((mbedtls_mpi_uint) (b) << 8) | \ 56*62c56f98SSadaf Ebrahimi ((mbedtls_mpi_uint) (c) << 16) | \ 57*62c56f98SSadaf Ebrahimi ((mbedtls_mpi_uint) (d) << 24) | \ 58*62c56f98SSadaf Ebrahimi ((mbedtls_mpi_uint) (e) << 32) | \ 59*62c56f98SSadaf Ebrahimi ((mbedtls_mpi_uint) (f) << 40) | \ 60*62c56f98SSadaf Ebrahimi ((mbedtls_mpi_uint) (g) << 48) | \ 61*62c56f98SSadaf Ebrahimi ((mbedtls_mpi_uint) (h) << 56) 62*62c56f98SSadaf Ebrahimi 63*62c56f98SSadaf Ebrahimi #define MBEDTLS_BYTES_TO_T_UINT_4(a, b, c, d) \ 64*62c56f98SSadaf Ebrahimi MBEDTLS_BYTES_TO_T_UINT_8(a, b, c, d, 0, 0, 0, 0) 65*62c56f98SSadaf Ebrahimi 66*62c56f98SSadaf Ebrahimi #define MBEDTLS_BYTES_TO_T_UINT_2(a, b) \ 67*62c56f98SSadaf Ebrahimi MBEDTLS_BYTES_TO_T_UINT_8(a, b, 0, 0, 0, 0, 0, 0) 68*62c56f98SSadaf Ebrahimi 69*62c56f98SSadaf Ebrahimi #endif /* bits in mbedtls_mpi_uint */ 70*62c56f98SSadaf Ebrahimi 71*62c56f98SSadaf Ebrahimi /* *INDENT-OFF* */ 72*62c56f98SSadaf Ebrahimi #if defined(MBEDTLS_HAVE_ASM) 73*62c56f98SSadaf Ebrahimi 74*62c56f98SSadaf Ebrahimi /* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */ 75*62c56f98SSadaf Ebrahimi #if defined(__GNUC__) && \ 76*62c56f98SSadaf Ebrahimi ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 ) 77*62c56f98SSadaf Ebrahimi 78*62c56f98SSadaf Ebrahimi /* 79*62c56f98SSadaf Ebrahimi * GCC < 5.0 treated the x86 ebx (which is used for the GOT) as a 80*62c56f98SSadaf Ebrahimi * fixed reserved register when building as PIC, leading to errors 81*62c56f98SSadaf Ebrahimi * like: bn_mul.h:46:13: error: PIC register clobbered by 'ebx' in 'asm' 82*62c56f98SSadaf Ebrahimi * 83*62c56f98SSadaf Ebrahimi * This is fixed by an improved register allocator in GCC 5+. From the 84*62c56f98SSadaf Ebrahimi * release notes: 85*62c56f98SSadaf Ebrahimi * Register allocation improvements: Reuse of the PIC hard register, 86*62c56f98SSadaf Ebrahimi * instead of using a fixed register, was implemented on x86/x86-64 87*62c56f98SSadaf Ebrahimi * targets. This improves generated PIC code performance as more hard 88*62c56f98SSadaf Ebrahimi * registers can be used. 89*62c56f98SSadaf Ebrahimi */ 90*62c56f98SSadaf Ebrahimi #if defined(__GNUC__) && __GNUC__ < 5 && defined(__PIC__) 91*62c56f98SSadaf Ebrahimi #define MULADDC_CANNOT_USE_EBX 92*62c56f98SSadaf Ebrahimi #endif 93*62c56f98SSadaf Ebrahimi 94*62c56f98SSadaf Ebrahimi /* 95*62c56f98SSadaf Ebrahimi * Disable use of the i386 assembly code below if option -O0, to disable all 96*62c56f98SSadaf Ebrahimi * compiler optimisations, is passed, detected with __OPTIMIZE__ 97*62c56f98SSadaf Ebrahimi * This is done as the number of registers used in the assembly code doesn't 98*62c56f98SSadaf Ebrahimi * work with the -O0 option. 99*62c56f98SSadaf Ebrahimi */ 100*62c56f98SSadaf Ebrahimi #if defined(__i386__) && defined(__OPTIMIZE__) && !defined(MULADDC_CANNOT_USE_EBX) 101*62c56f98SSadaf Ebrahimi 102*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 103*62c56f98SSadaf Ebrahimi { mbedtls_mpi_uint t; \ 104*62c56f98SSadaf Ebrahimi asm( \ 105*62c56f98SSadaf Ebrahimi "movl %%ebx, %0 \n\t" \ 106*62c56f98SSadaf Ebrahimi "movl %5, %%esi \n\t" \ 107*62c56f98SSadaf Ebrahimi "movl %6, %%edi \n\t" \ 108*62c56f98SSadaf Ebrahimi "movl %7, %%ecx \n\t" \ 109*62c56f98SSadaf Ebrahimi "movl %8, %%ebx \n\t" 110*62c56f98SSadaf Ebrahimi 111*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 112*62c56f98SSadaf Ebrahimi "lodsl \n\t" \ 113*62c56f98SSadaf Ebrahimi "mull %%ebx \n\t" \ 114*62c56f98SSadaf Ebrahimi "addl %%ecx, %%eax \n\t" \ 115*62c56f98SSadaf Ebrahimi "adcl $0, %%edx \n\t" \ 116*62c56f98SSadaf Ebrahimi "addl (%%edi), %%eax \n\t" \ 117*62c56f98SSadaf Ebrahimi "adcl $0, %%edx \n\t" \ 118*62c56f98SSadaf Ebrahimi "movl %%edx, %%ecx \n\t" \ 119*62c56f98SSadaf Ebrahimi "stosl \n\t" 120*62c56f98SSadaf Ebrahimi 121*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 122*62c56f98SSadaf Ebrahimi "movl %4, %%ebx \n\t" \ 123*62c56f98SSadaf Ebrahimi "movl %%ecx, %1 \n\t" \ 124*62c56f98SSadaf Ebrahimi "movl %%edi, %2 \n\t" \ 125*62c56f98SSadaf Ebrahimi "movl %%esi, %3 \n\t" \ 126*62c56f98SSadaf Ebrahimi : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \ 127*62c56f98SSadaf Ebrahimi : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \ 128*62c56f98SSadaf Ebrahimi : "eax", "ebx", "ecx", "edx", "esi", "edi" \ 129*62c56f98SSadaf Ebrahimi ); } 130*62c56f98SSadaf Ebrahimi 131*62c56f98SSadaf Ebrahimi #if defined(MBEDTLS_HAVE_SSE2) 132*62c56f98SSadaf Ebrahimi 133*62c56f98SSadaf Ebrahimi #define MULADDC_X8_INIT MULADDC_X1_INIT 134*62c56f98SSadaf Ebrahimi 135*62c56f98SSadaf Ebrahimi #define MULADDC_X8_CORE \ 136*62c56f98SSadaf Ebrahimi "movd %%ecx, %%mm1 \n\t" \ 137*62c56f98SSadaf Ebrahimi "movd %%ebx, %%mm0 \n\t" \ 138*62c56f98SSadaf Ebrahimi "movd (%%edi), %%mm3 \n\t" \ 139*62c56f98SSadaf Ebrahimi "paddq %%mm3, %%mm1 \n\t" \ 140*62c56f98SSadaf Ebrahimi "movd (%%esi), %%mm2 \n\t" \ 141*62c56f98SSadaf Ebrahimi "pmuludq %%mm0, %%mm2 \n\t" \ 142*62c56f98SSadaf Ebrahimi "movd 4(%%esi), %%mm4 \n\t" \ 143*62c56f98SSadaf Ebrahimi "pmuludq %%mm0, %%mm4 \n\t" \ 144*62c56f98SSadaf Ebrahimi "movd 8(%%esi), %%mm6 \n\t" \ 145*62c56f98SSadaf Ebrahimi "pmuludq %%mm0, %%mm6 \n\t" \ 146*62c56f98SSadaf Ebrahimi "movd 12(%%esi), %%mm7 \n\t" \ 147*62c56f98SSadaf Ebrahimi "pmuludq %%mm0, %%mm7 \n\t" \ 148*62c56f98SSadaf Ebrahimi "paddq %%mm2, %%mm1 \n\t" \ 149*62c56f98SSadaf Ebrahimi "movd 4(%%edi), %%mm3 \n\t" \ 150*62c56f98SSadaf Ebrahimi "paddq %%mm4, %%mm3 \n\t" \ 151*62c56f98SSadaf Ebrahimi "movd 8(%%edi), %%mm5 \n\t" \ 152*62c56f98SSadaf Ebrahimi "paddq %%mm6, %%mm5 \n\t" \ 153*62c56f98SSadaf Ebrahimi "movd 12(%%edi), %%mm4 \n\t" \ 154*62c56f98SSadaf Ebrahimi "paddq %%mm4, %%mm7 \n\t" \ 155*62c56f98SSadaf Ebrahimi "movd %%mm1, (%%edi) \n\t" \ 156*62c56f98SSadaf Ebrahimi "movd 16(%%esi), %%mm2 \n\t" \ 157*62c56f98SSadaf Ebrahimi "pmuludq %%mm0, %%mm2 \n\t" \ 158*62c56f98SSadaf Ebrahimi "psrlq $32, %%mm1 \n\t" \ 159*62c56f98SSadaf Ebrahimi "movd 20(%%esi), %%mm4 \n\t" \ 160*62c56f98SSadaf Ebrahimi "pmuludq %%mm0, %%mm4 \n\t" \ 161*62c56f98SSadaf Ebrahimi "paddq %%mm3, %%mm1 \n\t" \ 162*62c56f98SSadaf Ebrahimi "movd 24(%%esi), %%mm6 \n\t" \ 163*62c56f98SSadaf Ebrahimi "pmuludq %%mm0, %%mm6 \n\t" \ 164*62c56f98SSadaf Ebrahimi "movd %%mm1, 4(%%edi) \n\t" \ 165*62c56f98SSadaf Ebrahimi "psrlq $32, %%mm1 \n\t" \ 166*62c56f98SSadaf Ebrahimi "movd 28(%%esi), %%mm3 \n\t" \ 167*62c56f98SSadaf Ebrahimi "pmuludq %%mm0, %%mm3 \n\t" \ 168*62c56f98SSadaf Ebrahimi "paddq %%mm5, %%mm1 \n\t" \ 169*62c56f98SSadaf Ebrahimi "movd 16(%%edi), %%mm5 \n\t" \ 170*62c56f98SSadaf Ebrahimi "paddq %%mm5, %%mm2 \n\t" \ 171*62c56f98SSadaf Ebrahimi "movd %%mm1, 8(%%edi) \n\t" \ 172*62c56f98SSadaf Ebrahimi "psrlq $32, %%mm1 \n\t" \ 173*62c56f98SSadaf Ebrahimi "paddq %%mm7, %%mm1 \n\t" \ 174*62c56f98SSadaf Ebrahimi "movd 20(%%edi), %%mm5 \n\t" \ 175*62c56f98SSadaf Ebrahimi "paddq %%mm5, %%mm4 \n\t" \ 176*62c56f98SSadaf Ebrahimi "movd %%mm1, 12(%%edi) \n\t" \ 177*62c56f98SSadaf Ebrahimi "psrlq $32, %%mm1 \n\t" \ 178*62c56f98SSadaf Ebrahimi "paddq %%mm2, %%mm1 \n\t" \ 179*62c56f98SSadaf Ebrahimi "movd 24(%%edi), %%mm5 \n\t" \ 180*62c56f98SSadaf Ebrahimi "paddq %%mm5, %%mm6 \n\t" \ 181*62c56f98SSadaf Ebrahimi "movd %%mm1, 16(%%edi) \n\t" \ 182*62c56f98SSadaf Ebrahimi "psrlq $32, %%mm1 \n\t" \ 183*62c56f98SSadaf Ebrahimi "paddq %%mm4, %%mm1 \n\t" \ 184*62c56f98SSadaf Ebrahimi "movd 28(%%edi), %%mm5 \n\t" \ 185*62c56f98SSadaf Ebrahimi "paddq %%mm5, %%mm3 \n\t" \ 186*62c56f98SSadaf Ebrahimi "movd %%mm1, 20(%%edi) \n\t" \ 187*62c56f98SSadaf Ebrahimi "psrlq $32, %%mm1 \n\t" \ 188*62c56f98SSadaf Ebrahimi "paddq %%mm6, %%mm1 \n\t" \ 189*62c56f98SSadaf Ebrahimi "movd %%mm1, 24(%%edi) \n\t" \ 190*62c56f98SSadaf Ebrahimi "psrlq $32, %%mm1 \n\t" \ 191*62c56f98SSadaf Ebrahimi "paddq %%mm3, %%mm1 \n\t" \ 192*62c56f98SSadaf Ebrahimi "movd %%mm1, 28(%%edi) \n\t" \ 193*62c56f98SSadaf Ebrahimi "addl $32, %%edi \n\t" \ 194*62c56f98SSadaf Ebrahimi "addl $32, %%esi \n\t" \ 195*62c56f98SSadaf Ebrahimi "psrlq $32, %%mm1 \n\t" \ 196*62c56f98SSadaf Ebrahimi "movd %%mm1, %%ecx \n\t" 197*62c56f98SSadaf Ebrahimi 198*62c56f98SSadaf Ebrahimi #define MULADDC_X8_STOP \ 199*62c56f98SSadaf Ebrahimi "emms \n\t" \ 200*62c56f98SSadaf Ebrahimi "movl %4, %%ebx \n\t" \ 201*62c56f98SSadaf Ebrahimi "movl %%ecx, %1 \n\t" \ 202*62c56f98SSadaf Ebrahimi "movl %%edi, %2 \n\t" \ 203*62c56f98SSadaf Ebrahimi "movl %%esi, %3 \n\t" \ 204*62c56f98SSadaf Ebrahimi : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \ 205*62c56f98SSadaf Ebrahimi : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \ 206*62c56f98SSadaf Ebrahimi : "eax", "ebx", "ecx", "edx", "esi", "edi" \ 207*62c56f98SSadaf Ebrahimi ); } \ 208*62c56f98SSadaf Ebrahimi 209*62c56f98SSadaf Ebrahimi #endif /* SSE2 */ 210*62c56f98SSadaf Ebrahimi 211*62c56f98SSadaf Ebrahimi #endif /* i386 */ 212*62c56f98SSadaf Ebrahimi 213*62c56f98SSadaf Ebrahimi #if defined(__amd64__) || defined (__x86_64__) 214*62c56f98SSadaf Ebrahimi 215*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 216*62c56f98SSadaf Ebrahimi asm( \ 217*62c56f98SSadaf Ebrahimi "xorq %%r8, %%r8\n" 218*62c56f98SSadaf Ebrahimi 219*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 220*62c56f98SSadaf Ebrahimi "movq (%%rsi), %%rax\n" \ 221*62c56f98SSadaf Ebrahimi "mulq %%rbx\n" \ 222*62c56f98SSadaf Ebrahimi "addq $8, %%rsi\n" \ 223*62c56f98SSadaf Ebrahimi "addq %%rcx, %%rax\n" \ 224*62c56f98SSadaf Ebrahimi "movq %%r8, %%rcx\n" \ 225*62c56f98SSadaf Ebrahimi "adcq $0, %%rdx\n" \ 226*62c56f98SSadaf Ebrahimi "nop \n" \ 227*62c56f98SSadaf Ebrahimi "addq %%rax, (%%rdi)\n" \ 228*62c56f98SSadaf Ebrahimi "adcq %%rdx, %%rcx\n" \ 229*62c56f98SSadaf Ebrahimi "addq $8, %%rdi\n" 230*62c56f98SSadaf Ebrahimi 231*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 232*62c56f98SSadaf Ebrahimi : "+c" (c), "+D" (d), "+S" (s), "+m" (*(uint64_t (*)[16]) d) \ 233*62c56f98SSadaf Ebrahimi : "b" (b), "m" (*(const uint64_t (*)[16]) s) \ 234*62c56f98SSadaf Ebrahimi : "rax", "rdx", "r8" \ 235*62c56f98SSadaf Ebrahimi ); 236*62c56f98SSadaf Ebrahimi 237*62c56f98SSadaf Ebrahimi #endif /* AMD64 */ 238*62c56f98SSadaf Ebrahimi 239*62c56f98SSadaf Ebrahimi // The following assembly code assumes that a pointer will fit in a 64-bit register 240*62c56f98SSadaf Ebrahimi // (including ILP32 __aarch64__ ABIs such as on watchOS, hence the 2^32 - 1) 241*62c56f98SSadaf Ebrahimi #if defined(__aarch64__) && (UINTPTR_MAX == 0xfffffffful || UINTPTR_MAX == 0xfffffffffffffffful) 242*62c56f98SSadaf Ebrahimi 243*62c56f98SSadaf Ebrahimi /* 244*62c56f98SSadaf Ebrahimi * There are some issues around different compilers requiring different constraint 245*62c56f98SSadaf Ebrahimi * syntax for updating pointers from assembly code (see notes for 246*62c56f98SSadaf Ebrahimi * MBEDTLS_ASM_AARCH64_PTR_CONSTRAINT in common.h), especially on aarch64_32 (aka ILP32). 247*62c56f98SSadaf Ebrahimi * 248*62c56f98SSadaf Ebrahimi * For this reason we cast the pointers to/from uintptr_t here. 249*62c56f98SSadaf Ebrahimi */ 250*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 251*62c56f98SSadaf Ebrahimi do { uintptr_t muladdc_d = (uintptr_t) d, muladdc_s = (uintptr_t) s; asm( 252*62c56f98SSadaf Ebrahimi 253*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 254*62c56f98SSadaf Ebrahimi "ldr x4, [%x2], #8 \n\t" \ 255*62c56f98SSadaf Ebrahimi "ldr x5, [%x1] \n\t" \ 256*62c56f98SSadaf Ebrahimi "mul x6, x4, %4 \n\t" \ 257*62c56f98SSadaf Ebrahimi "umulh x7, x4, %4 \n\t" \ 258*62c56f98SSadaf Ebrahimi "adds x5, x5, x6 \n\t" \ 259*62c56f98SSadaf Ebrahimi "adc x7, x7, xzr \n\t" \ 260*62c56f98SSadaf Ebrahimi "adds x5, x5, %0 \n\t" \ 261*62c56f98SSadaf Ebrahimi "adc %0, x7, xzr \n\t" \ 262*62c56f98SSadaf Ebrahimi "str x5, [%x1], #8 \n\t" 263*62c56f98SSadaf Ebrahimi 264*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 265*62c56f98SSadaf Ebrahimi : "+r" (c), \ 266*62c56f98SSadaf Ebrahimi "+r" (muladdc_d), \ 267*62c56f98SSadaf Ebrahimi "+r" (muladdc_s), \ 268*62c56f98SSadaf Ebrahimi "+m" (*(uint64_t (*)[16]) d) \ 269*62c56f98SSadaf Ebrahimi : "r" (b), "m" (*(const uint64_t (*)[16]) s) \ 270*62c56f98SSadaf Ebrahimi : "x4", "x5", "x6", "x7", "cc" \ 271*62c56f98SSadaf Ebrahimi ); d = (mbedtls_mpi_uint *)muladdc_d; s = (mbedtls_mpi_uint *)muladdc_s; } while (0); 272*62c56f98SSadaf Ebrahimi 273*62c56f98SSadaf Ebrahimi #endif /* Aarch64 */ 274*62c56f98SSadaf Ebrahimi 275*62c56f98SSadaf Ebrahimi #if defined(__mc68020__) || defined(__mcpu32__) 276*62c56f98SSadaf Ebrahimi 277*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 278*62c56f98SSadaf Ebrahimi asm( \ 279*62c56f98SSadaf Ebrahimi "movl %3, %%a2 \n\t" \ 280*62c56f98SSadaf Ebrahimi "movl %4, %%a3 \n\t" \ 281*62c56f98SSadaf Ebrahimi "movl %5, %%d3 \n\t" \ 282*62c56f98SSadaf Ebrahimi "movl %6, %%d2 \n\t" \ 283*62c56f98SSadaf Ebrahimi "moveq #0, %%d0 \n\t" 284*62c56f98SSadaf Ebrahimi 285*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 286*62c56f98SSadaf Ebrahimi "movel %%a2@+, %%d1 \n\t" \ 287*62c56f98SSadaf Ebrahimi "mulul %%d2, %%d4:%%d1 \n\t" \ 288*62c56f98SSadaf Ebrahimi "addl %%d3, %%d1 \n\t" \ 289*62c56f98SSadaf Ebrahimi "addxl %%d0, %%d4 \n\t" \ 290*62c56f98SSadaf Ebrahimi "moveq #0, %%d3 \n\t" \ 291*62c56f98SSadaf Ebrahimi "addl %%d1, %%a3@+ \n\t" \ 292*62c56f98SSadaf Ebrahimi "addxl %%d4, %%d3 \n\t" 293*62c56f98SSadaf Ebrahimi 294*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 295*62c56f98SSadaf Ebrahimi "movl %%d3, %0 \n\t" \ 296*62c56f98SSadaf Ebrahimi "movl %%a3, %1 \n\t" \ 297*62c56f98SSadaf Ebrahimi "movl %%a2, %2 \n\t" \ 298*62c56f98SSadaf Ebrahimi : "=m" (c), "=m" (d), "=m" (s) \ 299*62c56f98SSadaf Ebrahimi : "m" (s), "m" (d), "m" (c), "m" (b) \ 300*62c56f98SSadaf Ebrahimi : "d0", "d1", "d2", "d3", "d4", "a2", "a3" \ 301*62c56f98SSadaf Ebrahimi ); 302*62c56f98SSadaf Ebrahimi 303*62c56f98SSadaf Ebrahimi #define MULADDC_X8_INIT MULADDC_X1_INIT 304*62c56f98SSadaf Ebrahimi 305*62c56f98SSadaf Ebrahimi #define MULADDC_X8_CORE \ 306*62c56f98SSadaf Ebrahimi "movel %%a2@+, %%d1 \n\t" \ 307*62c56f98SSadaf Ebrahimi "mulul %%d2, %%d4:%%d1 \n\t" \ 308*62c56f98SSadaf Ebrahimi "addxl %%d3, %%d1 \n\t" \ 309*62c56f98SSadaf Ebrahimi "addxl %%d0, %%d4 \n\t" \ 310*62c56f98SSadaf Ebrahimi "addl %%d1, %%a3@+ \n\t" \ 311*62c56f98SSadaf Ebrahimi "movel %%a2@+, %%d1 \n\t" \ 312*62c56f98SSadaf Ebrahimi "mulul %%d2, %%d3:%%d1 \n\t" \ 313*62c56f98SSadaf Ebrahimi "addxl %%d4, %%d1 \n\t" \ 314*62c56f98SSadaf Ebrahimi "addxl %%d0, %%d3 \n\t" \ 315*62c56f98SSadaf Ebrahimi "addl %%d1, %%a3@+ \n\t" \ 316*62c56f98SSadaf Ebrahimi "movel %%a2@+, %%d1 \n\t" \ 317*62c56f98SSadaf Ebrahimi "mulul %%d2, %%d4:%%d1 \n\t" \ 318*62c56f98SSadaf Ebrahimi "addxl %%d3, %%d1 \n\t" \ 319*62c56f98SSadaf Ebrahimi "addxl %%d0, %%d4 \n\t" \ 320*62c56f98SSadaf Ebrahimi "addl %%d1, %%a3@+ \n\t" \ 321*62c56f98SSadaf Ebrahimi "movel %%a2@+, %%d1 \n\t" \ 322*62c56f98SSadaf Ebrahimi "mulul %%d2, %%d3:%%d1 \n\t" \ 323*62c56f98SSadaf Ebrahimi "addxl %%d4, %%d1 \n\t" \ 324*62c56f98SSadaf Ebrahimi "addxl %%d0, %%d3 \n\t" \ 325*62c56f98SSadaf Ebrahimi "addl %%d1, %%a3@+ \n\t" \ 326*62c56f98SSadaf Ebrahimi "movel %%a2@+, %%d1 \n\t" \ 327*62c56f98SSadaf Ebrahimi "mulul %%d2, %%d4:%%d1 \n\t" \ 328*62c56f98SSadaf Ebrahimi "addxl %%d3, %%d1 \n\t" \ 329*62c56f98SSadaf Ebrahimi "addxl %%d0, %%d4 \n\t" \ 330*62c56f98SSadaf Ebrahimi "addl %%d1, %%a3@+ \n\t" \ 331*62c56f98SSadaf Ebrahimi "movel %%a2@+, %%d1 \n\t" \ 332*62c56f98SSadaf Ebrahimi "mulul %%d2, %%d3:%%d1 \n\t" \ 333*62c56f98SSadaf Ebrahimi "addxl %%d4, %%d1 \n\t" \ 334*62c56f98SSadaf Ebrahimi "addxl %%d0, %%d3 \n\t" \ 335*62c56f98SSadaf Ebrahimi "addl %%d1, %%a3@+ \n\t" \ 336*62c56f98SSadaf Ebrahimi "movel %%a2@+, %%d1 \n\t" \ 337*62c56f98SSadaf Ebrahimi "mulul %%d2, %%d4:%%d1 \n\t" \ 338*62c56f98SSadaf Ebrahimi "addxl %%d3, %%d1 \n\t" \ 339*62c56f98SSadaf Ebrahimi "addxl %%d0, %%d4 \n\t" \ 340*62c56f98SSadaf Ebrahimi "addl %%d1, %%a3@+ \n\t" \ 341*62c56f98SSadaf Ebrahimi "movel %%a2@+, %%d1 \n\t" \ 342*62c56f98SSadaf Ebrahimi "mulul %%d2, %%d3:%%d1 \n\t" \ 343*62c56f98SSadaf Ebrahimi "addxl %%d4, %%d1 \n\t" \ 344*62c56f98SSadaf Ebrahimi "addxl %%d0, %%d3 \n\t" \ 345*62c56f98SSadaf Ebrahimi "addl %%d1, %%a3@+ \n\t" \ 346*62c56f98SSadaf Ebrahimi "addxl %%d0, %%d3 \n\t" 347*62c56f98SSadaf Ebrahimi 348*62c56f98SSadaf Ebrahimi #define MULADDC_X8_STOP MULADDC_X1_STOP 349*62c56f98SSadaf Ebrahimi 350*62c56f98SSadaf Ebrahimi #endif /* MC68000 */ 351*62c56f98SSadaf Ebrahimi 352*62c56f98SSadaf Ebrahimi #if defined(__powerpc64__) || defined(__ppc64__) 353*62c56f98SSadaf Ebrahimi 354*62c56f98SSadaf Ebrahimi #if defined(__MACH__) && defined(__APPLE__) 355*62c56f98SSadaf Ebrahimi 356*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 357*62c56f98SSadaf Ebrahimi asm( \ 358*62c56f98SSadaf Ebrahimi "ld r3, %3 \n\t" \ 359*62c56f98SSadaf Ebrahimi "ld r4, %4 \n\t" \ 360*62c56f98SSadaf Ebrahimi "ld r5, %5 \n\t" \ 361*62c56f98SSadaf Ebrahimi "ld r6, %6 \n\t" \ 362*62c56f98SSadaf Ebrahimi "addi r3, r3, -8 \n\t" \ 363*62c56f98SSadaf Ebrahimi "addi r4, r4, -8 \n\t" \ 364*62c56f98SSadaf Ebrahimi "addic r5, r5, 0 \n\t" 365*62c56f98SSadaf Ebrahimi 366*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 367*62c56f98SSadaf Ebrahimi "ldu r7, 8(r3) \n\t" \ 368*62c56f98SSadaf Ebrahimi "mulld r8, r7, r6 \n\t" \ 369*62c56f98SSadaf Ebrahimi "mulhdu r9, r7, r6 \n\t" \ 370*62c56f98SSadaf Ebrahimi "adde r8, r8, r5 \n\t" \ 371*62c56f98SSadaf Ebrahimi "ld r7, 8(r4) \n\t" \ 372*62c56f98SSadaf Ebrahimi "addze r5, r9 \n\t" \ 373*62c56f98SSadaf Ebrahimi "addc r8, r8, r7 \n\t" \ 374*62c56f98SSadaf Ebrahimi "stdu r8, 8(r4) \n\t" 375*62c56f98SSadaf Ebrahimi 376*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 377*62c56f98SSadaf Ebrahimi "addze r5, r5 \n\t" \ 378*62c56f98SSadaf Ebrahimi "addi r4, r4, 8 \n\t" \ 379*62c56f98SSadaf Ebrahimi "addi r3, r3, 8 \n\t" \ 380*62c56f98SSadaf Ebrahimi "std r5, %0 \n\t" \ 381*62c56f98SSadaf Ebrahimi "std r4, %1 \n\t" \ 382*62c56f98SSadaf Ebrahimi "std r3, %2 \n\t" \ 383*62c56f98SSadaf Ebrahimi : "=m" (c), "=m" (d), "=m" (s) \ 384*62c56f98SSadaf Ebrahimi : "m" (s), "m" (d), "m" (c), "m" (b) \ 385*62c56f98SSadaf Ebrahimi : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \ 386*62c56f98SSadaf Ebrahimi ); 387*62c56f98SSadaf Ebrahimi 388*62c56f98SSadaf Ebrahimi 389*62c56f98SSadaf Ebrahimi #else /* __MACH__ && __APPLE__ */ 390*62c56f98SSadaf Ebrahimi 391*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 392*62c56f98SSadaf Ebrahimi asm( \ 393*62c56f98SSadaf Ebrahimi "ld %%r3, %3 \n\t" \ 394*62c56f98SSadaf Ebrahimi "ld %%r4, %4 \n\t" \ 395*62c56f98SSadaf Ebrahimi "ld %%r5, %5 \n\t" \ 396*62c56f98SSadaf Ebrahimi "ld %%r6, %6 \n\t" \ 397*62c56f98SSadaf Ebrahimi "addi %%r3, %%r3, -8 \n\t" \ 398*62c56f98SSadaf Ebrahimi "addi %%r4, %%r4, -8 \n\t" \ 399*62c56f98SSadaf Ebrahimi "addic %%r5, %%r5, 0 \n\t" 400*62c56f98SSadaf Ebrahimi 401*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 402*62c56f98SSadaf Ebrahimi "ldu %%r7, 8(%%r3) \n\t" \ 403*62c56f98SSadaf Ebrahimi "mulld %%r8, %%r7, %%r6 \n\t" \ 404*62c56f98SSadaf Ebrahimi "mulhdu %%r9, %%r7, %%r6 \n\t" \ 405*62c56f98SSadaf Ebrahimi "adde %%r8, %%r8, %%r5 \n\t" \ 406*62c56f98SSadaf Ebrahimi "ld %%r7, 8(%%r4) \n\t" \ 407*62c56f98SSadaf Ebrahimi "addze %%r5, %%r9 \n\t" \ 408*62c56f98SSadaf Ebrahimi "addc %%r8, %%r8, %%r7 \n\t" \ 409*62c56f98SSadaf Ebrahimi "stdu %%r8, 8(%%r4) \n\t" 410*62c56f98SSadaf Ebrahimi 411*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 412*62c56f98SSadaf Ebrahimi "addze %%r5, %%r5 \n\t" \ 413*62c56f98SSadaf Ebrahimi "addi %%r4, %%r4, 8 \n\t" \ 414*62c56f98SSadaf Ebrahimi "addi %%r3, %%r3, 8 \n\t" \ 415*62c56f98SSadaf Ebrahimi "std %%r5, %0 \n\t" \ 416*62c56f98SSadaf Ebrahimi "std %%r4, %1 \n\t" \ 417*62c56f98SSadaf Ebrahimi "std %%r3, %2 \n\t" \ 418*62c56f98SSadaf Ebrahimi : "=m" (c), "=m" (d), "=m" (s) \ 419*62c56f98SSadaf Ebrahimi : "m" (s), "m" (d), "m" (c), "m" (b) \ 420*62c56f98SSadaf Ebrahimi : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \ 421*62c56f98SSadaf Ebrahimi ); 422*62c56f98SSadaf Ebrahimi 423*62c56f98SSadaf Ebrahimi #endif /* __MACH__ && __APPLE__ */ 424*62c56f98SSadaf Ebrahimi 425*62c56f98SSadaf Ebrahimi #elif defined(__powerpc__) || defined(__ppc__) /* end PPC64/begin PPC32 */ 426*62c56f98SSadaf Ebrahimi 427*62c56f98SSadaf Ebrahimi #if defined(__MACH__) && defined(__APPLE__) 428*62c56f98SSadaf Ebrahimi 429*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 430*62c56f98SSadaf Ebrahimi asm( \ 431*62c56f98SSadaf Ebrahimi "lwz r3, %3 \n\t" \ 432*62c56f98SSadaf Ebrahimi "lwz r4, %4 \n\t" \ 433*62c56f98SSadaf Ebrahimi "lwz r5, %5 \n\t" \ 434*62c56f98SSadaf Ebrahimi "lwz r6, %6 \n\t" \ 435*62c56f98SSadaf Ebrahimi "addi r3, r3, -4 \n\t" \ 436*62c56f98SSadaf Ebrahimi "addi r4, r4, -4 \n\t" \ 437*62c56f98SSadaf Ebrahimi "addic r5, r5, 0 \n\t" 438*62c56f98SSadaf Ebrahimi 439*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 440*62c56f98SSadaf Ebrahimi "lwzu r7, 4(r3) \n\t" \ 441*62c56f98SSadaf Ebrahimi "mullw r8, r7, r6 \n\t" \ 442*62c56f98SSadaf Ebrahimi "mulhwu r9, r7, r6 \n\t" \ 443*62c56f98SSadaf Ebrahimi "adde r8, r8, r5 \n\t" \ 444*62c56f98SSadaf Ebrahimi "lwz r7, 4(r4) \n\t" \ 445*62c56f98SSadaf Ebrahimi "addze r5, r9 \n\t" \ 446*62c56f98SSadaf Ebrahimi "addc r8, r8, r7 \n\t" \ 447*62c56f98SSadaf Ebrahimi "stwu r8, 4(r4) \n\t" 448*62c56f98SSadaf Ebrahimi 449*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 450*62c56f98SSadaf Ebrahimi "addze r5, r5 \n\t" \ 451*62c56f98SSadaf Ebrahimi "addi r4, r4, 4 \n\t" \ 452*62c56f98SSadaf Ebrahimi "addi r3, r3, 4 \n\t" \ 453*62c56f98SSadaf Ebrahimi "stw r5, %0 \n\t" \ 454*62c56f98SSadaf Ebrahimi "stw r4, %1 \n\t" \ 455*62c56f98SSadaf Ebrahimi "stw r3, %2 \n\t" \ 456*62c56f98SSadaf Ebrahimi : "=m" (c), "=m" (d), "=m" (s) \ 457*62c56f98SSadaf Ebrahimi : "m" (s), "m" (d), "m" (c), "m" (b) \ 458*62c56f98SSadaf Ebrahimi : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \ 459*62c56f98SSadaf Ebrahimi ); 460*62c56f98SSadaf Ebrahimi 461*62c56f98SSadaf Ebrahimi #else /* __MACH__ && __APPLE__ */ 462*62c56f98SSadaf Ebrahimi 463*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 464*62c56f98SSadaf Ebrahimi asm( \ 465*62c56f98SSadaf Ebrahimi "lwz %%r3, %3 \n\t" \ 466*62c56f98SSadaf Ebrahimi "lwz %%r4, %4 \n\t" \ 467*62c56f98SSadaf Ebrahimi "lwz %%r5, %5 \n\t" \ 468*62c56f98SSadaf Ebrahimi "lwz %%r6, %6 \n\t" \ 469*62c56f98SSadaf Ebrahimi "addi %%r3, %%r3, -4 \n\t" \ 470*62c56f98SSadaf Ebrahimi "addi %%r4, %%r4, -4 \n\t" \ 471*62c56f98SSadaf Ebrahimi "addic %%r5, %%r5, 0 \n\t" 472*62c56f98SSadaf Ebrahimi 473*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 474*62c56f98SSadaf Ebrahimi "lwzu %%r7, 4(%%r3) \n\t" \ 475*62c56f98SSadaf Ebrahimi "mullw %%r8, %%r7, %%r6 \n\t" \ 476*62c56f98SSadaf Ebrahimi "mulhwu %%r9, %%r7, %%r6 \n\t" \ 477*62c56f98SSadaf Ebrahimi "adde %%r8, %%r8, %%r5 \n\t" \ 478*62c56f98SSadaf Ebrahimi "lwz %%r7, 4(%%r4) \n\t" \ 479*62c56f98SSadaf Ebrahimi "addze %%r5, %%r9 \n\t" \ 480*62c56f98SSadaf Ebrahimi "addc %%r8, %%r8, %%r7 \n\t" \ 481*62c56f98SSadaf Ebrahimi "stwu %%r8, 4(%%r4) \n\t" 482*62c56f98SSadaf Ebrahimi 483*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 484*62c56f98SSadaf Ebrahimi "addze %%r5, %%r5 \n\t" \ 485*62c56f98SSadaf Ebrahimi "addi %%r4, %%r4, 4 \n\t" \ 486*62c56f98SSadaf Ebrahimi "addi %%r3, %%r3, 4 \n\t" \ 487*62c56f98SSadaf Ebrahimi "stw %%r5, %0 \n\t" \ 488*62c56f98SSadaf Ebrahimi "stw %%r4, %1 \n\t" \ 489*62c56f98SSadaf Ebrahimi "stw %%r3, %2 \n\t" \ 490*62c56f98SSadaf Ebrahimi : "=m" (c), "=m" (d), "=m" (s) \ 491*62c56f98SSadaf Ebrahimi : "m" (s), "m" (d), "m" (c), "m" (b) \ 492*62c56f98SSadaf Ebrahimi : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \ 493*62c56f98SSadaf Ebrahimi ); 494*62c56f98SSadaf Ebrahimi 495*62c56f98SSadaf Ebrahimi #endif /* __MACH__ && __APPLE__ */ 496*62c56f98SSadaf Ebrahimi 497*62c56f98SSadaf Ebrahimi #endif /* PPC32 */ 498*62c56f98SSadaf Ebrahimi 499*62c56f98SSadaf Ebrahimi /* 500*62c56f98SSadaf Ebrahimi * The Sparc(64) assembly is reported to be broken. 501*62c56f98SSadaf Ebrahimi * Disable it for now, until we're able to fix it. 502*62c56f98SSadaf Ebrahimi */ 503*62c56f98SSadaf Ebrahimi #if 0 && defined(__sparc__) 504*62c56f98SSadaf Ebrahimi #if defined(__sparc64__) 505*62c56f98SSadaf Ebrahimi 506*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 507*62c56f98SSadaf Ebrahimi asm( \ 508*62c56f98SSadaf Ebrahimi "ldx %3, %%o0 \n\t" \ 509*62c56f98SSadaf Ebrahimi "ldx %4, %%o1 \n\t" \ 510*62c56f98SSadaf Ebrahimi "ld %5, %%o2 \n\t" \ 511*62c56f98SSadaf Ebrahimi "ld %6, %%o3 \n\t" 512*62c56f98SSadaf Ebrahimi 513*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 514*62c56f98SSadaf Ebrahimi "ld [%%o0], %%o4 \n\t" \ 515*62c56f98SSadaf Ebrahimi "inc 4, %%o0 \n\t" \ 516*62c56f98SSadaf Ebrahimi "ld [%%o1], %%o5 \n\t" \ 517*62c56f98SSadaf Ebrahimi "umul %%o3, %%o4, %%o4 \n\t" \ 518*62c56f98SSadaf Ebrahimi "addcc %%o4, %%o2, %%o4 \n\t" \ 519*62c56f98SSadaf Ebrahimi "rd %%y, %%g1 \n\t" \ 520*62c56f98SSadaf Ebrahimi "addx %%g1, 0, %%g1 \n\t" \ 521*62c56f98SSadaf Ebrahimi "addcc %%o4, %%o5, %%o4 \n\t" \ 522*62c56f98SSadaf Ebrahimi "st %%o4, [%%o1] \n\t" \ 523*62c56f98SSadaf Ebrahimi "addx %%g1, 0, %%o2 \n\t" \ 524*62c56f98SSadaf Ebrahimi "inc 4, %%o1 \n\t" 525*62c56f98SSadaf Ebrahimi 526*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 527*62c56f98SSadaf Ebrahimi "st %%o2, %0 \n\t" \ 528*62c56f98SSadaf Ebrahimi "stx %%o1, %1 \n\t" \ 529*62c56f98SSadaf Ebrahimi "stx %%o0, %2 \n\t" \ 530*62c56f98SSadaf Ebrahimi : "=m" (c), "=m" (d), "=m" (s) \ 531*62c56f98SSadaf Ebrahimi : "m" (s), "m" (d), "m" (c), "m" (b) \ 532*62c56f98SSadaf Ebrahimi : "g1", "o0", "o1", "o2", "o3", "o4", \ 533*62c56f98SSadaf Ebrahimi "o5" \ 534*62c56f98SSadaf Ebrahimi ); 535*62c56f98SSadaf Ebrahimi 536*62c56f98SSadaf Ebrahimi #else /* __sparc64__ */ 537*62c56f98SSadaf Ebrahimi 538*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 539*62c56f98SSadaf Ebrahimi asm( \ 540*62c56f98SSadaf Ebrahimi "ld %3, %%o0 \n\t" \ 541*62c56f98SSadaf Ebrahimi "ld %4, %%o1 \n\t" \ 542*62c56f98SSadaf Ebrahimi "ld %5, %%o2 \n\t" \ 543*62c56f98SSadaf Ebrahimi "ld %6, %%o3 \n\t" 544*62c56f98SSadaf Ebrahimi 545*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 546*62c56f98SSadaf Ebrahimi "ld [%%o0], %%o4 \n\t" \ 547*62c56f98SSadaf Ebrahimi "inc 4, %%o0 \n\t" \ 548*62c56f98SSadaf Ebrahimi "ld [%%o1], %%o5 \n\t" \ 549*62c56f98SSadaf Ebrahimi "umul %%o3, %%o4, %%o4 \n\t" \ 550*62c56f98SSadaf Ebrahimi "addcc %%o4, %%o2, %%o4 \n\t" \ 551*62c56f98SSadaf Ebrahimi "rd %%y, %%g1 \n\t" \ 552*62c56f98SSadaf Ebrahimi "addx %%g1, 0, %%g1 \n\t" \ 553*62c56f98SSadaf Ebrahimi "addcc %%o4, %%o5, %%o4 \n\t" \ 554*62c56f98SSadaf Ebrahimi "st %%o4, [%%o1] \n\t" \ 555*62c56f98SSadaf Ebrahimi "addx %%g1, 0, %%o2 \n\t" \ 556*62c56f98SSadaf Ebrahimi "inc 4, %%o1 \n\t" 557*62c56f98SSadaf Ebrahimi 558*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 559*62c56f98SSadaf Ebrahimi "st %%o2, %0 \n\t" \ 560*62c56f98SSadaf Ebrahimi "st %%o1, %1 \n\t" \ 561*62c56f98SSadaf Ebrahimi "st %%o0, %2 \n\t" \ 562*62c56f98SSadaf Ebrahimi : "=m" (c), "=m" (d), "=m" (s) \ 563*62c56f98SSadaf Ebrahimi : "m" (s), "m" (d), "m" (c), "m" (b) \ 564*62c56f98SSadaf Ebrahimi : "g1", "o0", "o1", "o2", "o3", "o4", \ 565*62c56f98SSadaf Ebrahimi "o5" \ 566*62c56f98SSadaf Ebrahimi ); 567*62c56f98SSadaf Ebrahimi 568*62c56f98SSadaf Ebrahimi #endif /* __sparc64__ */ 569*62c56f98SSadaf Ebrahimi #endif /* __sparc__ */ 570*62c56f98SSadaf Ebrahimi 571*62c56f98SSadaf Ebrahimi #if defined(__microblaze__) || defined(microblaze) 572*62c56f98SSadaf Ebrahimi 573*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 574*62c56f98SSadaf Ebrahimi asm( \ 575*62c56f98SSadaf Ebrahimi "lwi r3, %3 \n\t" \ 576*62c56f98SSadaf Ebrahimi "lwi r4, %4 \n\t" \ 577*62c56f98SSadaf Ebrahimi "lwi r5, %5 \n\t" \ 578*62c56f98SSadaf Ebrahimi "lwi r6, %6 \n\t" \ 579*62c56f98SSadaf Ebrahimi "andi r7, r6, 0xffff \n\t" \ 580*62c56f98SSadaf Ebrahimi "bsrli r6, r6, 16 \n\t" 581*62c56f98SSadaf Ebrahimi 582*62c56f98SSadaf Ebrahimi #if(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) 583*62c56f98SSadaf Ebrahimi #define MULADDC_LHUI \ 584*62c56f98SSadaf Ebrahimi "lhui r9, r3, 0 \n\t" \ 585*62c56f98SSadaf Ebrahimi "addi r3, r3, 2 \n\t" \ 586*62c56f98SSadaf Ebrahimi "lhui r8, r3, 0 \n\t" 587*62c56f98SSadaf Ebrahimi #else 588*62c56f98SSadaf Ebrahimi #define MULADDC_LHUI \ 589*62c56f98SSadaf Ebrahimi "lhui r8, r3, 0 \n\t" \ 590*62c56f98SSadaf Ebrahimi "addi r3, r3, 2 \n\t" \ 591*62c56f98SSadaf Ebrahimi "lhui r9, r3, 0 \n\t" 592*62c56f98SSadaf Ebrahimi #endif 593*62c56f98SSadaf Ebrahimi 594*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 595*62c56f98SSadaf Ebrahimi MULADDC_LHUI \ 596*62c56f98SSadaf Ebrahimi "addi r3, r3, 2 \n\t" \ 597*62c56f98SSadaf Ebrahimi "mul r10, r9, r6 \n\t" \ 598*62c56f98SSadaf Ebrahimi "mul r11, r8, r7 \n\t" \ 599*62c56f98SSadaf Ebrahimi "mul r12, r9, r7 \n\t" \ 600*62c56f98SSadaf Ebrahimi "mul r13, r8, r6 \n\t" \ 601*62c56f98SSadaf Ebrahimi "bsrli r8, r10, 16 \n\t" \ 602*62c56f98SSadaf Ebrahimi "bsrli r9, r11, 16 \n\t" \ 603*62c56f98SSadaf Ebrahimi "add r13, r13, r8 \n\t" \ 604*62c56f98SSadaf Ebrahimi "add r13, r13, r9 \n\t" \ 605*62c56f98SSadaf Ebrahimi "bslli r10, r10, 16 \n\t" \ 606*62c56f98SSadaf Ebrahimi "bslli r11, r11, 16 \n\t" \ 607*62c56f98SSadaf Ebrahimi "add r12, r12, r10 \n\t" \ 608*62c56f98SSadaf Ebrahimi "addc r13, r13, r0 \n\t" \ 609*62c56f98SSadaf Ebrahimi "add r12, r12, r11 \n\t" \ 610*62c56f98SSadaf Ebrahimi "addc r13, r13, r0 \n\t" \ 611*62c56f98SSadaf Ebrahimi "lwi r10, r4, 0 \n\t" \ 612*62c56f98SSadaf Ebrahimi "add r12, r12, r10 \n\t" \ 613*62c56f98SSadaf Ebrahimi "addc r13, r13, r0 \n\t" \ 614*62c56f98SSadaf Ebrahimi "add r12, r12, r5 \n\t" \ 615*62c56f98SSadaf Ebrahimi "addc r5, r13, r0 \n\t" \ 616*62c56f98SSadaf Ebrahimi "swi r12, r4, 0 \n\t" \ 617*62c56f98SSadaf Ebrahimi "addi r4, r4, 4 \n\t" 618*62c56f98SSadaf Ebrahimi 619*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 620*62c56f98SSadaf Ebrahimi "swi r5, %0 \n\t" \ 621*62c56f98SSadaf Ebrahimi "swi r4, %1 \n\t" \ 622*62c56f98SSadaf Ebrahimi "swi r3, %2 \n\t" \ 623*62c56f98SSadaf Ebrahimi : "=m" (c), "=m" (d), "=m" (s) \ 624*62c56f98SSadaf Ebrahimi : "m" (s), "m" (d), "m" (c), "m" (b) \ 625*62c56f98SSadaf Ebrahimi : "r3", "r4", "r5", "r6", "r7", "r8", \ 626*62c56f98SSadaf Ebrahimi "r9", "r10", "r11", "r12", "r13" \ 627*62c56f98SSadaf Ebrahimi ); 628*62c56f98SSadaf Ebrahimi 629*62c56f98SSadaf Ebrahimi #endif /* MicroBlaze */ 630*62c56f98SSadaf Ebrahimi 631*62c56f98SSadaf Ebrahimi #if defined(__tricore__) 632*62c56f98SSadaf Ebrahimi 633*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 634*62c56f98SSadaf Ebrahimi asm( \ 635*62c56f98SSadaf Ebrahimi "ld.a %%a2, %3 \n\t" \ 636*62c56f98SSadaf Ebrahimi "ld.a %%a3, %4 \n\t" \ 637*62c56f98SSadaf Ebrahimi "ld.w %%d4, %5 \n\t" \ 638*62c56f98SSadaf Ebrahimi "ld.w %%d1, %6 \n\t" \ 639*62c56f98SSadaf Ebrahimi "xor %%d5, %%d5 \n\t" 640*62c56f98SSadaf Ebrahimi 641*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 642*62c56f98SSadaf Ebrahimi "ld.w %%d0, [%%a2+] \n\t" \ 643*62c56f98SSadaf Ebrahimi "madd.u %%e2, %%e4, %%d0, %%d1 \n\t" \ 644*62c56f98SSadaf Ebrahimi "ld.w %%d0, [%%a3] \n\t" \ 645*62c56f98SSadaf Ebrahimi "addx %%d2, %%d2, %%d0 \n\t" \ 646*62c56f98SSadaf Ebrahimi "addc %%d3, %%d3, 0 \n\t" \ 647*62c56f98SSadaf Ebrahimi "mov %%d4, %%d3 \n\t" \ 648*62c56f98SSadaf Ebrahimi "st.w [%%a3+], %%d2 \n\t" 649*62c56f98SSadaf Ebrahimi 650*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 651*62c56f98SSadaf Ebrahimi "st.w %0, %%d4 \n\t" \ 652*62c56f98SSadaf Ebrahimi "st.a %1, %%a3 \n\t" \ 653*62c56f98SSadaf Ebrahimi "st.a %2, %%a2 \n\t" \ 654*62c56f98SSadaf Ebrahimi : "=m" (c), "=m" (d), "=m" (s) \ 655*62c56f98SSadaf Ebrahimi : "m" (s), "m" (d), "m" (c), "m" (b) \ 656*62c56f98SSadaf Ebrahimi : "d0", "d1", "e2", "d4", "a2", "a3" \ 657*62c56f98SSadaf Ebrahimi ); 658*62c56f98SSadaf Ebrahimi 659*62c56f98SSadaf Ebrahimi #endif /* TriCore */ 660*62c56f98SSadaf Ebrahimi 661*62c56f98SSadaf Ebrahimi #if defined(__arm__) 662*62c56f98SSadaf Ebrahimi 663*62c56f98SSadaf Ebrahimi #if defined(__thumb__) && !defined(__thumb2__) 664*62c56f98SSadaf Ebrahimi #if defined(MBEDTLS_COMPILER_IS_GCC) 665*62c56f98SSadaf Ebrahimi /* 666*62c56f98SSadaf Ebrahimi * Thumb 1 ISA. This code path has only been tested successfully on gcc; 667*62c56f98SSadaf Ebrahimi * it does not compile on clang or armclang. 668*62c56f98SSadaf Ebrahimi */ 669*62c56f98SSadaf Ebrahimi 670*62c56f98SSadaf Ebrahimi #if !defined(__OPTIMIZE__) && defined(__GNUC__) 671*62c56f98SSadaf Ebrahimi /* 672*62c56f98SSadaf Ebrahimi * Note, gcc -O0 by default uses r7 for the frame pointer, so it complains about 673*62c56f98SSadaf Ebrahimi * our use of r7 below, unless -fomit-frame-pointer is passed. 674*62c56f98SSadaf Ebrahimi * 675*62c56f98SSadaf Ebrahimi * On the other hand, -fomit-frame-pointer is implied by any -Ox options with 676*62c56f98SSadaf Ebrahimi * x !=0, which we can detect using __OPTIMIZE__ (which is also defined by 677*62c56f98SSadaf Ebrahimi * clang and armcc5 under the same conditions). 678*62c56f98SSadaf Ebrahimi * 679*62c56f98SSadaf Ebrahimi * If gcc needs to use r7, we use r1 as a scratch register and have a few extra 680*62c56f98SSadaf Ebrahimi * instructions to preserve/restore it; otherwise, we can use r7 and avoid 681*62c56f98SSadaf Ebrahimi * the preserve/restore overhead. 682*62c56f98SSadaf Ebrahimi */ 683*62c56f98SSadaf Ebrahimi #define MULADDC_SCRATCH "RS .req r1 \n\t" 684*62c56f98SSadaf Ebrahimi #define MULADDC_PRESERVE_SCRATCH "mov r10, r1 \n\t" 685*62c56f98SSadaf Ebrahimi #define MULADDC_RESTORE_SCRATCH "mov r1, r10 \n\t" 686*62c56f98SSadaf Ebrahimi #define MULADDC_SCRATCH_CLOBBER "r10" 687*62c56f98SSadaf Ebrahimi #else /* !defined(__OPTIMIZE__) && defined(__GNUC__) */ 688*62c56f98SSadaf Ebrahimi #define MULADDC_SCRATCH "RS .req r7 \n\t" 689*62c56f98SSadaf Ebrahimi #define MULADDC_PRESERVE_SCRATCH "" 690*62c56f98SSadaf Ebrahimi #define MULADDC_RESTORE_SCRATCH "" 691*62c56f98SSadaf Ebrahimi #define MULADDC_SCRATCH_CLOBBER "r7" 692*62c56f98SSadaf Ebrahimi #endif /* !defined(__OPTIMIZE__) && defined(__GNUC__) */ 693*62c56f98SSadaf Ebrahimi 694*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 695*62c56f98SSadaf Ebrahimi asm( \ 696*62c56f98SSadaf Ebrahimi MULADDC_SCRATCH \ 697*62c56f98SSadaf Ebrahimi "ldr r0, %3 \n\t" \ 698*62c56f98SSadaf Ebrahimi "ldr r1, %4 \n\t" \ 699*62c56f98SSadaf Ebrahimi "ldr r2, %5 \n\t" \ 700*62c56f98SSadaf Ebrahimi "ldr r3, %6 \n\t" \ 701*62c56f98SSadaf Ebrahimi "lsr r4, r3, #16 \n\t" \ 702*62c56f98SSadaf Ebrahimi "mov r9, r4 \n\t" \ 703*62c56f98SSadaf Ebrahimi "lsl r4, r3, #16 \n\t" \ 704*62c56f98SSadaf Ebrahimi "lsr r4, r4, #16 \n\t" \ 705*62c56f98SSadaf Ebrahimi "mov r8, r4 \n\t" \ 706*62c56f98SSadaf Ebrahimi 707*62c56f98SSadaf Ebrahimi 708*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 709*62c56f98SSadaf Ebrahimi MULADDC_PRESERVE_SCRATCH \ 710*62c56f98SSadaf Ebrahimi "ldmia r0!, {r6} \n\t" \ 711*62c56f98SSadaf Ebrahimi "lsr RS, r6, #16 \n\t" \ 712*62c56f98SSadaf Ebrahimi "lsl r6, r6, #16 \n\t" \ 713*62c56f98SSadaf Ebrahimi "lsr r6, r6, #16 \n\t" \ 714*62c56f98SSadaf Ebrahimi "mov r4, r8 \n\t" \ 715*62c56f98SSadaf Ebrahimi "mul r4, r6 \n\t" \ 716*62c56f98SSadaf Ebrahimi "mov r3, r9 \n\t" \ 717*62c56f98SSadaf Ebrahimi "mul r6, r3 \n\t" \ 718*62c56f98SSadaf Ebrahimi "mov r5, r9 \n\t" \ 719*62c56f98SSadaf Ebrahimi "mul r5, RS \n\t" \ 720*62c56f98SSadaf Ebrahimi "mov r3, r8 \n\t" \ 721*62c56f98SSadaf Ebrahimi "mul RS, r3 \n\t" \ 722*62c56f98SSadaf Ebrahimi "lsr r3, r6, #16 \n\t" \ 723*62c56f98SSadaf Ebrahimi "add r5, r5, r3 \n\t" \ 724*62c56f98SSadaf Ebrahimi "lsr r3, RS, #16 \n\t" \ 725*62c56f98SSadaf Ebrahimi "add r5, r5, r3 \n\t" \ 726*62c56f98SSadaf Ebrahimi "add r4, r4, r2 \n\t" \ 727*62c56f98SSadaf Ebrahimi "mov r2, #0 \n\t" \ 728*62c56f98SSadaf Ebrahimi "adc r5, r2 \n\t" \ 729*62c56f98SSadaf Ebrahimi "lsl r3, r6, #16 \n\t" \ 730*62c56f98SSadaf Ebrahimi "add r4, r4, r3 \n\t" \ 731*62c56f98SSadaf Ebrahimi "adc r5, r2 \n\t" \ 732*62c56f98SSadaf Ebrahimi "lsl r3, RS, #16 \n\t" \ 733*62c56f98SSadaf Ebrahimi "add r4, r4, r3 \n\t" \ 734*62c56f98SSadaf Ebrahimi "adc r5, r2 \n\t" \ 735*62c56f98SSadaf Ebrahimi MULADDC_RESTORE_SCRATCH \ 736*62c56f98SSadaf Ebrahimi "ldr r3, [r1] \n\t" \ 737*62c56f98SSadaf Ebrahimi "add r4, r4, r3 \n\t" \ 738*62c56f98SSadaf Ebrahimi "adc r2, r5 \n\t" \ 739*62c56f98SSadaf Ebrahimi "stmia r1!, {r4} \n\t" 740*62c56f98SSadaf Ebrahimi 741*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 742*62c56f98SSadaf Ebrahimi "str r2, %0 \n\t" \ 743*62c56f98SSadaf Ebrahimi "str r1, %1 \n\t" \ 744*62c56f98SSadaf Ebrahimi "str r0, %2 \n\t" \ 745*62c56f98SSadaf Ebrahimi : "=m" (c), "=m" (d), "=m" (s) \ 746*62c56f98SSadaf Ebrahimi : "m" (s), "m" (d), "m" (c), "m" (b) \ 747*62c56f98SSadaf Ebrahimi : "r0", "r1", "r2", "r3", "r4", "r5", \ 748*62c56f98SSadaf Ebrahimi "r6", MULADDC_SCRATCH_CLOBBER, "r8", "r9", "cc" \ 749*62c56f98SSadaf Ebrahimi ); 750*62c56f98SSadaf Ebrahimi #endif /* !defined(__ARMCC_VERSION) && !defined(__clang__) */ 751*62c56f98SSadaf Ebrahimi 752*62c56f98SSadaf Ebrahimi #elif (__ARM_ARCH >= 6) && \ 753*62c56f98SSadaf Ebrahimi defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1) 754*62c56f98SSadaf Ebrahimi /* Armv6-M (or later) with DSP Instruction Set Extensions. 755*62c56f98SSadaf Ebrahimi * Requires support for either Thumb 2 or Arm ISA. 756*62c56f98SSadaf Ebrahimi */ 757*62c56f98SSadaf Ebrahimi 758*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 759*62c56f98SSadaf Ebrahimi { \ 760*62c56f98SSadaf Ebrahimi mbedtls_mpi_uint tmp_a, tmp_b; \ 761*62c56f98SSadaf Ebrahimi asm volatile ( 762*62c56f98SSadaf Ebrahimi 763*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 764*62c56f98SSadaf Ebrahimi ".p2align 2 \n\t" \ 765*62c56f98SSadaf Ebrahimi "ldr %[a], [%[in]], #4 \n\t" \ 766*62c56f98SSadaf Ebrahimi "ldr %[b], [%[acc]] \n\t" \ 767*62c56f98SSadaf Ebrahimi "umaal %[b], %[carry], %[scalar], %[a] \n\t" \ 768*62c56f98SSadaf Ebrahimi "str %[b], [%[acc]], #4 \n\t" 769*62c56f98SSadaf Ebrahimi 770*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 771*62c56f98SSadaf Ebrahimi : [a] "=&r" (tmp_a), \ 772*62c56f98SSadaf Ebrahimi [b] "=&r" (tmp_b), \ 773*62c56f98SSadaf Ebrahimi [in] "+r" (s), \ 774*62c56f98SSadaf Ebrahimi [acc] "+r" (d), \ 775*62c56f98SSadaf Ebrahimi [carry] "+l" (c) \ 776*62c56f98SSadaf Ebrahimi : [scalar] "r" (b) \ 777*62c56f98SSadaf Ebrahimi : "memory" \ 778*62c56f98SSadaf Ebrahimi ); \ 779*62c56f98SSadaf Ebrahimi } 780*62c56f98SSadaf Ebrahimi 781*62c56f98SSadaf Ebrahimi #define MULADDC_X2_INIT \ 782*62c56f98SSadaf Ebrahimi { \ 783*62c56f98SSadaf Ebrahimi mbedtls_mpi_uint tmp_a0, tmp_b0; \ 784*62c56f98SSadaf Ebrahimi mbedtls_mpi_uint tmp_a1, tmp_b1; \ 785*62c56f98SSadaf Ebrahimi asm volatile ( 786*62c56f98SSadaf Ebrahimi 787*62c56f98SSadaf Ebrahimi /* - Make sure loop is 4-byte aligned to avoid stalls 788*62c56f98SSadaf Ebrahimi * upon repeated non-word aligned instructions in 789*62c56f98SSadaf Ebrahimi * some microarchitectures. 790*62c56f98SSadaf Ebrahimi * - Don't use ldm with post-increment or back-to-back 791*62c56f98SSadaf Ebrahimi * loads with post-increment and same address register 792*62c56f98SSadaf Ebrahimi * to avoid stalls on some microarchitectures. 793*62c56f98SSadaf Ebrahimi * - Bunch loads and stores to reduce latency on some 794*62c56f98SSadaf Ebrahimi * microarchitectures. E.g., on Cortex-M4, the first 795*62c56f98SSadaf Ebrahimi * in a series of load/store operations has latency 796*62c56f98SSadaf Ebrahimi * 2 cycles, while subsequent loads/stores are single-cycle. */ 797*62c56f98SSadaf Ebrahimi #define MULADDC_X2_CORE \ 798*62c56f98SSadaf Ebrahimi ".p2align 2 \n\t" \ 799*62c56f98SSadaf Ebrahimi "ldr %[a0], [%[in]], #+8 \n\t" \ 800*62c56f98SSadaf Ebrahimi "ldr %[b0], [%[acc]], #+8 \n\t" \ 801*62c56f98SSadaf Ebrahimi "ldr %[a1], [%[in], #-4] \n\t" \ 802*62c56f98SSadaf Ebrahimi "ldr %[b1], [%[acc], #-4] \n\t" \ 803*62c56f98SSadaf Ebrahimi "umaal %[b0], %[carry], %[scalar], %[a0] \n\t" \ 804*62c56f98SSadaf Ebrahimi "umaal %[b1], %[carry], %[scalar], %[a1] \n\t" \ 805*62c56f98SSadaf Ebrahimi "str %[b0], [%[acc], #-8] \n\t" \ 806*62c56f98SSadaf Ebrahimi "str %[b1], [%[acc], #-4] \n\t" 807*62c56f98SSadaf Ebrahimi 808*62c56f98SSadaf Ebrahimi #define MULADDC_X2_STOP \ 809*62c56f98SSadaf Ebrahimi : [a0] "=&r" (tmp_a0), \ 810*62c56f98SSadaf Ebrahimi [b0] "=&r" (tmp_b0), \ 811*62c56f98SSadaf Ebrahimi [a1] "=&r" (tmp_a1), \ 812*62c56f98SSadaf Ebrahimi [b1] "=&r" (tmp_b1), \ 813*62c56f98SSadaf Ebrahimi [in] "+r" (s), \ 814*62c56f98SSadaf Ebrahimi [acc] "+r" (d), \ 815*62c56f98SSadaf Ebrahimi [carry] "+l" (c) \ 816*62c56f98SSadaf Ebrahimi : [scalar] "r" (b) \ 817*62c56f98SSadaf Ebrahimi : "memory" \ 818*62c56f98SSadaf Ebrahimi ); \ 819*62c56f98SSadaf Ebrahimi } 820*62c56f98SSadaf Ebrahimi 821*62c56f98SSadaf Ebrahimi #else /* Thumb 2 or Arm ISA, without DSP extensions */ 822*62c56f98SSadaf Ebrahimi 823*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 824*62c56f98SSadaf Ebrahimi asm( \ 825*62c56f98SSadaf Ebrahimi "ldr r0, %3 \n\t" \ 826*62c56f98SSadaf Ebrahimi "ldr r1, %4 \n\t" \ 827*62c56f98SSadaf Ebrahimi "ldr r2, %5 \n\t" \ 828*62c56f98SSadaf Ebrahimi "ldr r3, %6 \n\t" 829*62c56f98SSadaf Ebrahimi 830*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 831*62c56f98SSadaf Ebrahimi "ldr r4, [r0], #4 \n\t" \ 832*62c56f98SSadaf Ebrahimi "mov r5, #0 \n\t" \ 833*62c56f98SSadaf Ebrahimi "ldr r6, [r1] \n\t" \ 834*62c56f98SSadaf Ebrahimi "umlal r2, r5, r3, r4 \n\t" \ 835*62c56f98SSadaf Ebrahimi "adds r4, r6, r2 \n\t" \ 836*62c56f98SSadaf Ebrahimi "adc r2, r5, #0 \n\t" \ 837*62c56f98SSadaf Ebrahimi "str r4, [r1], #4 \n\t" 838*62c56f98SSadaf Ebrahimi 839*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 840*62c56f98SSadaf Ebrahimi "str r2, %0 \n\t" \ 841*62c56f98SSadaf Ebrahimi "str r1, %1 \n\t" \ 842*62c56f98SSadaf Ebrahimi "str r0, %2 \n\t" \ 843*62c56f98SSadaf Ebrahimi : "=m" (c), "=m" (d), "=m" (s) \ 844*62c56f98SSadaf Ebrahimi : "m" (s), "m" (d), "m" (c), "m" (b) \ 845*62c56f98SSadaf Ebrahimi : "r0", "r1", "r2", "r3", "r4", "r5", \ 846*62c56f98SSadaf Ebrahimi "r6", "cc" \ 847*62c56f98SSadaf Ebrahimi ); 848*62c56f98SSadaf Ebrahimi 849*62c56f98SSadaf Ebrahimi #endif /* ISA codepath selection */ 850*62c56f98SSadaf Ebrahimi 851*62c56f98SSadaf Ebrahimi #endif /* defined(__arm__) */ 852*62c56f98SSadaf Ebrahimi 853*62c56f98SSadaf Ebrahimi #if defined(__alpha__) 854*62c56f98SSadaf Ebrahimi 855*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 856*62c56f98SSadaf Ebrahimi asm( \ 857*62c56f98SSadaf Ebrahimi "ldq $1, %3 \n\t" \ 858*62c56f98SSadaf Ebrahimi "ldq $2, %4 \n\t" \ 859*62c56f98SSadaf Ebrahimi "ldq $3, %5 \n\t" \ 860*62c56f98SSadaf Ebrahimi "ldq $4, %6 \n\t" 861*62c56f98SSadaf Ebrahimi 862*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 863*62c56f98SSadaf Ebrahimi "ldq $6, 0($1) \n\t" \ 864*62c56f98SSadaf Ebrahimi "addq $1, 8, $1 \n\t" \ 865*62c56f98SSadaf Ebrahimi "mulq $6, $4, $7 \n\t" \ 866*62c56f98SSadaf Ebrahimi "umulh $6, $4, $6 \n\t" \ 867*62c56f98SSadaf Ebrahimi "addq $7, $3, $7 \n\t" \ 868*62c56f98SSadaf Ebrahimi "cmpult $7, $3, $3 \n\t" \ 869*62c56f98SSadaf Ebrahimi "ldq $5, 0($2) \n\t" \ 870*62c56f98SSadaf Ebrahimi "addq $7, $5, $7 \n\t" \ 871*62c56f98SSadaf Ebrahimi "cmpult $7, $5, $5 \n\t" \ 872*62c56f98SSadaf Ebrahimi "stq $7, 0($2) \n\t" \ 873*62c56f98SSadaf Ebrahimi "addq $2, 8, $2 \n\t" \ 874*62c56f98SSadaf Ebrahimi "addq $6, $3, $3 \n\t" \ 875*62c56f98SSadaf Ebrahimi "addq $5, $3, $3 \n\t" 876*62c56f98SSadaf Ebrahimi 877*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 878*62c56f98SSadaf Ebrahimi "stq $3, %0 \n\t" \ 879*62c56f98SSadaf Ebrahimi "stq $2, %1 \n\t" \ 880*62c56f98SSadaf Ebrahimi "stq $1, %2 \n\t" \ 881*62c56f98SSadaf Ebrahimi : "=m" (c), "=m" (d), "=m" (s) \ 882*62c56f98SSadaf Ebrahimi : "m" (s), "m" (d), "m" (c), "m" (b) \ 883*62c56f98SSadaf Ebrahimi : "$1", "$2", "$3", "$4", "$5", "$6", "$7" \ 884*62c56f98SSadaf Ebrahimi ); 885*62c56f98SSadaf Ebrahimi #endif /* Alpha */ 886*62c56f98SSadaf Ebrahimi 887*62c56f98SSadaf Ebrahimi #if defined(__mips__) && !defined(__mips64) 888*62c56f98SSadaf Ebrahimi 889*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 890*62c56f98SSadaf Ebrahimi asm( \ 891*62c56f98SSadaf Ebrahimi "lw $10, %3 \n\t" \ 892*62c56f98SSadaf Ebrahimi "lw $11, %4 \n\t" \ 893*62c56f98SSadaf Ebrahimi "lw $12, %5 \n\t" \ 894*62c56f98SSadaf Ebrahimi "lw $13, %6 \n\t" 895*62c56f98SSadaf Ebrahimi 896*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 897*62c56f98SSadaf Ebrahimi "lw $14, 0($10) \n\t" \ 898*62c56f98SSadaf Ebrahimi "multu $13, $14 \n\t" \ 899*62c56f98SSadaf Ebrahimi "addi $10, $10, 4 \n\t" \ 900*62c56f98SSadaf Ebrahimi "mflo $14 \n\t" \ 901*62c56f98SSadaf Ebrahimi "mfhi $9 \n\t" \ 902*62c56f98SSadaf Ebrahimi "addu $14, $12, $14 \n\t" \ 903*62c56f98SSadaf Ebrahimi "lw $15, 0($11) \n\t" \ 904*62c56f98SSadaf Ebrahimi "sltu $12, $14, $12 \n\t" \ 905*62c56f98SSadaf Ebrahimi "addu $15, $14, $15 \n\t" \ 906*62c56f98SSadaf Ebrahimi "sltu $14, $15, $14 \n\t" \ 907*62c56f98SSadaf Ebrahimi "addu $12, $12, $9 \n\t" \ 908*62c56f98SSadaf Ebrahimi "sw $15, 0($11) \n\t" \ 909*62c56f98SSadaf Ebrahimi "addu $12, $12, $14 \n\t" \ 910*62c56f98SSadaf Ebrahimi "addi $11, $11, 4 \n\t" 911*62c56f98SSadaf Ebrahimi 912*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 913*62c56f98SSadaf Ebrahimi "sw $12, %0 \n\t" \ 914*62c56f98SSadaf Ebrahimi "sw $11, %1 \n\t" \ 915*62c56f98SSadaf Ebrahimi "sw $10, %2 \n\t" \ 916*62c56f98SSadaf Ebrahimi : "=m" (c), "=m" (d), "=m" (s) \ 917*62c56f98SSadaf Ebrahimi : "m" (s), "m" (d), "m" (c), "m" (b) \ 918*62c56f98SSadaf Ebrahimi : "$9", "$10", "$11", "$12", "$13", "$14", "$15", "lo", "hi" \ 919*62c56f98SSadaf Ebrahimi ); 920*62c56f98SSadaf Ebrahimi 921*62c56f98SSadaf Ebrahimi #endif /* MIPS */ 922*62c56f98SSadaf Ebrahimi #endif /* GNUC */ 923*62c56f98SSadaf Ebrahimi 924*62c56f98SSadaf Ebrahimi #if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__) 925*62c56f98SSadaf Ebrahimi 926*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 927*62c56f98SSadaf Ebrahimi __asm mov esi, s \ 928*62c56f98SSadaf Ebrahimi __asm mov edi, d \ 929*62c56f98SSadaf Ebrahimi __asm mov ecx, c \ 930*62c56f98SSadaf Ebrahimi __asm mov ebx, b 931*62c56f98SSadaf Ebrahimi 932*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 933*62c56f98SSadaf Ebrahimi __asm lodsd \ 934*62c56f98SSadaf Ebrahimi __asm mul ebx \ 935*62c56f98SSadaf Ebrahimi __asm add eax, ecx \ 936*62c56f98SSadaf Ebrahimi __asm adc edx, 0 \ 937*62c56f98SSadaf Ebrahimi __asm add eax, [edi] \ 938*62c56f98SSadaf Ebrahimi __asm adc edx, 0 \ 939*62c56f98SSadaf Ebrahimi __asm mov ecx, edx \ 940*62c56f98SSadaf Ebrahimi __asm stosd 941*62c56f98SSadaf Ebrahimi 942*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 943*62c56f98SSadaf Ebrahimi __asm mov c, ecx \ 944*62c56f98SSadaf Ebrahimi __asm mov d, edi \ 945*62c56f98SSadaf Ebrahimi __asm mov s, esi 946*62c56f98SSadaf Ebrahimi 947*62c56f98SSadaf Ebrahimi #if defined(MBEDTLS_HAVE_SSE2) 948*62c56f98SSadaf Ebrahimi 949*62c56f98SSadaf Ebrahimi #define EMIT __asm _emit 950*62c56f98SSadaf Ebrahimi 951*62c56f98SSadaf Ebrahimi #define MULADDC_X8_INIT MULADDC_X1_INIT 952*62c56f98SSadaf Ebrahimi 953*62c56f98SSadaf Ebrahimi #define MULADDC_X8_CORE \ 954*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0xC9 \ 955*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0xC3 \ 956*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x1F \ 957*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xCB \ 958*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x16 \ 959*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \ 960*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x04 \ 961*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \ 962*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x08 \ 963*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \ 964*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x7E EMIT 0x0C \ 965*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xF4 EMIT 0xF8 \ 966*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xCA \ 967*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x5F EMIT 0x04 \ 968*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xDC \ 969*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x08 \ 970*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xEE \ 971*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x67 EMIT 0x0C \ 972*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xFC \ 973*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x7E EMIT 0x0F \ 974*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x56 EMIT 0x10 \ 975*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \ 976*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 977*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x14 \ 978*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \ 979*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xCB \ 980*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x18 \ 981*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \ 982*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x04 \ 983*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 984*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x5E EMIT 0x1C \ 985*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xF4 EMIT 0xD8 \ 986*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xCD \ 987*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x10 \ 988*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xD5 \ 989*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x08 \ 990*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 991*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xCF \ 992*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x14 \ 993*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xE5 \ 994*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x0C \ 995*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 996*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xCA \ 997*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x18 \ 998*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xF5 \ 999*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x10 \ 1000*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 1001*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xCC \ 1002*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x1C \ 1003*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xDD \ 1004*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x14 \ 1005*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 1006*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xCE \ 1007*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x18 \ 1008*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 1009*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0xD4 EMIT 0xCB \ 1010*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x1C \ 1011*62c56f98SSadaf Ebrahimi EMIT 0x83 EMIT 0xC7 EMIT 0x20 \ 1012*62c56f98SSadaf Ebrahimi EMIT 0x83 EMIT 0xC6 EMIT 0x20 \ 1013*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 1014*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x7E EMIT 0xC9 1015*62c56f98SSadaf Ebrahimi 1016*62c56f98SSadaf Ebrahimi #define MULADDC_X8_STOP \ 1017*62c56f98SSadaf Ebrahimi EMIT 0x0F EMIT 0x77 \ 1018*62c56f98SSadaf Ebrahimi __asm mov c, ecx \ 1019*62c56f98SSadaf Ebrahimi __asm mov d, edi \ 1020*62c56f98SSadaf Ebrahimi __asm mov s, esi 1021*62c56f98SSadaf Ebrahimi 1022*62c56f98SSadaf Ebrahimi #endif /* SSE2 */ 1023*62c56f98SSadaf Ebrahimi #endif /* MSVC */ 1024*62c56f98SSadaf Ebrahimi 1025*62c56f98SSadaf Ebrahimi #endif /* MBEDTLS_HAVE_ASM */ 1026*62c56f98SSadaf Ebrahimi 1027*62c56f98SSadaf Ebrahimi #if !defined(MULADDC_X1_CORE) 1028*62c56f98SSadaf Ebrahimi #if defined(MBEDTLS_HAVE_UDBL) 1029*62c56f98SSadaf Ebrahimi 1030*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 1031*62c56f98SSadaf Ebrahimi { \ 1032*62c56f98SSadaf Ebrahimi mbedtls_t_udbl r; \ 1033*62c56f98SSadaf Ebrahimi mbedtls_mpi_uint r0, r1; 1034*62c56f98SSadaf Ebrahimi 1035*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 1036*62c56f98SSadaf Ebrahimi r = *(s++) * (mbedtls_t_udbl) b; \ 1037*62c56f98SSadaf Ebrahimi r0 = (mbedtls_mpi_uint) r; \ 1038*62c56f98SSadaf Ebrahimi r1 = (mbedtls_mpi_uint)( r >> biL ); \ 1039*62c56f98SSadaf Ebrahimi r0 += c; r1 += (r0 < c); \ 1040*62c56f98SSadaf Ebrahimi r0 += *d; r1 += (r0 < *d); \ 1041*62c56f98SSadaf Ebrahimi c = r1; *(d++) = r0; 1042*62c56f98SSadaf Ebrahimi 1043*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 1044*62c56f98SSadaf Ebrahimi } 1045*62c56f98SSadaf Ebrahimi 1046*62c56f98SSadaf Ebrahimi #else /* MBEDTLS_HAVE_UDBL */ 1047*62c56f98SSadaf Ebrahimi 1048*62c56f98SSadaf Ebrahimi #define MULADDC_X1_INIT \ 1049*62c56f98SSadaf Ebrahimi { \ 1050*62c56f98SSadaf Ebrahimi mbedtls_mpi_uint s0, s1, b0, b1; \ 1051*62c56f98SSadaf Ebrahimi mbedtls_mpi_uint r0, r1, rx, ry; \ 1052*62c56f98SSadaf Ebrahimi b0 = ( b << biH ) >> biH; \ 1053*62c56f98SSadaf Ebrahimi b1 = ( b >> biH ); 1054*62c56f98SSadaf Ebrahimi 1055*62c56f98SSadaf Ebrahimi #define MULADDC_X1_CORE \ 1056*62c56f98SSadaf Ebrahimi s0 = ( *s << biH ) >> biH; \ 1057*62c56f98SSadaf Ebrahimi s1 = ( *s >> biH ); s++; \ 1058*62c56f98SSadaf Ebrahimi rx = s0 * b1; r0 = s0 * b0; \ 1059*62c56f98SSadaf Ebrahimi ry = s1 * b0; r1 = s1 * b1; \ 1060*62c56f98SSadaf Ebrahimi r1 += ( rx >> biH ); \ 1061*62c56f98SSadaf Ebrahimi r1 += ( ry >> biH ); \ 1062*62c56f98SSadaf Ebrahimi rx <<= biH; ry <<= biH; \ 1063*62c56f98SSadaf Ebrahimi r0 += rx; r1 += (r0 < rx); \ 1064*62c56f98SSadaf Ebrahimi r0 += ry; r1 += (r0 < ry); \ 1065*62c56f98SSadaf Ebrahimi r0 += c; r1 += (r0 < c); \ 1066*62c56f98SSadaf Ebrahimi r0 += *d; r1 += (r0 < *d); \ 1067*62c56f98SSadaf Ebrahimi c = r1; *(d++) = r0; 1068*62c56f98SSadaf Ebrahimi 1069*62c56f98SSadaf Ebrahimi #define MULADDC_X1_STOP \ 1070*62c56f98SSadaf Ebrahimi } 1071*62c56f98SSadaf Ebrahimi 1072*62c56f98SSadaf Ebrahimi #endif /* C (longlong) */ 1073*62c56f98SSadaf Ebrahimi #endif /* C (generic) */ 1074*62c56f98SSadaf Ebrahimi 1075*62c56f98SSadaf Ebrahimi #if !defined(MULADDC_X2_CORE) 1076*62c56f98SSadaf Ebrahimi #define MULADDC_X2_INIT MULADDC_X1_INIT 1077*62c56f98SSadaf Ebrahimi #define MULADDC_X2_STOP MULADDC_X1_STOP 1078*62c56f98SSadaf Ebrahimi #define MULADDC_X2_CORE MULADDC_X1_CORE MULADDC_X1_CORE 1079*62c56f98SSadaf Ebrahimi #endif /* MULADDC_X2_CORE */ 1080*62c56f98SSadaf Ebrahimi 1081*62c56f98SSadaf Ebrahimi #if !defined(MULADDC_X4_CORE) 1082*62c56f98SSadaf Ebrahimi #define MULADDC_X4_INIT MULADDC_X2_INIT 1083*62c56f98SSadaf Ebrahimi #define MULADDC_X4_STOP MULADDC_X2_STOP 1084*62c56f98SSadaf Ebrahimi #define MULADDC_X4_CORE MULADDC_X2_CORE MULADDC_X2_CORE 1085*62c56f98SSadaf Ebrahimi #endif /* MULADDC_X4_CORE */ 1086*62c56f98SSadaf Ebrahimi 1087*62c56f98SSadaf Ebrahimi #if !defined(MULADDC_X8_CORE) 1088*62c56f98SSadaf Ebrahimi #define MULADDC_X8_INIT MULADDC_X4_INIT 1089*62c56f98SSadaf Ebrahimi #define MULADDC_X8_STOP MULADDC_X4_STOP 1090*62c56f98SSadaf Ebrahimi #define MULADDC_X8_CORE MULADDC_X4_CORE MULADDC_X4_CORE 1091*62c56f98SSadaf Ebrahimi #endif /* MULADDC_X8_CORE */ 1092*62c56f98SSadaf Ebrahimi 1093*62c56f98SSadaf Ebrahimi /* *INDENT-ON* */ 1094*62c56f98SSadaf Ebrahimi #endif /* bn_mul.h */ 1095