1#define DEC_5 4 2#define DEC_6 5 3#define DEC_7 6 4#define DEC_8 7 5 6#define DEC(N) uECC_CONCAT(DEC_, N) 7 8#define REPEAT_1(stuff) stuff 9#define REPEAT_2(stuff) REPEAT_1(stuff) stuff 10#define REPEAT_3(stuff) REPEAT_2(stuff) stuff 11#define REPEAT_4(stuff) REPEAT_3(stuff) stuff 12#define REPEAT_5(stuff) REPEAT_4(stuff) stuff 13#define REPEAT_6(stuff) REPEAT_5(stuff) stuff 14#define REPEAT_7(stuff) REPEAT_6(stuff) stuff 15#define REPEAT_8(stuff) REPEAT_7(stuff) stuff 16 17#define REPEAT(N, stuff) uECC_CONCAT(REPEAT_, N)(stuff) 18 19#define STR2(thing) #thing 20#define STR(thing) STR2(thing) 21 22#if (uECC_ASM == uECC_asm_fast) 23 24static uint32_t vli_add(uint32_t *result, const uint32_t *left, const uint32_t *right) { 25 uint32_t carry = 0; 26 uint32_t left_word; 27 uint32_t right_word; 28 29 __asm__ volatile ( 30 ".syntax unified \n\t" 31 "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */ 32 "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */ 33 "adds %[left], %[right] \n\t" /* Add first word. */ 34 "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */ 35 36 /* Now we just do the remaining words with the carry bit (using ADC) */ 37 REPEAT(DEC(uECC_WORDS), 38 "ldmia %[lptr]!, {%[left]} \n\t" 39 "ldmia %[rptr]!, {%[right]} \n\t" 40 "adcs %[left], %[right] \n\t" 41 "stmia %[dptr]!, {%[left]} \n\t") 42 43 "adcs %[carry], %[carry] \n\t" /* Store carry bit. 
*/ 44 #if (uECC_PLATFORM != uECC_arm_thumb2) 45 ".syntax divided \n\t" 46 #endif 47 #if (uECC_PLATFORM == uECC_arm_thumb) 48 : [dptr] "+l" (result), [lptr] "+l" (left), [rptr] "+l" (right), 49 [carry] "+l" (carry), [left] "=l" (left_word), [right] "=l" (right_word) 50 #else 51 : [dptr] "+r" (result), [lptr] "+r" (left), [rptr] "+r" (right), 52 [carry] "+r" (carry), [left] "=r" (left_word), [right] "=r" (right_word) 53 #endif 54 : 55 : "cc", "memory" 56 ); 57 return carry; 58} 59#define asm_add 1 60 61static uint32_t vli_sub(uint32_t *result, const uint32_t *left, const uint32_t *right) { 62 uint32_t carry = 0; 63 uint32_t left_word; 64 uint32_t right_word; 65 66 __asm__ volatile ( 67 ".syntax unified \n\t" 68 "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */ 69 "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */ 70 "subs %[left], %[right] \n\t" /* Subtract. */ 71 "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */ 72 73 /* Now we just do the remaining words with the carry bit (using SBC) */ 74 REPEAT(DEC(uECC_WORDS), 75 "ldmia %[lptr]!, {%[left]} \n\t" 76 "ldmia %[rptr]!, {%[right]} \n\t" 77 "sbcs %[left], %[right] \n\t" 78 "stmia %[dptr]!, {%[left]} \n\t") 79 80 "adcs %[carry], %[carry] \n\t" /* Store carry bit. */ 81 #if (uECC_PLATFORM != uECC_arm_thumb2) 82 ".syntax divided \n\t" 83 #endif 84 #if (uECC_PLATFORM == uECC_arm_thumb) 85 : [dptr] "+l" (result), [lptr] "+l" (left), [rptr] "+l" (right), 86 [carry] "+l" (carry), [left] "=l" (left_word), [right] "=l" (right_word) 87 #else 88 : [dptr] "+r" (result), [lptr] "+r" (left), [rptr] "+r" (right), 89 [carry] "+r" (carry), [left] "=r" (left_word), [right] "=r" (right_word) 90 #endif 91 : 92 : "cc", "memory" 93 ); 94 return !carry; // note that on ARM, carry flag set means "no borrow" when subtracting 95 // (for some reason...) 
96} 97#define asm_sub 1 98 99#if (uECC_PLATFORM != uECC_arm_thumb) 100#if (uECC_WORDS == 5) 101static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) { 102 register uint32_t *r0 __asm__("r0") = result; 103 register const uint32_t *r1 __asm__("r1") = left; 104 register const uint32_t *r2 __asm__("r2") = right; 105 106 __asm__ volatile ( 107 ".syntax unified \n\t" 108 "add r0, 12 \n\t" 109 "add r2, 12 \n\t" 110 "ldmia r1!, {r3,r4} \n\t" 111 "ldmia r2!, {r6,r7} \n\t" 112 113 "umull r11, r12, r3, r6 \n\t" 114 "stmia r0!, {r11} \n\t" 115 116 "mov r10, #0 \n\t" 117 "umull r11, r9, r3, r7 \n\t" 118 "adds r12, r11 \n\t" 119 "adc r9, #0 \n\t" 120 "umull r11, r14, r4, r6 \n\t" 121 "adds r12, r11 \n\t" 122 "adcs r9, r14 \n\t" 123 "adc r10, #0 \n\t" 124 "stmia r0!, {r12} \n\t" 125 126 "umull r12, r14, r4, r7 \n\t" 127 "adds r9, r12 \n\t" 128 "adc r10, r14 \n\t" 129 "stmia r0!, {r9, r10} \n\t" 130 131 "sub r0, 28 \n\t" 132 "sub r2, 20 \n\t" 133 "ldmia r2!, {r6,r7,r8} \n\t" 134 "ldmia r1!, {r5} \n\t" 135 136 "umull r11, r12, r3, r6 \n\t" 137 "stmia r0!, {r11} \n\t" 138 139 "mov r10, #0 \n\t" 140 "umull r11, r9, r3, r7 \n\t" 141 "adds r12, r11 \n\t" 142 "adc r9, #0 \n\t" 143 "umull r11, r14, r4, r6 \n\t" 144 "adds r12, r11 \n\t" 145 "adcs r9, r14 \n\t" 146 "adc r10, #0 \n\t" 147 "stmia r0!, {r12} \n\t" 148 149 "mov r11, #0 \n\t" 150 "umull r12, r14, r3, r8 \n\t" 151 "adds r9, r12 \n\t" 152 "adcs r10, r14 \n\t" 153 "adc r11, #0 \n\t" 154 "umull r12, r14, r4, r7 \n\t" 155 "adds r9, r12 \n\t" 156 "adcs r10, r14 \n\t" 157 "adc r11, #0 \n\t" 158 "umull r12, r14, r5, r6 \n\t" 159 "adds r9, r12 \n\t" 160 "adcs r10, r14 \n\t" 161 "adc r11, #0 \n\t" 162 "stmia r0!, {r9} \n\t" 163 164 "ldmia r1!, {r3} \n\t" 165 "mov r12, #0 \n\t" 166 "umull r14, r9, r4, r8 \n\t" 167 "adds r10, r14 \n\t" 168 "adcs r11, r9 \n\t" 169 "adc r12, #0 \n\t" 170 "umull r14, r9, r5, r7 \n\t" 171 "adds r10, r14 \n\t" 172 "adcs r11, r9 \n\t" 173 "adc r12, #0 \n\t" 174 "umull r14, r9, r3, 
r6 \n\t" 175 "adds r10, r14 \n\t" 176 "adcs r11, r9 \n\t" 177 "adc r12, #0 \n\t" 178 "ldr r14, [r0] \n\t" 179 "adds r10, r14 \n\t" 180 "adcs r11, #0 \n\t" 181 "adc r12, #0 \n\t" 182 "stmia r0!, {r10} \n\t" 183 184 "ldmia r1!, {r4} \n\t" 185 "mov r14, #0 \n\t" 186 "umull r9, r10, r5, r8 \n\t" 187 "adds r11, r9 \n\t" 188 "adcs r12, r10 \n\t" 189 "adc r14, #0 \n\t" 190 "umull r9, r10, r3, r7 \n\t" 191 "adds r11, r9 \n\t" 192 "adcs r12, r10 \n\t" 193 "adc r14, #0 \n\t" 194 "umull r9, r10, r4, r6 \n\t" 195 "adds r11, r9 \n\t" 196 "adcs r12, r10 \n\t" 197 "adc r14, #0 \n\t" 198 "ldr r9, [r0] \n\t" 199 "adds r11, r9 \n\t" 200 "adcs r12, #0 \n\t" 201 "adc r14, #0 \n\t" 202 "stmia r0!, {r11} \n\t" 203 204 "ldmia r2!, {r6} \n\t" 205 "mov r9, #0 \n\t" 206 "umull r10, r11, r5, r6 \n\t" 207 "adds r12, r10 \n\t" 208 "adcs r14, r11 \n\t" 209 "adc r9, #0 \n\t" 210 "umull r10, r11, r3, r8 \n\t" 211 "adds r12, r10 \n\t" 212 "adcs r14, r11 \n\t" 213 "adc r9, #0 \n\t" 214 "umull r10, r11, r4, r7 \n\t" 215 "adds r12, r10 \n\t" 216 "adcs r14, r11 \n\t" 217 "adc r9, #0 \n\t" 218 "ldr r10, [r0] \n\t" 219 "adds r12, r10 \n\t" 220 "adcs r14, #0 \n\t" 221 "adc r9, #0 \n\t" 222 "stmia r0!, {r12} \n\t" 223 224 "ldmia r2!, {r7} \n\t" 225 "mov r10, #0 \n\t" 226 "umull r11, r12, r5, r7 \n\t" 227 "adds r14, r11 \n\t" 228 "adcs r9, r12 \n\t" 229 "adc r10, #0 \n\t" 230 "umull r11, r12, r3, r6 \n\t" 231 "adds r14, r11 \n\t" 232 "adcs r9, r12 \n\t" 233 "adc r10, #0 \n\t" 234 "umull r11, r12, r4, r8 \n\t" 235 "adds r14, r11 \n\t" 236 "adcs r9, r12 \n\t" 237 "adc r10, #0 \n\t" 238 "ldr r11, [r0] \n\t" 239 "adds r14, r11 \n\t" 240 "adcs r9, #0 \n\t" 241 "adc r10, #0 \n\t" 242 "stmia r0!, {r14} \n\t" 243 244 "mov r11, #0 \n\t" 245 "umull r12, r14, r3, r7 \n\t" 246 "adds r9, r12 \n\t" 247 "adcs r10, r14 \n\t" 248 "adc r11, #0 \n\t" 249 "umull r12, r14, r4, r6 \n\t" 250 "adds r9, r12 \n\t" 251 "adcs r10, r14 \n\t" 252 "adc r11, #0 \n\t" 253 "stmia r0!, {r9} \n\t" 254 255 "umull r14, r9, r4, r7 \n\t" 256 
"adds r10, r14 \n\t" 257 "adc r11, r9 \n\t" 258 "stmia r0!, {r10, r11} \n\t" 259 #if (uECC_PLATFORM != uECC_arm_thumb2) 260 ".syntax divided \n\t" 261 #endif 262 : "+r" (r0), "+r" (r1), "+r" (r2) 263 : 264 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 265 ); 266} 267#define asm_mult 1 268#endif /* (uECC_WORDS == 5) */ 269 270#if (uECC_WORDS == 6) 271static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) { 272 register uint32_t *r0 __asm__("r0") = result; 273 register const uint32_t *r1 __asm__("r1") = left; 274 register const uint32_t *r2 __asm__("r2") = right; 275 276 __asm__ volatile ( 277 ".syntax unified \n\t" 278 "add r0, 12 \n\t" 279 "add r2, 12 \n\t" 280 "ldmia r1!, {r3,r4,r5} \n\t" 281 "ldmia r2!, {r6,r7,r8} \n\t" 282 283 "umull r11, r12, r3, r6 \n\t" 284 "stmia r0!, {r11} \n\t" 285 286 "mov r10, #0 \n\t" 287 "umull r11, r9, r3, r7 \n\t" 288 "adds r12, r11 \n\t" 289 "adc r9, #0 \n\t" 290 "umull r11, r14, r4, r6 \n\t" 291 "adds r12, r11 \n\t" 292 "adcs r9, r14 \n\t" 293 "adc r10, #0 \n\t" 294 "stmia r0!, {r12} \n\t" 295 296 "mov r11, #0 \n\t" 297 "umull r12, r14, r3, r8 \n\t" 298 "adds r9, r12 \n\t" 299 "adcs r10, r14 \n\t" 300 "adc r11, #0 \n\t" 301 "umull r12, r14, r4, r7 \n\t" 302 "adds r9, r12 \n\t" 303 "adcs r10, r14 \n\t" 304 "adc r11, #0 \n\t" 305 "umull r12, r14, r5, r6 \n\t" 306 "adds r9, r12 \n\t" 307 "adcs r10, r14 \n\t" 308 "adc r11, #0 \n\t" 309 "stmia r0!, {r9} \n\t" 310 311 "mov r12, #0 \n\t" 312 "umull r14, r9, r4, r8 \n\t" 313 "adds r10, r14 \n\t" 314 "adcs r11, r9 \n\t" 315 "adc r12, #0 \n\t" 316 "umull r14, r9, r5, r7 \n\t" 317 "adds r10, r14 \n\t" 318 "adcs r11, r9 \n\t" 319 "adc r12, #0 \n\t" 320 "stmia r0!, {r10} \n\t" 321 322 "umull r9, r10, r5, r8 \n\t" 323 "adds r11, r9 \n\t" 324 "adc r12, r10 \n\t" 325 "stmia r0!, {r11, r12} \n\t" 326 327 "sub r0, 36 \n\t" 328 "sub r2, 24 \n\t" 329 "ldmia r2!, {r6,r7,r8} \n\t" 330 331 "umull r11, r12, r3, r6 \n\t" 332 "stmia r0!, 
{r11} \n\t" 333 334 "mov r10, #0 \n\t" 335 "umull r11, r9, r3, r7 \n\t" 336 "adds r12, r11 \n\t" 337 "adc r9, #0 \n\t" 338 "umull r11, r14, r4, r6 \n\t" 339 "adds r12, r11 \n\t" 340 "adcs r9, r14 \n\t" 341 "adc r10, #0 \n\t" 342 "stmia r0!, {r12} \n\t" 343 344 "mov r11, #0 \n\t" 345 "umull r12, r14, r3, r8 \n\t" 346 "adds r9, r12 \n\t" 347 "adcs r10, r14 \n\t" 348 "adc r11, #0 \n\t" 349 "umull r12, r14, r4, r7 \n\t" 350 "adds r9, r12 \n\t" 351 "adcs r10, r14 \n\t" 352 "adc r11, #0 \n\t" 353 "umull r12, r14, r5, r6 \n\t" 354 "adds r9, r12 \n\t" 355 "adcs r10, r14 \n\t" 356 "adc r11, #0 \n\t" 357 "stmia r0!, {r9} \n\t" 358 359 "ldmia r1!, {r3} \n\t" 360 "mov r12, #0 \n\t" 361 "umull r14, r9, r4, r8 \n\t" 362 "adds r10, r14 \n\t" 363 "adcs r11, r9 \n\t" 364 "adc r12, #0 \n\t" 365 "umull r14, r9, r5, r7 \n\t" 366 "adds r10, r14 \n\t" 367 "adcs r11, r9 \n\t" 368 "adc r12, #0 \n\t" 369 "umull r14, r9, r3, r6 \n\t" 370 "adds r10, r14 \n\t" 371 "adcs r11, r9 \n\t" 372 "adc r12, #0 \n\t" 373 "ldr r14, [r0] \n\t" 374 "adds r10, r14 \n\t" 375 "adcs r11, #0 \n\t" 376 "adc r12, #0 \n\t" 377 "stmia r0!, {r10} \n\t" 378 379 "ldmia r1!, {r4} \n\t" 380 "mov r14, #0 \n\t" 381 "umull r9, r10, r5, r8 \n\t" 382 "adds r11, r9 \n\t" 383 "adcs r12, r10 \n\t" 384 "adc r14, #0 \n\t" 385 "umull r9, r10, r3, r7 \n\t" 386 "adds r11, r9 \n\t" 387 "adcs r12, r10 \n\t" 388 "adc r14, #0 \n\t" 389 "umull r9, r10, r4, r6 \n\t" 390 "adds r11, r9 \n\t" 391 "adcs r12, r10 \n\t" 392 "adc r14, #0 \n\t" 393 "ldr r9, [r0] \n\t" 394 "adds r11, r9 \n\t" 395 "adcs r12, #0 \n\t" 396 "adc r14, #0 \n\t" 397 "stmia r0!, {r11} \n\t" 398 399 "ldmia r1!, {r5} \n\t" 400 "mov r9, #0 \n\t" 401 "umull r10, r11, r3, r8 \n\t" 402 "adds r12, r10 \n\t" 403 "adcs r14, r11 \n\t" 404 "adc r9, #0 \n\t" 405 "umull r10, r11, r4, r7 \n\t" 406 "adds r12, r10 \n\t" 407 "adcs r14, r11 \n\t" 408 "adc r9, #0 \n\t" 409 "umull r10, r11, r5, r6 \n\t" 410 "adds r12, r10 \n\t" 411 "adcs r14, r11 \n\t" 412 "adc r9, #0 \n\t" 413 "ldr r10, 
[r0] \n\t" 414 "adds r12, r10 \n\t" 415 "adcs r14, #0 \n\t" 416 "adc r9, #0 \n\t" 417 "stmia r0!, {r12} \n\t" 418 419 "ldmia r2!, {r6} \n\t" 420 "mov r10, #0 \n\t" 421 "umull r11, r12, r3, r6 \n\t" 422 "adds r14, r11 \n\t" 423 "adcs r9, r12 \n\t" 424 "adc r10, #0 \n\t" 425 "umull r11, r12, r4, r8 \n\t" 426 "adds r14, r11 \n\t" 427 "adcs r9, r12 \n\t" 428 "adc r10, #0 \n\t" 429 "umull r11, r12, r5, r7 \n\t" 430 "adds r14, r11 \n\t" 431 "adcs r9, r12 \n\t" 432 "adc r10, #0 \n\t" 433 "ldr r11, [r0] \n\t" 434 "adds r14, r11 \n\t" 435 "adcs r9, #0 \n\t" 436 "adc r10, #0 \n\t" 437 "stmia r0!, {r14} \n\t" 438 439 "ldmia r2!, {r7} \n\t" 440 "mov r11, #0 \n\t" 441 "umull r12, r14, r3, r7 \n\t" 442 "adds r9, r12 \n\t" 443 "adcs r10, r14 \n\t" 444 "adc r11, #0 \n\t" 445 "umull r12, r14, r4, r6 \n\t" 446 "adds r9, r12 \n\t" 447 "adcs r10, r14 \n\t" 448 "adc r11, #0 \n\t" 449 "umull r12, r14, r5, r8 \n\t" 450 "adds r9, r12 \n\t" 451 "adcs r10, r14 \n\t" 452 "adc r11, #0 \n\t" 453 "ldr r12, [r0] \n\t" 454 "adds r9, r12 \n\t" 455 "adcs r10, #0 \n\t" 456 "adc r11, #0 \n\t" 457 "stmia r0!, {r9} \n\t" 458 459 "ldmia r2!, {r8} \n\t" 460 "mov r12, #0 \n\t" 461 "umull r14, r9, r3, r8 \n\t" 462 "adds r10, r14 \n\t" 463 "adcs r11, r9 \n\t" 464 "adc r12, #0 \n\t" 465 "umull r14, r9, r4, r7 \n\t" 466 "adds r10, r14 \n\t" 467 "adcs r11, r9 \n\t" 468 "adc r12, #0 \n\t" 469 "umull r14, r9, r5, r6 \n\t" 470 "adds r10, r14 \n\t" 471 "adcs r11, r9 \n\t" 472 "adc r12, #0 \n\t" 473 "ldr r14, [r0] \n\t" 474 "adds r10, r14 \n\t" 475 "adcs r11, #0 \n\t" 476 "adc r12, #0 \n\t" 477 "stmia r0!, {r10} \n\t" 478 479 "mov r14, #0 \n\t" 480 "umull r9, r10, r4, r8 \n\t" 481 "adds r11, r9 \n\t" 482 "adcs r12, r10 \n\t" 483 "adc r14, #0 \n\t" 484 "umull r9, r10, r5, r7 \n\t" 485 "adds r11, r9 \n\t" 486 "adcs r12, r10 \n\t" 487 "adc r14, #0 \n\t" 488 "stmia r0!, {r11} \n\t" 489 490 "umull r10, r11, r5, r8 \n\t" 491 "adds r12, r10 \n\t" 492 "adc r14, r11 \n\t" 493 "stmia r0!, {r12, r14} \n\t" 494 #if 
(uECC_PLATFORM != uECC_arm_thumb2) 495 ".syntax divided \n\t" 496 #endif 497 : "+r" (r0), "+r" (r1), "+r" (r2) 498 : 499 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 500 ); 501} 502#define asm_mult 1 503#endif /* (uECC_WORDS == 6) */ 504 505#if (uECC_WORDS == 7) 506static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) { 507 register uint32_t *r0 __asm__("r0") = result; 508 register const uint32_t *r1 __asm__("r1") = left; 509 register const uint32_t *r2 __asm__("r2") = right; 510 511 __asm__ volatile ( 512 ".syntax unified \n\t" 513 "add r0, 24 \n\t" 514 "add r2, 24 \n\t" 515 "ldmia r1!, {r3} \n\t" 516 "ldmia r2!, {r6} \n\t" 517 518 "umull r9, r10, r3, r6 \n\t" 519 "stmia r0!, {r9, r10} \n\t" 520 521 "sub r0, 20 \n\t" 522 "sub r2, 16 \n\t" 523 "ldmia r2!, {r6, r7, r8} \n\t" 524 "ldmia r1!, {r4, r5} \n\t" 525 526 "umull r9, r10, r3, r6 \n\t" 527 "stmia r0!, {r9} \n\t" 528 529 "mov r14, #0 \n\t" 530 "umull r9, r12, r3, r7 \n\t" 531 "adds r10, r9 \n\t" 532 "adc r12, #0 \n\t" 533 "umull r9, r11, r4, r6 \n\t" 534 "adds r10, r9 \n\t" 535 "adcs r12, r11 \n\t" 536 "adc r14, #0 \n\t" 537 "stmia r0!, {r10} \n\t" 538 539 "mov r9, #0 \n\t" 540 "umull r10, r11, r3, r8 \n\t" 541 "adds r12, r10 \n\t" 542 "adcs r14, r11 \n\t" 543 "adc r9, #0 \n\t" 544 "umull r10, r11, r4, r7 \n\t" 545 "adds r12, r10 \n\t" 546 "adcs r14, r11 \n\t" 547 "adc r9, #0 \n\t" 548 "umull r10, r11, r5, r6 \n\t" 549 "adds r12, r10 \n\t" 550 "adcs r14, r11 \n\t" 551 "adc r9, #0 \n\t" 552 "stmia r0!, {r12} \n\t" 553 554 "ldmia r1!, {r3} \n\t" 555 "mov r10, #0 \n\t" 556 "umull r11, r12, r4, r8 \n\t" 557 "adds r14, r11 \n\t" 558 "adcs r9, r12 \n\t" 559 "adc r10, #0 \n\t" 560 "umull r11, r12, r5, r7 \n\t" 561 "adds r14, r11 \n\t" 562 "adcs r9, r12 \n\t" 563 "adc r10, #0 \n\t" 564 "umull r11, r12, r3, r6 \n\t" 565 "adds r14, r11 \n\t" 566 "adcs r9, r12 \n\t" 567 "adc r10, #0 \n\t" 568 "ldr r11, [r0] \n\t" 569 "adds r14, r11 \n\t" 570 "adcs r9, 
#0 \n\t" 571 "adc r10, #0 \n\t" 572 "stmia r0!, {r14} \n\t" 573 574 "ldmia r2!, {r6} \n\t" 575 "mov r11, #0 \n\t" 576 "umull r12, r14, r4, r6 \n\t" 577 "adds r9, r12 \n\t" 578 "adcs r10, r14 \n\t" 579 "adc r11, #0 \n\t" 580 "umull r12, r14, r5, r8 \n\t" 581 "adds r9, r12 \n\t" 582 "adcs r10, r14 \n\t" 583 "adc r11, #0 \n\t" 584 "umull r12, r14, r3, r7 \n\t" 585 "adds r9, r12 \n\t" 586 "adcs r10, r14 \n\t" 587 "adc r11, #0 \n\t" 588 "ldr r12, [r0] \n\t" 589 "adds r9, r12 \n\t" 590 "adcs r10, #0 \n\t" 591 "adc r11, #0 \n\t" 592 "stmia r0!, {r9} \n\t" 593 594 "mov r12, #0 \n\t" 595 "umull r14, r9, r5, r6 \n\t" 596 "adds r10, r14 \n\t" 597 "adcs r11, r9 \n\t" 598 "adc r12, #0 \n\t" 599 "umull r14, r9, r3, r8 \n\t" 600 "adds r10, r14 \n\t" 601 "adcs r11, r9 \n\t" 602 "adc r12, #0 \n\t" 603 "stmia r0!, {r10} \n\t" 604 605 "umull r9, r10, r3, r6 \n\t" 606 "adds r11, r9 \n\t" 607 "adc r12, r10 \n\t" 608 "stmia r0!, {r11, r12} \n\t" 609 610 "sub r0, 44 \n\t" 611 "sub r1, 16 \n\t" 612 "sub r2, 28 \n\t" 613 "ldmia r1!, {r3,r4,r5} \n\t" 614 "ldmia r2!, {r6,r7,r8} \n\t" 615 616 "umull r9, r10, r3, r6 \n\t" 617 "stmia r0!, {r9} \n\t" 618 619 "mov r14, #0 \n\t" 620 "umull r9, r12, r3, r7 \n\t" 621 "adds r10, r9 \n\t" 622 "adc r12, #0 \n\t" 623 "umull r9, r11, r4, r6 \n\t" 624 "adds r10, r9 \n\t" 625 "adcs r12, r11 \n\t" 626 "adc r14, #0 \n\t" 627 "stmia r0!, {r10} \n\t" 628 629 "mov r9, #0 \n\t" 630 "umull r10, r11, r3, r8 \n\t" 631 "adds r12, r10 \n\t" 632 "adcs r14, r11 \n\t" 633 "adc r9, #0 \n\t" 634 "umull r10, r11, r4, r7 \n\t" 635 "adds r12, r10 \n\t" 636 "adcs r14, r11 \n\t" 637 "adc r9, #0 \n\t" 638 "umull r10, r11, r5, r6 \n\t" 639 "adds r12, r10 \n\t" 640 "adcs r14, r11 \n\t" 641 "adc r9, #0 \n\t" 642 "stmia r0!, {r12} \n\t" 643 644 "ldmia r1!, {r3} \n\t" 645 "mov r10, #0 \n\t" 646 "umull r11, r12, r4, r8 \n\t" 647 "adds r14, r11 \n\t" 648 "adcs r9, r12 \n\t" 649 "adc r10, #0 \n\t" 650 "umull r11, r12, r5, r7 \n\t" 651 "adds r14, r11 \n\t" 652 "adcs r9, r12 \n\t" 653 
"adc r10, #0 \n\t" 654 "umull r11, r12, r3, r6 \n\t" 655 "adds r14, r11 \n\t" 656 "adcs r9, r12 \n\t" 657 "adc r10, #0 \n\t" 658 "ldr r11, [r0] \n\t" 659 "adds r14, r11 \n\t" 660 "adcs r9, #0 \n\t" 661 "adc r10, #0 \n\t" 662 "stmia r0!, {r14} \n\t" 663 664 "ldmia r1!, {r4} \n\t" 665 "mov r11, #0 \n\t" 666 "umull r12, r14, r5, r8 \n\t" 667 "adds r9, r12 \n\t" 668 "adcs r10, r14 \n\t" 669 "adc r11, #0 \n\t" 670 "umull r12, r14, r3, r7 \n\t" 671 "adds r9, r12 \n\t" 672 "adcs r10, r14 \n\t" 673 "adc r11, #0 \n\t" 674 "umull r12, r14, r4, r6 \n\t" 675 "adds r9, r12 \n\t" 676 "adcs r10, r14 \n\t" 677 "adc r11, #0 \n\t" 678 "ldr r12, [r0] \n\t" 679 "adds r9, r12 \n\t" 680 "adcs r10, #0 \n\t" 681 "adc r11, #0 \n\t" 682 "stmia r0!, {r9} \n\t" 683 684 "ldmia r1!, {r5} \n\t" 685 "mov r12, #0 \n\t" 686 "umull r14, r9, r3, r8 \n\t" 687 "adds r10, r14 \n\t" 688 "adcs r11, r9 \n\t" 689 "adc r12, #0 \n\t" 690 "umull r14, r9, r4, r7 \n\t" 691 "adds r10, r14 \n\t" 692 "adcs r11, r9 \n\t" 693 "adc r12, #0 \n\t" 694 "umull r14, r9, r5, r6 \n\t" 695 "adds r10, r14 \n\t" 696 "adcs r11, r9 \n\t" 697 "adc r12, #0 \n\t" 698 "ldr r14, [r0] \n\t" 699 "adds r10, r14 \n\t" 700 "adcs r11, #0 \n\t" 701 "adc r12, #0 \n\t" 702 "stmia r0!, {r10} \n\t" 703 704 "ldmia r1!, {r3} \n\t" 705 "mov r14, #0 \n\t" 706 "umull r9, r10, r4, r8 \n\t" 707 "adds r11, r9 \n\t" 708 "adcs r12, r10 \n\t" 709 "adc r14, #0 \n\t" 710 "umull r9, r10, r5, r7 \n\t" 711 "adds r11, r9 \n\t" 712 "adcs r12, r10 \n\t" 713 "adc r14, #0 \n\t" 714 "umull r9, r10, r3, r6 \n\t" 715 "adds r11, r9 \n\t" 716 "adcs r12, r10 \n\t" 717 "adc r14, #0 \n\t" 718 "ldr r9, [r0] \n\t" 719 "adds r11, r9 \n\t" 720 "adcs r12, #0 \n\t" 721 "adc r14, #0 \n\t" 722 "stmia r0!, {r11} \n\t" 723 724 "ldmia r2!, {r6} \n\t" 725 "mov r9, #0 \n\t" 726 "umull r10, r11, r4, r6 \n\t" 727 "adds r12, r10 \n\t" 728 "adcs r14, r11 \n\t" 729 "adc r9, #0 \n\t" 730 "umull r10, r11, r5, r8 \n\t" 731 "adds r12, r10 \n\t" 732 "adcs r14, r11 \n\t" 733 "adc r9, #0 \n\t" 734 
"umull r10, r11, r3, r7 \n\t" 735 "adds r12, r10 \n\t" 736 "adcs r14, r11 \n\t" 737 "adc r9, #0 \n\t" 738 "ldr r10, [r0] \n\t" 739 "adds r12, r10 \n\t" 740 "adcs r14, #0 \n\t" 741 "adc r9, #0 \n\t" 742 "stmia r0!, {r12} \n\t" 743 744 "ldmia r2!, {r7} \n\t" 745 "mov r10, #0 \n\t" 746 "umull r11, r12, r4, r7 \n\t" 747 "adds r14, r11 \n\t" 748 "adcs r9, r12 \n\t" 749 "adc r10, #0 \n\t" 750 "umull r11, r12, r5, r6 \n\t" 751 "adds r14, r11 \n\t" 752 "adcs r9, r12 \n\t" 753 "adc r10, #0 \n\t" 754 "umull r11, r12, r3, r8 \n\t" 755 "adds r14, r11 \n\t" 756 "adcs r9, r12 \n\t" 757 "adc r10, #0 \n\t" 758 "ldr r11, [r0] \n\t" 759 "adds r14, r11 \n\t" 760 "adcs r9, #0 \n\t" 761 "adc r10, #0 \n\t" 762 "stmia r0!, {r14} \n\t" 763 764 "ldmia r2!, {r8} \n\t" 765 "mov r11, #0 \n\t" 766 "umull r12, r14, r4, r8 \n\t" 767 "adds r9, r12 \n\t" 768 "adcs r10, r14 \n\t" 769 "adc r11, #0 \n\t" 770 "umull r12, r14, r5, r7 \n\t" 771 "adds r9, r12 \n\t" 772 "adcs r10, r14 \n\t" 773 "adc r11, #0 \n\t" 774 "umull r12, r14, r3, r6 \n\t" 775 "adds r9, r12 \n\t" 776 "adcs r10, r14 \n\t" 777 "adc r11, #0 \n\t" 778 "ldr r12, [r0] \n\t" 779 "adds r9, r12 \n\t" 780 "adcs r10, #0 \n\t" 781 "adc r11, #0 \n\t" 782 "stmia r0!, {r9} \n\t" 783 784 "ldmia r2!, {r6} \n\t" 785 "mov r12, #0 \n\t" 786 "umull r14, r9, r4, r6 \n\t" 787 "adds r10, r14 \n\t" 788 "adcs r11, r9 \n\t" 789 "adc r12, #0 \n\t" 790 "umull r14, r9, r5, r8 \n\t" 791 "adds r10, r14 \n\t" 792 "adcs r11, r9 \n\t" 793 "adc r12, #0 \n\t" 794 "umull r14, r9, r3, r7 \n\t" 795 "adds r10, r14 \n\t" 796 "adcs r11, r9 \n\t" 797 "adc r12, #0 \n\t" 798 "ldr r14, [r0] \n\t" 799 "adds r10, r14 \n\t" 800 "adcs r11, #0 \n\t" 801 "adc r12, #0 \n\t" 802 "stmia r0!, {r10} \n\t" 803 804 "mov r14, #0 \n\t" 805 "umull r9, r10, r5, r6 \n\t" 806 "adds r11, r9 \n\t" 807 "adcs r12, r10 \n\t" 808 "adc r14, #0 \n\t" 809 "umull r9, r10, r3, r8 \n\t" 810 "adds r11, r9 \n\t" 811 "adcs r12, r10 \n\t" 812 "adc r14, #0 \n\t" 813 "stmia r0!, {r11} \n\t" 814 815 "umull r10, 
r11, r3, r6 \n\t" 816 "adds r12, r10 \n\t" 817 "adc r14, r11 \n\t" 818 "stmia r0!, {r12, r14} \n\t" 819 #if (uECC_PLATFORM != uECC_arm_thumb2) 820 ".syntax divided \n\t" 821 #endif 822 : "+r" (r0), "+r" (r1), "+r" (r2) 823 : 824 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 825 ); 826} 827#define asm_mult 1 828#endif /* (uECC_WORDS == 7) */ 829 830#if (uECC_WORDS == 8) 831static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) { 832 register uint32_t *r0 __asm__("r0") = result; 833 register const uint32_t *r1 __asm__("r1") = left; 834 register const uint32_t *r2 __asm__("r2") = right; 835 836 __asm__ volatile ( 837 ".syntax unified \n\t" 838 "add r0, 24 \n\t" 839 "add r2, 24 \n\t" 840 "ldmia r1!, {r3,r4} \n\t" 841 "ldmia r2!, {r6,r7} \n\t" 842 843 "umull r11, r12, r3, r6 \n\t" 844 "stmia r0!, {r11} \n\t" 845 846 "mov r10, #0 \n\t" 847 "umull r11, r9, r3, r7 \n\t" 848 "adds r12, r11 \n\t" 849 "adc r9, #0 \n\t" 850 "umull r11, r14, r4, r6 \n\t" 851 "adds r12, r11 \n\t" 852 "adcs r9, r14 \n\t" 853 "adc r10, #0 \n\t" 854 "stmia r0!, {r12} \n\t" 855 856 "umull r12, r14, r4, r7 \n\t" 857 "adds r9, r12 \n\t" 858 "adc r10, r14 \n\t" 859 "stmia r0!, {r9, r10} \n\t" 860 861 "sub r0, 28 \n\t" 862 "sub r2, 20 \n\t" 863 "ldmia r2!, {r6,r7,r8} \n\t" 864 "ldmia r1!, {r5} \n\t" 865 866 "umull r11, r12, r3, r6 \n\t" 867 "stmia r0!, {r11} \n\t" 868 869 "mov r10, #0 \n\t" 870 "umull r11, r9, r3, r7 \n\t" 871 "adds r12, r11 \n\t" 872 "adc r9, #0 \n\t" 873 "umull r11, r14, r4, r6 \n\t" 874 "adds r12, r11 \n\t" 875 "adcs r9, r14 \n\t" 876 "adc r10, #0 \n\t" 877 "stmia r0!, {r12} \n\t" 878 879 "mov r11, #0 \n\t" 880 "umull r12, r14, r3, r8 \n\t" 881 "adds r9, r12 \n\t" 882 "adcs r10, r14 \n\t" 883 "adc r11, #0 \n\t" 884 "umull r12, r14, r4, r7 \n\t" 885 "adds r9, r12 \n\t" 886 "adcs r10, r14 \n\t" 887 "adc r11, #0 \n\t" 888 "umull r12, r14, r5, r6 \n\t" 889 "adds r9, r12 \n\t" 890 "adcs r10, r14 \n\t" 891 "adc r11, #0 
\n\t" 892 "stmia r0!, {r9} \n\t" 893 894 "ldmia r1!, {r3} \n\t" 895 "mov r12, #0 \n\t" 896 "umull r14, r9, r4, r8 \n\t" 897 "adds r10, r14 \n\t" 898 "adcs r11, r9 \n\t" 899 "adc r12, #0 \n\t" 900 "umull r14, r9, r5, r7 \n\t" 901 "adds r10, r14 \n\t" 902 "adcs r11, r9 \n\t" 903 "adc r12, #0 \n\t" 904 "umull r14, r9, r3, r6 \n\t" 905 "adds r10, r14 \n\t" 906 "adcs r11, r9 \n\t" 907 "adc r12, #0 \n\t" 908 "ldr r14, [r0] \n\t" 909 "adds r10, r14 \n\t" 910 "adcs r11, #0 \n\t" 911 "adc r12, #0 \n\t" 912 "stmia r0!, {r10} \n\t" 913 914 "ldmia r1!, {r4} \n\t" 915 "mov r14, #0 \n\t" 916 "umull r9, r10, r5, r8 \n\t" 917 "adds r11, r9 \n\t" 918 "adcs r12, r10 \n\t" 919 "adc r14, #0 \n\t" 920 "umull r9, r10, r3, r7 \n\t" 921 "adds r11, r9 \n\t" 922 "adcs r12, r10 \n\t" 923 "adc r14, #0 \n\t" 924 "umull r9, r10, r4, r6 \n\t" 925 "adds r11, r9 \n\t" 926 "adcs r12, r10 \n\t" 927 "adc r14, #0 \n\t" 928 "ldr r9, [r0] \n\t" 929 "adds r11, r9 \n\t" 930 "adcs r12, #0 \n\t" 931 "adc r14, #0 \n\t" 932 "stmia r0!, {r11} \n\t" 933 934 "ldmia r2!, {r6} \n\t" 935 "mov r9, #0 \n\t" 936 "umull r10, r11, r5, r6 \n\t" 937 "adds r12, r10 \n\t" 938 "adcs r14, r11 \n\t" 939 "adc r9, #0 \n\t" 940 "umull r10, r11, r3, r8 \n\t" 941 "adds r12, r10 \n\t" 942 "adcs r14, r11 \n\t" 943 "adc r9, #0 \n\t" 944 "umull r10, r11, r4, r7 \n\t" 945 "adds r12, r10 \n\t" 946 "adcs r14, r11 \n\t" 947 "adc r9, #0 \n\t" 948 "ldr r10, [r0] \n\t" 949 "adds r12, r10 \n\t" 950 "adcs r14, #0 \n\t" 951 "adc r9, #0 \n\t" 952 "stmia r0!, {r12} \n\t" 953 954 "ldmia r2!, {r7} \n\t" 955 "mov r10, #0 \n\t" 956 "umull r11, r12, r5, r7 \n\t" 957 "adds r14, r11 \n\t" 958 "adcs r9, r12 \n\t" 959 "adc r10, #0 \n\t" 960 "umull r11, r12, r3, r6 \n\t" 961 "adds r14, r11 \n\t" 962 "adcs r9, r12 \n\t" 963 "adc r10, #0 \n\t" 964 "umull r11, r12, r4, r8 \n\t" 965 "adds r14, r11 \n\t" 966 "adcs r9, r12 \n\t" 967 "adc r10, #0 \n\t" 968 "ldr r11, [r0] \n\t" 969 "adds r14, r11 \n\t" 970 "adcs r9, #0 \n\t" 971 "adc r10, #0 \n\t" 972 "stmia r0!, 
{r14} \n\t" 973 974 "mov r11, #0 \n\t" 975 "umull r12, r14, r3, r7 \n\t" 976 "adds r9, r12 \n\t" 977 "adcs r10, r14 \n\t" 978 "adc r11, #0 \n\t" 979 "umull r12, r14, r4, r6 \n\t" 980 "adds r9, r12 \n\t" 981 "adcs r10, r14 \n\t" 982 "adc r11, #0 \n\t" 983 "stmia r0!, {r9} \n\t" 984 985 "umull r14, r9, r4, r7 \n\t" 986 "adds r10, r14 \n\t" 987 "adc r11, r9 \n\t" 988 "stmia r0!, {r10, r11} \n\t" 989 990 "sub r0, 52 \n\t" 991 "sub r1, 20 \n\t" 992 "sub r2, 32 \n\t" 993 "ldmia r1!, {r3,r4,r5} \n\t" 994 "ldmia r2!, {r6,r7,r8} \n\t" 995 996 "umull r11, r12, r3, r6 \n\t" 997 "stmia r0!, {r11} \n\t" 998 999 "mov r10, #0 \n\t" 1000 "umull r11, r9, r3, r7 \n\t" 1001 "adds r12, r11 \n\t" 1002 "adc r9, #0 \n\t" 1003 "umull r11, r14, r4, r6 \n\t" 1004 "adds r12, r11 \n\t" 1005 "adcs r9, r14 \n\t" 1006 "adc r10, #0 \n\t" 1007 "stmia r0!, {r12} \n\t" 1008 1009 "mov r11, #0 \n\t" 1010 "umull r12, r14, r3, r8 \n\t" 1011 "adds r9, r12 \n\t" 1012 "adcs r10, r14 \n\t" 1013 "adc r11, #0 \n\t" 1014 "umull r12, r14, r4, r7 \n\t" 1015 "adds r9, r12 \n\t" 1016 "adcs r10, r14 \n\t" 1017 "adc r11, #0 \n\t" 1018 "umull r12, r14, r5, r6 \n\t" 1019 "adds r9, r12 \n\t" 1020 "adcs r10, r14 \n\t" 1021 "adc r11, #0 \n\t" 1022 "stmia r0!, {r9} \n\t" 1023 1024 "ldmia r1!, {r3} \n\t" 1025 "mov r12, #0 \n\t" 1026 "umull r14, r9, r4, r8 \n\t" 1027 "adds r10, r14 \n\t" 1028 "adcs r11, r9 \n\t" 1029 "adc r12, #0 \n\t" 1030 "umull r14, r9, r5, r7 \n\t" 1031 "adds r10, r14 \n\t" 1032 "adcs r11, r9 \n\t" 1033 "adc r12, #0 \n\t" 1034 "umull r14, r9, r3, r6 \n\t" 1035 "adds r10, r14 \n\t" 1036 "adcs r11, r9 \n\t" 1037 "adc r12, #0 \n\t" 1038 "ldr r14, [r0] \n\t" 1039 "adds r10, r14 \n\t" 1040 "adcs r11, #0 \n\t" 1041 "adc r12, #0 \n\t" 1042 "stmia r0!, {r10} \n\t" 1043 1044 "ldmia r1!, {r4} \n\t" 1045 "mov r14, #0 \n\t" 1046 "umull r9, r10, r5, r8 \n\t" 1047 "adds r11, r9 \n\t" 1048 "adcs r12, r10 \n\t" 1049 "adc r14, #0 \n\t" 1050 "umull r9, r10, r3, r7 \n\t" 1051 "adds r11, r9 \n\t" 1052 "adcs r12, r10 \n\t" 
1053 "adc r14, #0 \n\t" 1054 "umull r9, r10, r4, r6 \n\t" 1055 "adds r11, r9 \n\t" 1056 "adcs r12, r10 \n\t" 1057 "adc r14, #0 \n\t" 1058 "ldr r9, [r0] \n\t" 1059 "adds r11, r9 \n\t" 1060 "adcs r12, #0 \n\t" 1061 "adc r14, #0 \n\t" 1062 "stmia r0!, {r11} \n\t" 1063 1064 "ldmia r1!, {r5} \n\t" 1065 "mov r9, #0 \n\t" 1066 "umull r10, r11, r3, r8 \n\t" 1067 "adds r12, r10 \n\t" 1068 "adcs r14, r11 \n\t" 1069 "adc r9, #0 \n\t" 1070 "umull r10, r11, r4, r7 \n\t" 1071 "adds r12, r10 \n\t" 1072 "adcs r14, r11 \n\t" 1073 "adc r9, #0 \n\t" 1074 "umull r10, r11, r5, r6 \n\t" 1075 "adds r12, r10 \n\t" 1076 "adcs r14, r11 \n\t" 1077 "adc r9, #0 \n\t" 1078 "ldr r10, [r0] \n\t" 1079 "adds r12, r10 \n\t" 1080 "adcs r14, #0 \n\t" 1081 "adc r9, #0 \n\t" 1082 "stmia r0!, {r12} \n\t" 1083 1084 "ldmia r1!, {r3} \n\t" 1085 "mov r10, #0 \n\t" 1086 "umull r11, r12, r4, r8 \n\t" 1087 "adds r14, r11 \n\t" 1088 "adcs r9, r12 \n\t" 1089 "adc r10, #0 \n\t" 1090 "umull r11, r12, r5, r7 \n\t" 1091 "adds r14, r11 \n\t" 1092 "adcs r9, r12 \n\t" 1093 "adc r10, #0 \n\t" 1094 "umull r11, r12, r3, r6 \n\t" 1095 "adds r14, r11 \n\t" 1096 "adcs r9, r12 \n\t" 1097 "adc r10, #0 \n\t" 1098 "ldr r11, [r0] \n\t" 1099 "adds r14, r11 \n\t" 1100 "adcs r9, #0 \n\t" 1101 "adc r10, #0 \n\t" 1102 "stmia r0!, {r14} \n\t" 1103 1104 "ldmia r1!, {r4} \n\t" 1105 "mov r11, #0 \n\t" 1106 "umull r12, r14, r5, r8 \n\t" 1107 "adds r9, r12 \n\t" 1108 "adcs r10, r14 \n\t" 1109 "adc r11, #0 \n\t" 1110 "umull r12, r14, r3, r7 \n\t" 1111 "adds r9, r12 \n\t" 1112 "adcs r10, r14 \n\t" 1113 "adc r11, #0 \n\t" 1114 "umull r12, r14, r4, r6 \n\t" 1115 "adds r9, r12 \n\t" 1116 "adcs r10, r14 \n\t" 1117 "adc r11, #0 \n\t" 1118 "ldr r12, [r0] \n\t" 1119 "adds r9, r12 \n\t" 1120 "adcs r10, #0 \n\t" 1121 "adc r11, #0 \n\t" 1122 "stmia r0!, {r9} \n\t" 1123 1124 "ldmia r2!, {r6} \n\t" 1125 "mov r12, #0 \n\t" 1126 "umull r14, r9, r5, r6 \n\t" 1127 "adds r10, r14 \n\t" 1128 "adcs r11, r9 \n\t" 1129 "adc r12, #0 \n\t" 1130 "umull r14, r9, r3, 
r8 \n\t" 1131 "adds r10, r14 \n\t" 1132 "adcs r11, r9 \n\t" 1133 "adc r12, #0 \n\t" 1134 "umull r14, r9, r4, r7 \n\t" 1135 "adds r10, r14 \n\t" 1136 "adcs r11, r9 \n\t" 1137 "adc r12, #0 \n\t" 1138 "ldr r14, [r0] \n\t" 1139 "adds r10, r14 \n\t" 1140 "adcs r11, #0 \n\t" 1141 "adc r12, #0 \n\t" 1142 "stmia r0!, {r10} \n\t" 1143 1144 "ldmia r2!, {r7} \n\t" 1145 "mov r14, #0 \n\t" 1146 "umull r9, r10, r5, r7 \n\t" 1147 "adds r11, r9 \n\t" 1148 "adcs r12, r10 \n\t" 1149 "adc r14, #0 \n\t" 1150 "umull r9, r10, r3, r6 \n\t" 1151 "adds r11, r9 \n\t" 1152 "adcs r12, r10 \n\t" 1153 "adc r14, #0 \n\t" 1154 "umull r9, r10, r4, r8 \n\t" 1155 "adds r11, r9 \n\t" 1156 "adcs r12, r10 \n\t" 1157 "adc r14, #0 \n\t" 1158 "ldr r9, [r0] \n\t" 1159 "adds r11, r9 \n\t" 1160 "adcs r12, #0 \n\t" 1161 "adc r14, #0 \n\t" 1162 "stmia r0!, {r11} \n\t" 1163 1164 "ldmia r2!, {r8} \n\t" 1165 "mov r9, #0 \n\t" 1166 "umull r10, r11, r5, r8 \n\t" 1167 "adds r12, r10 \n\t" 1168 "adcs r14, r11 \n\t" 1169 "adc r9, #0 \n\t" 1170 "umull r10, r11, r3, r7 \n\t" 1171 "adds r12, r10 \n\t" 1172 "adcs r14, r11 \n\t" 1173 "adc r9, #0 \n\t" 1174 "umull r10, r11, r4, r6 \n\t" 1175 "adds r12, r10 \n\t" 1176 "adcs r14, r11 \n\t" 1177 "adc r9, #0 \n\t" 1178 "ldr r10, [r0] \n\t" 1179 "adds r12, r10 \n\t" 1180 "adcs r14, #0 \n\t" 1181 "adc r9, #0 \n\t" 1182 "stmia r0!, {r12} \n\t" 1183 1184 "ldmia r2!, {r6} \n\t" 1185 "mov r10, #0 \n\t" 1186 "umull r11, r12, r5, r6 \n\t" 1187 "adds r14, r11 \n\t" 1188 "adcs r9, r12 \n\t" 1189 "adc r10, #0 \n\t" 1190 "umull r11, r12, r3, r8 \n\t" 1191 "adds r14, r11 \n\t" 1192 "adcs r9, r12 \n\t" 1193 "adc r10, #0 \n\t" 1194 "umull r11, r12, r4, r7 \n\t" 1195 "adds r14, r11 \n\t" 1196 "adcs r9, r12 \n\t" 1197 "adc r10, #0 \n\t" 1198 "ldr r11, [r0] \n\t" 1199 "adds r14, r11 \n\t" 1200 "adcs r9, #0 \n\t" 1201 "adc r10, #0 \n\t" 1202 "stmia r0!, {r14} \n\t" 1203 1204 "ldmia r2!, {r7} \n\t" 1205 "mov r11, #0 \n\t" 1206 "umull r12, r14, r5, r7 \n\t" 1207 "adds r9, r12 \n\t" 1208 "adcs r10, 
/* Computes result = left * left for 5-word operands; ten words are written to result.

   The routine is fully unrolled and uses fixed registers. left[0..4] are loaded into
   r2..r6 up front. Output word k ("column k") is the sum of all products
   left[i] * left[j] with i + j == k: a product with i != j is counted twice, a square
   left[i] * left[i] once. A three-register accumulator holds the running sum; after
   the low word of a column has been stored, the two upper words become the carry-in
   of the next column.

   r1 enters as the pointer to left, but once the input words are in registers it is
   reused as a scratch register, which is why it is declared as a read-write operand.
   r7 appears in the clobber list although this variant never references it. */
static void vli_square(uint32_t *result, const uint32_t *left) {
    register uint32_t *r0 __asm__("r0") = result;
    register const uint32_t *r1 __asm__("r1") = left;

    __asm__ volatile (
        ".syntax unified \n\t"
        /* r2..r6 = left[0..4] */
        "ldmia r1!, {r2,r3,r4,r5,r6} \n\t"

        /* Column 0: left[0] * left[0]. Low word is result[0], high word stays in r12. */
        "umull r11, r12, r2, r2 \n\t"
        "stmia r0!, {r11} \n\t"

        /* Column 1: left[0] * left[1], added twice. Accumulator is (r12, r8, r9). */
        "mov r9, #0 \n\t"
        "umull r10, r11, r2, r3 \n\t"
        "adds r12, r10 \n\t"
        "adcs r8, r11, #0 \n\t" /* r8 = high word of the product + carry */
        "adc r9, #0 \n\t"
        "adds r12, r10 \n\t"
        "adcs r8, r11 \n\t"
        "adc r9, #0 \n\t"
        "stmia r0!, {r12} \n\t"

        /* Column 2: 2 * left[0] * left[2] + left[1] * left[1]. Accumulator is (r8, r9, r10). */
        "mov r10, #0 \n\t"
        "umull r11, r12, r2, r4 \n\t"
        "adds r11, r11 \n\t" /* double the cross product; the bit shifted out goes to r10 */
        "adcs r12, r12 \n\t"
        "adc r10, #0 \n\t"
        "adds r8, r11 \n\t"
        "adcs r9, r12 \n\t"
        "adc r10, #0 \n\t"
        "umull r11, r12, r3, r3 \n\t"
        "adds r8, r11 \n\t"
        "adcs r9, r12 \n\t"
        "adc r10, #0 \n\t"
        "stmia r0!, {r8} \n\t"

        /* Column 3: 2 * (left[0] * left[3] + left[1] * left[2]), plus carry-in (r9, r10).
           From here on r1 and r14 are used as scratch for the second product. */
        "mov r12, #0 \n\t"
        "umull r8, r11, r2, r5 \n\t"
        "umull r1, r14, r3, r4 \n\t"
        "adds r8, r1 \n\t"
        "adcs r11, r14 \n\t"
        "adc r12, #0 \n\t"
        "adds r8, r8 \n\t" /* double the three-word sum (r8, r11, r12) */
        "adcs r11, r11 \n\t"
        "adc r12, r12 \n\t"
        "adds r8, r9 \n\t" /* add the carry-in from column 2 */
        "adcs r11, r10 \n\t"
        "adc r12, #0 \n\t"
        "stmia r0!, {r8} \n\t"

        /* Column 4: 2 * (left[0] * left[4] + left[1] * left[3]) + left[2] * left[2],
           plus carry-in (r11, r12). */
        "mov r10, #0 \n\t"
        "umull r8, r9, r2, r6 \n\t"
        "umull r1, r14, r3, r5 \n\t"
        "adds r8, r1 \n\t"
        "adcs r9, r14 \n\t"
        "adc r10, #0 \n\t"
        "adds r8, r8 \n\t"
        "adcs r9, r9 \n\t"
        "adc r10, r10 \n\t"
        "umull r1, r14, r4, r4 \n\t"
        "adds r8, r1 \n\t"
        "adcs r9, r14 \n\t"
        "adc r10, #0 \n\t"
        "adds r8, r11 \n\t"
        "adcs r9, r12 \n\t"
        "adc r10, #0 \n\t"
        "stmia r0!, {r8} \n\t"

        /* Column 5: 2 * (left[1] * left[4] + left[2] * left[3]), plus carry-in (r9, r10). */
        "mov r12, #0 \n\t"
        "umull r8, r11, r3, r6 \n\t"
        "umull r1, r14, r4, r5 \n\t"
        "adds r8, r1 \n\t"
        "adcs r11, r14 \n\t"
        "adc r12, #0 \n\t"
        "adds r8, r8 \n\t"
        "adcs r11, r11 \n\t"
        "adc r12, r12 \n\t"
        "adds r8, r9 \n\t"
        "adcs r11, r10 \n\t"
        "adc r12, #0 \n\t"
        "stmia r0!, {r8} \n\t"

        /* Column 6: 2 * left[2] * left[4] + left[3] * left[3], accumulated directly
           onto the carry-in; accumulator is (r11, r12, r8). */
        "mov r8, #0 \n\t"
        "umull r1, r10, r4, r6 \n\t"
        "adds r1, r1 \n\t"
        "adcs r10, r10 \n\t"
        "adc r8, #0 \n\t"
        "adds r11, r1 \n\t"
        "adcs r12, r10 \n\t"
        "adc r8, #0 \n\t"
        "umull r1, r10, r5, r5 \n\t"
        "adds r11, r1 \n\t"
        "adcs r12, r10 \n\t"
        "adc r8, #0 \n\t"
        "stmia r0!, {r11} \n\t"

        /* Column 7: 2 * left[3] * left[4]; accumulator is (r12, r8, r11). */
        "mov r11, #0 \n\t"
        "umull r1, r10, r5, r6 \n\t"
        "adds r1, r1 \n\t"
        "adcs r10, r10 \n\t"
        "adc r11, #0 \n\t"
        "adds r12, r1 \n\t"
        "adcs r8, r10 \n\t"
        "adc r11, #0 \n\t"
        "stmia r0!, {r12} \n\t"

        /* Columns 8 and 9: left[4] * left[4] added to the remaining carry words. */
        "umull r1, r10, r6, r6 \n\t"
        "adds r8, r1 \n\t"
        "adcs r11, r10 \n\t"
        "stmia r0!, {r8, r11} \n\t"
    #if (uECC_PLATFORM != uECC_arm_thumb2)
        ".syntax divided \n\t"
    #endif
        : "+r" (r0), "+r" (r1)
        :
        : "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
    );
}
"mov r12, #0 \n\t" 1441 "umull r8, r11, r2, r7 \n\t" 1442 "umull r1, r14, r3, r6 \n\t" 1443 "adds r8, r1 \n\t" 1444 "adcs r11, r14 \n\t" 1445 "adc r12, #0 \n\t" 1446 "umull r1, r14, r4, r5 \n\t" 1447 "adds r8, r1 \n\t" 1448 "adcs r11, r14 \n\t" 1449 "adc r12, #0 \n\t" 1450 "adds r8, r8 \n\t" 1451 "adcs r11, r11 \n\t" 1452 "adc r12, r12 \n\t" 1453 "adds r8, r9 \n\t" 1454 "adcs r11, r10 \n\t" 1455 "adc r12, #0 \n\t" 1456 "stmia r0!, {r8} \n\t" 1457 1458 "mov r10, #0 \n\t" 1459 "umull r8, r9, r3, r7 \n\t" 1460 "umull r1, r14, r4, r6 \n\t" 1461 "adds r8, r1 \n\t" 1462 "adcs r9, r14 \n\t" 1463 "adc r10, #0 \n\t" 1464 "adds r8, r8 \n\t" 1465 "adcs r9, r9 \n\t" 1466 "adc r10, r10 \n\t" 1467 "umull r1, r14, r5, r5 \n\t" 1468 "adds r8, r1 \n\t" 1469 "adcs r9, r14 \n\t" 1470 "adc r10, #0 \n\t" 1471 "adds r8, r11 \n\t" 1472 "adcs r9, r12 \n\t" 1473 "adc r10, #0 \n\t" 1474 "stmia r0!, {r8} \n\t" 1475 1476 "mov r12, #0 \n\t" 1477 "umull r8, r11, r4, r7 \n\t" 1478 "umull r1, r14, r5, r6 \n\t" 1479 "adds r8, r1 \n\t" 1480 "adcs r11, r14 \n\t" 1481 "adc r12, #0 \n\t" 1482 "adds r8, r8 \n\t" 1483 "adcs r11, r11 \n\t" 1484 "adc r12, r12 \n\t" 1485 "adds r8, r9 \n\t" 1486 "adcs r11, r10 \n\t" 1487 "adc r12, #0 \n\t" 1488 "stmia r0!, {r8} \n\t" 1489 1490 "mov r8, #0 \n\t" 1491 "umull r1, r10, r5, r7 \n\t" 1492 "adds r1, r1 \n\t" 1493 "adcs r10, r10 \n\t" 1494 "adc r8, #0 \n\t" 1495 "adds r11, r1 \n\t" 1496 "adcs r12, r10 \n\t" 1497 "adc r8, #0 \n\t" 1498 "umull r1, r10, r6, r6 \n\t" 1499 "adds r11, r1 \n\t" 1500 "adcs r12, r10 \n\t" 1501 "adc r8, #0 \n\t" 1502 "stmia r0!, {r11} \n\t" 1503 1504 "mov r11, #0 \n\t" 1505 "umull r1, r10, r6, r7 \n\t" 1506 "adds r1, r1 \n\t" 1507 "adcs r10, r10 \n\t" 1508 "adc r11, #0 \n\t" 1509 "adds r12, r1 \n\t" 1510 "adcs r8, r10 \n\t" 1511 "adc r11, #0 \n\t" 1512 "stmia r0!, {r12} \n\t" 1513 1514 "umull r1, r10, r7, r7 \n\t" 1515 "adds r8, r1 \n\t" 1516 "adcs r11, r10 \n\t" 1517 "stmia r0!, {r8, r11} \n\t" 1518 #if (uECC_PLATFORM != uECC_arm_thumb2) 
/* Computes result = left * left for 7-word operands; fourteen words are written to result.

   The routine is fully unrolled and uses fixed registers. Output word k ("column k")
   is the sum of all products left[i] * left[j] with i + j == k: a product with i != j
   is counted twice, a square left[i] * left[i] once. A three-register accumulator holds
   the running sum; after the low word of a column has been stored, the two upper words
   become the carry-in of the next column.

   Only six input words are held in r2..r7 at a time. The product left[0] * left[6] is
   therefore computed first and parked in result[6..7]; columns 6 and 7 read it back
   with ldr before those result words are overwritten. After column 5, r2 is reloaded
   with left[6] (left[0] is not needed any more).

   umlal adds a 64-bit product into a register pair without producing a carry out. The
   code copies the old high word to r14 first; if the old high word is higher than the
   new one ("cmp" + HI), the addition wrapped and adchi adds 1 to the third accumulator
   word.

   r1 enters as the pointer to left and is reused as a scratch register in the last
   columns, which is why it is declared as a read-write operand.

   NOTE(review): result[6..7] are written before left[1..6] are loaded, so result
   presumably must not overlap left -- confirm against the callers. */
static void vli_square(uint32_t *result, const uint32_t *left) {
    register uint32_t *r0 __asm__("r0") = result;
    register const uint32_t *r1 __asm__("r1") = left;

    __asm__ volatile (
        ".syntax unified \n\t"
        /* Park left[0] * left[6] in result[6..7], then rewind both pointers. */
        "ldmia r1!, {r2} \n\t"  /* r2 = left[0], r1 = left + 4 bytes */
        "add r1, 20 \n\t"       /* r1 = left + 24 bytes */
        "ldmia r1!, {r5} \n\t"  /* r5 = left[6], r1 = left + 28 bytes */
        "add r0, 24 \n\t"       /* r0 = &result[6] */
        "umull r8, r9, r2, r5 \n\t"
        "stmia r0!, {r8, r9} \n\t"
        "sub r0, 32 \n\t"       /* r0 = result */
        "sub r1, 28 \n\t"       /* r1 = left */

        /* r2..r7 = left[0..5]; r1 now points at left[6]. */
        "ldmia r1!, {r2, r3, r4, r5, r6, r7} \n\t"

        /* Column 0: left[0] * left[0]. Low word is result[0], high word stays in r12. */
        "umull r11, r12, r2, r2 \n\t"
        "stmia r0!, {r11} \n\t"

        /* Column 1: left[0] * left[1], added twice. Accumulator is (r12, r8, r9). */
        "mov r9, #0 \n\t"
        "umull r10, r11, r2, r3 \n\t"
        "adds r12, r10 \n\t"
        "adcs r8, r11, #0 \n\t" /* r8 = high word of the product + carry */
        "adc r9, #0 \n\t"
        "adds r12, r10 \n\t"
        "adcs r8, r11 \n\t"
        "adc r9, #0 \n\t"
        "stmia r0!, {r12} \n\t"

        /* Column 2: 2 * left[0] * left[2] + left[1] * left[1]. Accumulator is (r8, r9, r10). */
        "mov r10, #0 \n\t"
        "umull r11, r12, r2, r4 \n\t"
        "adds r11, r11 \n\t"
        "adcs r12, r12 \n\t"
        "adc r10, #0 \n\t"
        "adds r8, r11 \n\t"
        "adcs r9, r12 \n\t"
        "adc r10, #0 \n\t"
        "umull r11, r12, r3, r3 \n\t"
        "adds r8, r11 \n\t"
        "adcs r9, r12 \n\t"
        "adc r10, #0 \n\t"
        "stmia r0!, {r8} \n\t"

        /* Column 3: 2 * (left[0] * left[3] + left[1] * left[2]), plus carry-in (r9, r10). */
        "mov r12, #0 \n\t"
        "umull r8, r11, r2, r5 \n\t"
        "mov r14, r11 \n\t"        /* remember the high word to detect umlal overflow */
        "umlal r8, r11, r3, r4 \n\t"
        "cmp r14, r11 \n\t"
        "it hi \n\t"
        "adchi r12, #0 \n\t"       /* old high word > new high word: add 1 to r12 */
        "adds r8, r8 \n\t"         /* double the three-word sum */
        "adcs r11, r11 \n\t"
        "adc r12, r12 \n\t"
        "adds r8, r9 \n\t"         /* add the carry-in from column 2 */
        "adcs r11, r10 \n\t"
        "adc r12, #0 \n\t"
        "stmia r0!, {r8} \n\t"

        /* Column 4: 2 * (left[0] * left[4] + left[1] * left[3]) + left[2] * left[2],
           plus carry-in (r11, r12). */
        "mov r10, #0 \n\t"
        "umull r8, r9, r2, r6 \n\t"
        "mov r14, r9 \n\t"
        "umlal r8, r9, r3, r5 \n\t"
        "cmp r14, r9 \n\t"
        "it hi \n\t"
        "adchi r10, #0 \n\t"
        "adds r8, r8 \n\t"
        "adcs r9, r9 \n\t"
        "adc r10, r10 \n\t"
        "mov r14, r9 \n\t"
        "umlal r8, r9, r4, r4 \n\t"
        "cmp r14, r9 \n\t"
        "it hi \n\t"
        "adchi r10, #0 \n\t"
        "adds r8, r11 \n\t"
        "adcs r9, r12 \n\t"
        "adc r10, #0 \n\t"
        "stmia r0!, {r8} \n\t"

        /* Column 5: 2 * (left[0] * left[5] + left[1] * left[4] + left[2] * left[3]),
           plus carry-in (r9, r10). */
        "mov r12, #0 \n\t"
        "umull r8, r11, r2, r7 \n\t"
        "mov r14, r11 \n\t"
        "umlal r8, r11, r3, r6 \n\t"
        "cmp r14, r11 \n\t"
        "it hi \n\t"
        "adchi r12, #0 \n\t"
        "mov r14, r11 \n\t"
        "umlal r8, r11, r4, r5 \n\t"
        "cmp r14, r11 \n\t"
        "it hi \n\t"
        "adchi r12, #0 \n\t"
        "adds r8, r8 \n\t"
        "adcs r11, r11 \n\t"
        "adc r12, r12 \n\t"
        "adds r8, r9 \n\t"
        "adcs r11, r10 \n\t"
        "adc r12, #0 \n\t"
        "stmia r0!, {r8} \n\t"

        /* Column 6: 2 * (left[1] * left[5] + left[2] * left[4] + low word of the parked
           left[0] * left[6]) + left[3] * left[3], plus carry-in (r11, r12). */
        "ldmia r1!, {r2} \n\t"     /* r2 = left[6], replacing left[0] */
        "mov r10, #0 \n\t"
        "umull r8, r9, r3, r7 \n\t"
        "mov r14, r9 \n\t"
        "umlal r8, r9, r4, r6 \n\t"
        "cmp r14, r9 \n\t"
        "it hi \n\t"
        "adchi r10, #0 \n\t"
        "ldr r14, [r0] \n\t"       /* r0 points at result[6]: parked low word */
        "adds r8, r14 \n\t"
        "adcs r9, #0 \n\t"
        "adc r10, #0 \n\t"
        "adds r8, r8 \n\t"
        "adcs r9, r9 \n\t"
        "adc r10, r10 \n\t"
        "mov r14, r9 \n\t"
        "umlal r8, r9, r5, r5 \n\t"
        "cmp r14, r9 \n\t"
        "it hi \n\t"
        "adchi r10, #0 \n\t"
        "adds r8, r11 \n\t"
        "adcs r9, r12 \n\t"
        "adc r10, #0 \n\t"
        "stmia r0!, {r8} \n\t"

        /* Column 7: 2 * (left[1] * left[6] + left[2] * left[5] + left[3] * left[4] +
           high word of the parked left[0] * left[6]), plus carry-in (r9, r10). */
        "mov r12, #0 \n\t"
        "umull r8, r11, r3, r2 \n\t"
        "mov r14, r11 \n\t"
        "umlal r8, r11, r4, r7 \n\t"
        "cmp r14, r11 \n\t"
        "it hi \n\t"
        "adchi r12, #0 \n\t"
        "mov r14, r11 \n\t"
        "umlal r8, r11, r5, r6 \n\t"
        "cmp r14, r11 \n\t"
        "it hi \n\t"
        "adchi r12, #0 \n\t"
        "ldr r14, [r0] \n\t"       /* r0 points at result[7]: parked high word */
        "adds r8, r14 \n\t"
        "adcs r11, #0 \n\t"
        "adc r12, #0 \n\t"
        "adds r8, r8 \n\t"
        "adcs r11, r11 \n\t"
        "adc r12, r12 \n\t"
        "adds r8, r9 \n\t"
        "adcs r11, r10 \n\t"
        "adc r12, #0 \n\t"
        "stmia r0!, {r8} \n\t"

        /* Column 8: 2 * (left[2] * left[6] + left[3] * left[5]) + left[4] * left[4],
           plus carry-in (r11, r12). */
        "mov r10, #0 \n\t"
        "umull r8, r9, r4, r2 \n\t"
        "mov r14, r9 \n\t"
        "umlal r8, r9, r5, r7 \n\t"
        "cmp r14, r9 \n\t"
        "it hi \n\t"
        "adchi r10, #0 \n\t"
        "adds r8, r8 \n\t"
        "adcs r9, r9 \n\t"
        "adc r10, r10 \n\t"
        "mov r14, r9 \n\t"
        "umlal r8, r9, r6, r6 \n\t"
        "cmp r14, r9 \n\t"
        "it hi \n\t"
        "adchi r10, #0 \n\t"
        "adds r8, r11 \n\t"
        "adcs r9, r12 \n\t"
        "adc r10, #0 \n\t"
        "stmia r0!, {r8} \n\t"

        /* Column 9: 2 * (left[3] * left[6] + left[4] * left[5]), plus carry-in (r9, r10). */
        "mov r12, #0 \n\t"
        "umull r8, r11, r5, r2 \n\t"
        "mov r14, r11 \n\t"
        "umlal r8, r11, r6, r7 \n\t"
        "cmp r14, r11 \n\t"
        "it hi \n\t"
        "adchi r12, #0 \n\t"
        "adds r8, r8 \n\t"
        "adcs r11, r11 \n\t"
        "adc r12, r12 \n\t"
        "adds r8, r9 \n\t"
        "adcs r11, r10 \n\t"
        "adc r12, #0 \n\t"
        "stmia r0!, {r8} \n\t"

        /* Column 10: 2 * left[4] * left[6] + left[5] * left[5], accumulated directly
           onto the carry-in; accumulator is (r11, r12, r8). r1 is scratch from here on. */
        "mov r8, #0 \n\t"
        "umull r1, r10, r6, r2 \n\t"
        "adds r1, r1 \n\t"
        "adcs r10, r10 \n\t"
        "adc r8, #0 \n\t"
        "adds r11, r1 \n\t"
        "adcs r12, r10 \n\t"
        "adc r8, #0 \n\t"
        "umull r1, r10, r7, r7 \n\t"
        "adds r11, r1 \n\t"
        "adcs r12, r10 \n\t"
        "adc r8, #0 \n\t"
        "stmia r0!, {r11} \n\t"

        /* Column 11: 2 * left[5] * left[6]; accumulator is (r12, r8, r11). */
        "mov r11, #0 \n\t"
        "umull r1, r10, r7, r2 \n\t"
        "adds r1, r1 \n\t"
        "adcs r10, r10 \n\t"
        "adc r11, #0 \n\t"
        "adds r12, r1 \n\t"
        "adcs r8, r10 \n\t"
        "adc r11, #0 \n\t"
        "stmia r0!, {r12} \n\t"

        /* Columns 12 and 13: left[6] * left[6] added to the remaining carry words. */
        "umull r1, r10, r2, r2 \n\t"
        "adds r8, r1 \n\t"
        "adcs r11, r10 \n\t"
        "stmia r0!, {r8, r11} \n\t"
    #if (uECC_PLATFORM != uECC_arm_thumb2)
        ".syntax divided \n\t"
    #endif
        : "+r" (r0), "+r" (r1)
        :
        : "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
    );
}
__asm__("r0") = result; 1755 register const uint32_t *r1 __asm__("r1") = left; 1756 1757 __asm__ volatile ( 1758 ".syntax unified \n\t" 1759 "ldmia r1!, {r2, r3} \n\t" 1760 "add r1, 16 \n\t" 1761 "ldmia r1!, {r5, r6} \n\t" 1762 "add r0, 24 \n\t" 1763 1764 "umull r8, r9, r2, r5 \n\t" 1765 "stmia r0!, {r8} \n\t" 1766 1767 "umull r12, r10, r2, r6 \n\t" 1768 "adds r9, r12 \n\t" 1769 "adc r10, #0 \n\t" 1770 "stmia r0!, {r9} \n\t" 1771 1772 "umull r8, r9, r3, r6 \n\t" 1773 "adds r10, r8 \n\t" 1774 "adc r11, r9, #0 \n\t" 1775 "stmia r0!, {r10, r11} \n\t" 1776 1777 "sub r0, 40 \n\t" 1778 "sub r1, 32 \n\t" 1779 "ldmia r1!, {r2,r3,r4,r5,r6,r7} \n\t" 1780 1781 "umull r11, r12, r2, r2 \n\t" 1782 "stmia r0!, {r11} \n\t" 1783 1784 "mov r9, #0 \n\t" 1785 "umull r10, r11, r2, r3 \n\t" 1786 "adds r12, r10 \n\t" 1787 "adcs r8, r11, #0 \n\t" 1788 "adc r9, #0 \n\t" 1789 "adds r12, r10 \n\t" 1790 "adcs r8, r11 \n\t" 1791 "adc r9, #0 \n\t" 1792 "stmia r0!, {r12} \n\t" 1793 1794 "mov r10, #0 \n\t" 1795 "umull r11, r12, r2, r4 \n\t" 1796 "adds r11, r11 \n\t" 1797 "adcs r12, r12 \n\t" 1798 "adc r10, #0 \n\t" 1799 "adds r8, r11 \n\t" 1800 "adcs r9, r12 \n\t" 1801 "adc r10, #0 \n\t" 1802 "umull r11, r12, r3, r3 \n\t" 1803 "adds r8, r11 \n\t" 1804 "adcs r9, r12 \n\t" 1805 "adc r10, #0 \n\t" 1806 "stmia r0!, {r8} \n\t" 1807 1808 "mov r12, #0 \n\t" 1809 "umull r8, r11, r2, r5 \n\t" 1810 "mov r14, r11 \n\t" 1811 "umlal r8, r11, r3, r4 \n\t" 1812 "cmp r14, r11 \n\t" 1813 "it hi \n\t" 1814 "adchi r12, #0 \n\t" 1815 "adds r8, r8 \n\t" 1816 "adcs r11, r11 \n\t" 1817 "adc r12, r12 \n\t" 1818 "adds r8, r9 \n\t" 1819 "adcs r11, r10 \n\t" 1820 "adc r12, #0 \n\t" 1821 "stmia r0!, {r8} \n\t" 1822 1823 "mov r10, #0 \n\t" 1824 "umull r8, r9, r2, r6 \n\t" 1825 "mov r14, r9 \n\t" 1826 "umlal r8, r9, r3, r5 \n\t" 1827 "cmp r14, r9 \n\t" 1828 "it hi \n\t" 1829 "adchi r10, #0 \n\t" 1830 "adds r8, r8 \n\t" 1831 "adcs r9, r9 \n\t" 1832 "adc r10, r10 \n\t" 1833 "mov r14, r9 \n\t" 1834 "umlal r8, r9, r4, r4 \n\t" 
1835 "cmp r14, r9 \n\t" 1836 "it hi \n\t" 1837 "adchi r10, #0 \n\t" 1838 "adds r8, r11 \n\t" 1839 "adcs r9, r12 \n\t" 1840 "adc r10, #0 \n\t" 1841 "stmia r0!, {r8} \n\t" 1842 1843 "mov r12, #0 \n\t" 1844 "umull r8, r11, r2, r7 \n\t" 1845 "mov r14, r11 \n\t" 1846 "umlal r8, r11, r3, r6 \n\t" 1847 "cmp r14, r11 \n\t" 1848 "it hi \n\t" 1849 "adchi r12, #0 \n\t" 1850 "mov r14, r11 \n\t" 1851 "umlal r8, r11, r4, r5 \n\t" 1852 "cmp r14, r11 \n\t" 1853 "it hi \n\t" 1854 "adchi r12, #0 \n\t" 1855 "adds r8, r8 \n\t" 1856 "adcs r11, r11 \n\t" 1857 "adc r12, r12 \n\t" 1858 "adds r8, r9 \n\t" 1859 "adcs r11, r10 \n\t" 1860 "adc r12, #0 \n\t" 1861 "stmia r0!, {r8} \n\t" 1862 1863 "ldmia r1!, {r2} \n\t" 1864 "mov r10, #0 \n\t" 1865 "umull r8, r9, r3, r7 \n\t" 1866 "mov r14, r9 \n\t" 1867 "umlal r8, r9, r4, r6 \n\t" 1868 "cmp r14, r9 \n\t" 1869 "it hi \n\t" 1870 "adchi r10, #0 \n\t" 1871 "ldr r14, [r0] \n\t" 1872 "adds r8, r14 \n\t" 1873 "adcs r9, #0 \n\t" 1874 "adc r10, #0 \n\t" 1875 "adds r8, r8 \n\t" 1876 "adcs r9, r9 \n\t" 1877 "adc r10, r10 \n\t" 1878 "mov r14, r9 \n\t" 1879 "umlal r8, r9, r5, r5 \n\t" 1880 "cmp r14, r9 \n\t" 1881 "it hi \n\t" 1882 "adchi r10, #0 \n\t" 1883 "adds r8, r11 \n\t" 1884 "adcs r9, r12 \n\t" 1885 "adc r10, #0 \n\t" 1886 "stmia r0!, {r8} \n\t" 1887 1888 "mov r12, #0 \n\t" 1889 "umull r8, r11, r3, r2 \n\t" 1890 "mov r14, r11 \n\t" 1891 "umlal r8, r11, r4, r7 \n\t" 1892 "cmp r14, r11 \n\t" 1893 "it hi \n\t" 1894 "adchi r12, #0 \n\t" 1895 "mov r14, r11 \n\t" 1896 "umlal r8, r11, r5, r6 \n\t" 1897 "cmp r14, r11 \n\t" 1898 "it hi \n\t" 1899 "adchi r12, #0 \n\t" 1900 "ldr r14, [r0] \n\t" 1901 "adds r8, r14 \n\t" 1902 "adcs r11, #0 \n\t" 1903 "adc r12, #0 \n\t" 1904 "adds r8, r8 \n\t" 1905 "adcs r11, r11 \n\t" 1906 "adc r12, r12 \n\t" 1907 "adds r8, r9 \n\t" 1908 "adcs r11, r10 \n\t" 1909 "adc r12, #0 \n\t" 1910 "stmia r0!, {r8} \n\t" 1911 1912 "ldmia r1!, {r3} \n\t" 1913 "mov r10, #0 \n\t" 1914 "umull r8, r9, r4, r2 \n\t" 1915 "mov r14, r9 \n\t" 1916 
"umlal r8, r9, r5, r7 \n\t" 1917 "cmp r14, r9 \n\t" 1918 "it hi \n\t" 1919 "adchi r10, #0 \n\t" 1920 "ldr r14, [r0] \n\t" 1921 "adds r8, r14 \n\t" 1922 "adcs r9, #0 \n\t" 1923 "adc r10, #0 \n\t" 1924 "adds r8, r8 \n\t" 1925 "adcs r9, r9 \n\t" 1926 "adc r10, r10 \n\t" 1927 "mov r14, r9 \n\t" 1928 "umlal r8, r9, r6, r6 \n\t" 1929 "cmp r14, r9 \n\t" 1930 "it hi \n\t" 1931 "adchi r10, #0 \n\t" 1932 "adds r8, r11 \n\t" 1933 "adcs r9, r12 \n\t" 1934 "adc r10, #0 \n\t" 1935 "stmia r0!, {r8} \n\t" 1936 1937 "mov r12, #0 \n\t" 1938 "umull r8, r11, r4, r3 \n\t" 1939 "mov r14, r11 \n\t" 1940 "umlal r8, r11, r5, r2 \n\t" 1941 "cmp r14, r11 \n\t" 1942 "it hi \n\t" 1943 "adchi r12, #0 \n\t" 1944 "mov r14, r11 \n\t" 1945 "umlal r8, r11, r6, r7 \n\t" 1946 "cmp r14, r11 \n\t" 1947 "it hi \n\t" 1948 "adchi r12, #0 \n\t" 1949 "ldr r14, [r0] \n\t" 1950 "adds r8, r14 \n\t" 1951 "adcs r11, #0 \n\t" 1952 "adc r12, #0 \n\t" 1953 "adds r8, r8 \n\t" 1954 "adcs r11, r11 \n\t" 1955 "adc r12, r12 \n\t" 1956 "adds r8, r9 \n\t" 1957 "adcs r11, r10 \n\t" 1958 "adc r12, #0 \n\t" 1959 "stmia r0!, {r8} \n\t" 1960 1961 "mov r10, #0 \n\t" 1962 "umull r8, r9, r5, r3 \n\t" 1963 "mov r14, r9 \n\t" 1964 "umlal r8, r9, r6, r2 \n\t" 1965 "cmp r14, r9 \n\t" 1966 "it hi \n\t" 1967 "adchi r10, #0 \n\t" 1968 "adds r8, r8 \n\t" 1969 "adcs r9, r9 \n\t" 1970 "adc r10, r10 \n\t" 1971 "mov r14, r9 \n\t" 1972 "umlal r8, r9, r7, r7 \n\t" 1973 "cmp r14, r9 \n\t" 1974 "it hi \n\t" 1975 "adchi r10, #0 \n\t" 1976 "adds r8, r11 \n\t" 1977 "adcs r9, r12 \n\t" 1978 "adc r10, #0 \n\t" 1979 "stmia r0!, {r8} \n\t" 1980 1981 "mov r12, #0 \n\t" 1982 "umull r8, r11, r6, r3 \n\t" 1983 "mov r14, r11 \n\t" 1984 "umlal r8, r11, r7, r2 \n\t" 1985 "cmp r14, r11 \n\t" 1986 "it hi \n\t" 1987 "adchi r12, #0 \n\t" 1988 "adds r8, r8 \n\t" 1989 "adcs r11, r11 \n\t" 1990 "adc r12, r12 \n\t" 1991 "adds r8, r9 \n\t" 1992 "adcs r11, r10 \n\t" 1993 "adc r12, #0 \n\t" 1994 "stmia r0!, {r8} \n\t" 1995 1996 "mov r8, #0 \n\t" 1997 "umull r1, r10, r7, 
r3 \n\t" 1998 "adds r1, r1 \n\t" 1999 "adcs r10, r10 \n\t" 2000 "adc r8, #0 \n\t" 2001 "adds r11, r1 \n\t" 2002 "adcs r12, r10 \n\t" 2003 "adc r8, #0 \n\t" 2004 "umull r1, r10, r2, r2 \n\t" 2005 "adds r11, r1 \n\t" 2006 "adcs r12, r10 \n\t" 2007 "adc r8, #0 \n\t" 2008 "stmia r0!, {r11} \n\t" 2009 2010 "mov r11, #0 \n\t" 2011 "umull r1, r10, r2, r3 \n\t" 2012 "adds r1, r1 \n\t" 2013 "adcs r10, r10 \n\t" 2014 "adc r11, #0 \n\t" 2015 "adds r12, r1 \n\t" 2016 "adcs r8, r10 \n\t" 2017 "adc r11, #0 \n\t" 2018 "stmia r0!, {r12} \n\t" 2019 2020 "umull r1, r10, r3, r3 \n\t" 2021 "adds r8, r1 \n\t" 2022 "adcs r11, r10 \n\t" 2023 "stmia r0!, {r8, r11} \n\t" 2024 #if (uECC_PLATFORM != uECC_arm_thumb2) 2025 ".syntax divided \n\t" 2026 #endif 2027 : "+r" (r0), "+r" (r1) 2028 : 2029 : "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 2030 ); 2031} 2032#define asm_square 1 2033#endif /* (uECC_WORDS == 8) */ 2034#endif /* uECC_SQUARE_FUNC */ 2035 2036#endif /* (uECC_PLATFORM != uECC_arm_thumb) */ 2037#endif /* (uECC_ASM == uECC_asm_fast) */ 2038 2039#if !asm_add 2040static uint32_t vli_add(uint32_t *result, const uint32_t *left, const uint32_t *right) { 2041 uint32_t counter = uECC_WORDS; 2042 uint32_t carry = 0; 2043 uint32_t left_word; 2044 uint32_t right_word; 2045 2046 __asm__ volatile ( 2047 ".syntax unified \n\t" 2048 "1: \n\t" 2049 "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */ 2050 "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */ 2051 "lsrs %[carry], #1 \n\t" /* Set up carry flag (carry = 0 after this). */ 2052 "adcs %[left], %[right] \n\t" /* Add with carry. */ 2053 "adcs %[carry], %[carry] \n\t" /* Store carry bit. */ 2054 "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */ 2055 "subs %[ctr], #1 \n\t" /* Decrement counter. */ 2056 "bne 1b \n\t" /* Loop until counter == 0. 
*/ 2057 #if (uECC_PLATFORM != uECC_arm_thumb2) 2058 ".syntax divided \n\t" 2059 #endif 2060 #if (uECC_PLATFORM == uECC_arm_thumb) 2061 : [dptr] "+l" (result), [lptr] "+l" (left), [rptr] "+l" (right), 2062 [ctr] "+l" (counter), [carry] "+l" (carry), 2063 [left] "=l" (left_word), [right] "=l" (right_word) 2064 #else 2065 : [dptr] "+r" (result), [lptr] "+r" (left), [rptr] "+r" (right), 2066 [ctr] "+r" (counter), [carry] "+r" (carry), 2067 [left] "=r" (left_word), [right] "=r" (right_word) 2068 #endif 2069 : 2070 : "cc", "memory" 2071 ); 2072 return carry; 2073} 2074#define asm_add 1 2075#endif 2076 2077#if !asm_sub 2078static uint32_t vli_sub(uint32_t *result, const uint32_t *left, const uint32_t *right) { 2079 uint32_t counter = uECC_WORDS; 2080 uint32_t carry = 1; /* carry = 1 initially (means don't borrow) */ 2081 uint32_t left_word; 2082 uint32_t right_word; 2083 2084 __asm__ volatile ( 2085 ".syntax unified \n\t" 2086 "1: \n\t" 2087 "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */ 2088 "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */ 2089 "lsrs %[carry], #1 \n\t" /* Set up carry flag (carry = 0 after this). */ 2090 "sbcs %[left], %[right] \n\t" /* Subtract with borrow. */ 2091 "adcs %[carry], %[carry] \n\t" /* Store carry bit. */ 2092 "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */ 2093 "subs %[ctr], #1 \n\t" /* Decrement counter. */ 2094 "bne 1b \n\t" /* Loop until counter == 0. 
*/ 2095 #if (uECC_PLATFORM != uECC_arm_thumb2) 2096 ".syntax divided \n\t" 2097 #endif 2098 #if (uECC_PLATFORM == uECC_arm_thumb) 2099 : [dptr] "+l" (result), [lptr] "+l" (left), [rptr] "+l" (right), 2100 [ctr] "+l" (counter), [carry] "+l" (carry), 2101 [left] "=l" (left_word), [right] "=l" (right_word) 2102 #else 2103 : [dptr] "+r" (result), [lptr] "+r" (left), [rptr] "+r" (right), 2104 [ctr] "+r" (counter), [carry] "+r" (carry), 2105 [left] "=r" (left_word), [right] "=r" (right_word) 2106 #endif 2107 : 2108 : "cc", "memory" 2109 ); 2110 return !carry; 2111} 2112#define asm_sub 1 2113#endif 2114 2115#if !asm_mult 2116static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) { 2117#if (uECC_PLATFORM != uECC_arm_thumb) 2118 uint32_t c0 = 0; 2119 uint32_t c1 = 0; 2120 uint32_t c2 = 0; 2121 uint32_t k = 0; 2122 uint32_t i; 2123 uint32_t t0, t1; 2124 2125 __asm__ volatile ( 2126 ".syntax unified \n\t" 2127 2128 "1: \n\t" /* outer loop (k < uECC_WORDS) */ 2129 "movs %[i], #0 \n\t" /* i = 0 */ 2130 "b 3f \n\t" 2131 2132 "2: \n\t" /* outer loop (k >= uECC_WORDS) */ 2133 "movs %[i], %[k] \n\t" /* i = k */ 2134 "subs %[i], %[eccdm1] \n\t" /* i = k - (uECC_WORDS - 1) (times 4) */ 2135 2136 "3: \n\t" /* inner loop */ 2137 "subs %[t0], %[k], %[i] \n\t" /* t0 = k-i */ 2138 2139 "ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = right[k - i] */ 2140 "ldr %[t0], [%[left], %[i]] \n\t" /* t0 = left[i] */ 2141 2142 "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * right[k - i] */ 2143 2144 "adds %[c0], %[t0] \n\t" /* add low word to c0 */ 2145 "adcs %[c1], %[t1] \n\t" /* add high word to c1, including carry */ 2146 "adcs %[c2], #0 \n\t" /* add carry to c2 */ 2147 2148 "adds %[i], #4 \n\t" /* i += 4 */ 2149 "cmp %[i], %[eccd] \n\t" /* i < uECC_WORDS (times 4)? */ 2150 "bge 4f \n\t" /* if not, exit the loop */ 2151 "cmp %[i], %[k] \n\t" /* i <= k? 
*/ 2152 "ble 3b \n\t" /* if so, continue looping */ 2153 2154 "4: \n\t" /* end inner loop */ 2155 2156 "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */ 2157 "mov %[c0], %[c1] \n\t" /* c0 = c1 */ 2158 "mov %[c1], %[c2] \n\t" /* c1 = c2 */ 2159 "movs %[c2], #0 \n\t" /* c2 = 0 */ 2160 "adds %[k], #4 \n\t" /* k += 4 */ 2161 "cmp %[k], %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */ 2162 "blt 1b \n\t" /* if not, loop back, start with i = 0 */ 2163 "cmp %[k], %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */ 2164 "blt 2b \n\t" /* if not, loop back, start with i = (k + 1) - uECC_WORDS */ 2165 /* end outer loop */ 2166 2167 "str %[c0], [%[result], %[k]] \n\t" /* result[uECC_WORDS * 2 - 1] = c0 */ 2168 #if (uECC_PLATFORM != uECC_arm_thumb2) 2169 ".syntax divided \n\t" 2170 #endif 2171 : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), 2172 [k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1) 2173 : [result] "r" (result), [left] "r" (left), [right] "r" (right), 2174 [eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4), 2175 [eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4) 2176 : "cc", "memory" 2177 ); 2178 2179#else /* Thumb-1 */ 2180 2181 register uint32_t *r0 __asm__("r0") = result; 2182 register const uint32_t *r1 __asm__("r1") = left; 2183 register const uint32_t *r2 __asm__("r2") = right; 2184 2185 __asm__ volatile ( 2186 ".syntax unified \n\t" 2187 "movs r3, #0 \n\t" /* c0 = 0 */ 2188 "movs r4, #0 \n\t" /* c1 = 0 */ 2189 "movs r5, #0 \n\t" /* c2 = 0 */ 2190 "movs r6, #0 \n\t" /* k = 0 */ 2191 2192 "push {r0} \n\t" /* keep result on the stack */ 2193 2194 "1: \n\t" /* outer loop (k < uECC_WORDS) */ 2195 "movs r7, #0 \n\t" /* r7 = i = 0 */ 2196 "b 3f \n\t" 2197 2198 "2: \n\t" /* outer loop (k >= uECC_WORDS) */ 2199 "movs r7, r6 \n\t" /* r7 = k */ 2200 "subs r7, %[eccdm1] \n\t" /* r7 = i = k - (uECC_WORDS - 1) (times 4) */ 2201 2202 "3: \n\t" /* inner loop */ 2203 "push {r3, r4, r5, r6} \n\t" /* push things, r3 (c0) is at the top of stack. 
*/ 2204 "subs r0, r6, r7 \n\t" /* r0 = k - i */ 2205 2206 "ldr r4, [r2, r0] \n\t" /* r4 = right[k - i] */ 2207 "ldr r0, [r1, r7] \n\t" /* r0 = left[i] */ 2208 2209 "lsrs r3, r0, #16 \n\t" /* r3 = a1 */ 2210 "uxth r0, r0 \n\t" /* r0 = a0 */ 2211 2212 "lsrs r5, r4, #16 \n\t" /* r5 = b1 */ 2213 "uxth r4, r4 \n\t" /* r4 = b0 */ 2214 2215 "movs r6, r3 \n\t" /* r6 = a1 */ 2216 "muls r6, r5, r6 \n\t" /* r6 = a1 * b1 */ 2217 "muls r3, r4, r3 \n\t" /* r3 = b0 * a1 */ 2218 "muls r5, r0, r5 \n\t" /* r5 = a0 * b1 */ 2219 "muls r0, r4, r0 \n\t" /* r0 = a0 * b0 */ 2220 2221 "movs r4, #0 \n\t" /* r4 = 0 */ 2222 "adds r3, r5 \n\t" /* r3 = b0 * a1 + a0 * b1 */ 2223 "adcs r4, r4 \n\t" /* r4 = carry */ 2224 "lsls r4, #16 \n\t" /* r4 = carry << 16 */ 2225 "adds r6, r4 \n\t" /* r6 = a1 * b1 + carry */ 2226 2227 "lsls r4, r3, #16 \n\t" /* r4 = (b0 * a1 + a0 * b1) << 16 */ 2228 "lsrs r3, #16 \n\t" /* r3 = (b0 * a1 + a0 * b1) >> 16 */ 2229 "adds r0, r4 \n\t" /* r0 = low word = a0 * b0 + ((b0 * a1 + a0 * b1) << 16) */ 2230 "adcs r6, r3 \n\t" /* r6 = high word = a1 * b1 + carry + ((b0 * a1 + a0 * b1) >> 16) */ 2231 2232 "pop {r3, r4, r5} \n\t" /* r3 = c0, r4 = c1, r5 = c2 */ 2233 "adds r3, r0 \n\t" /* add low word to c0 */ 2234 "adcs r4, r6 \n\t" /* add high word to c1, including carry */ 2235 "movs r0, #0 \n\t" /* r0 = 0 (does not affect carry bit) */ 2236 "adcs r5, r0 \n\t" /* add carry to c2 */ 2237 2238 "pop {r6} \n\t" /* r6 = k */ 2239 2240 "adds r7, #4 \n\t" /* i += 4 */ 2241 "cmp r7, %[eccd] \n\t" /* i < uECC_WORDS (times 4)? */ 2242 "bge 4f \n\t" /* if not, exit the loop */ 2243 "cmp r7, r6 \n\t" /* i <= k? 
*/ 2244 "ble 3b \n\t" /* if so, continue looping */ 2245 2246 "4: \n\t" /* end inner loop */ 2247 2248 "ldr r0, [sp, #0] \n\t" /* r0 = result */ 2249 2250 "str r3, [r0, r6] \n\t" /* result[k] = c0 */ 2251 "mov r3, r4 \n\t" /* c0 = c1 */ 2252 "mov r4, r5 \n\t" /* c1 = c2 */ 2253 "movs r5, #0 \n\t" /* c2 = 0 */ 2254 "adds r6, #4 \n\t" /* k += 4 */ 2255 "cmp r6, %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */ 2256 "blt 1b \n\t" /* if not, loop back, start with i = 0 */ 2257 "cmp r6, %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */ 2258 "blt 2b \n\t" /* if not, loop back, start with i = (k + 1) - uECC_WORDS */ 2259 /* end outer loop */ 2260 2261 "str r3, [r0, r6] \n\t" /* result[uECC_WORDS * 2 - 1] = c0 */ 2262 "pop {r0} \n\t" /* pop result off the stack */ 2263 2264 ".syntax divided \n\t" 2265 : 2266 : [r0] "l" (r0), [r1] "l" (r1), [r2] "l" (r2), [eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4), [eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4) 2267 : "r3", "r4", "r5", "r6", "r7", "cc", "memory" 2268 ); 2269#endif 2270} 2271#define asm_mult 1 2272#endif /* !asm_mult */ 2273 2274#if uECC_SQUARE_FUNC 2275#if !asm_square 2276static void vli_square(uint32_t *result, const uint32_t *left) { 2277#if (uECC_PLATFORM != uECC_arm_thumb) 2278 uint32_t c0 = 0; 2279 uint32_t c1 = 0; 2280 uint32_t c2 = 0; 2281 uint32_t k = 0; 2282 uint32_t i, tt; 2283 uint32_t t0, t1; 2284 2285 __asm__ volatile ( 2286 ".syntax unified \n\t" 2287 2288 "1: \n\t" /* outer loop (k < uECC_WORDS) */ 2289 "movs %[i], #0 \n\t" /* i = 0 */ 2290 "b 3f \n\t" 2291 2292 "2: \n\t" /* outer loop (k >= uECC_WORDS) */ 2293 "movs %[i], %[k] \n\t" /* i = k */ 2294 "subs %[i], %[eccdm1] \n\t" /* i = k - (uECC_WORDS - 1) (times 4) */ 2295 2296 "3: \n\t" /* inner loop */ 2297 "subs %[tt], %[k], %[i] \n\t" /* tt = k-i */ 2298 2299 "ldr %[t1], [%[left], %[tt]] \n\t" /* t1 = left[k - i] */ 2300 "ldr %[t0], [%[left], %[i]] \n\t" /* t0 = left[i] */ 2301 2302 "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = 
left[i] * right[k - i] */ 2303 2304 "cmp %[i], %[tt] \n\t" /* (i < k - i) ? */ 2305 "bge 4f \n\t" /* if i >= k - i, skip */ 2306 "lsls %[t1], #1 \n\t" /* high word << 1 */ 2307 "adc %[c2], #0 \n\t" /* add carry bit to c2 */ 2308 "lsls %[t0], #1 \n\t" /* low word << 1 */ 2309 "adc %[t1], #0 \n\t" /* add carry bit to high word */ 2310 2311 "4: \n\t" 2312 2313 "adds %[c0], %[t0] \n\t" /* add low word to c0 */ 2314 "adcs %[c1], %[t1] \n\t" /* add high word to c1, including carry */ 2315 "adc %[c2], #0 \n\t" /* add carry to c2 */ 2316 2317 "adds %[i], #4 \n\t" /* i += 4 */ 2318 "cmp %[i], %[k] \n\t" /* i <= k? */ 2319 "bge 5f \n\t" /* if not, exit the loop */ 2320 "subs %[tt], %[k], %[i] \n\t" /* tt = k - i */ 2321 "cmp %[i], %[tt] \n\t" /* i <= k - i? */ 2322 "ble 3b \n\t" /* if so, continue looping */ 2323 2324 "5: \n\t" /* end inner loop */ 2325 2326 "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */ 2327 "mov %[c0], %[c1] \n\t" /* c0 = c1 */ 2328 "mov %[c1], %[c2] \n\t" /* c1 = c2 */ 2329 "movs %[c2], #0 \n\t" /* c2 = 0 */ 2330 "adds %[k], #4 \n\t" /* k += 4 */ 2331 "cmp %[k], %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */ 2332 "blt 1b \n\t" /* if not, loop back, start with i = 0 */ 2333 "cmp %[k], %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? 
*/ 2334 "blt 2b \n\t" /* if not, loop back, start with i = (k + 1) - uECC_WORDS */ 2335 /* end outer loop */ 2336 2337 "str %[c0], [%[result], %[k]] \n\t" /* result[uECC_WORDS * 2 - 1] = c0 */ 2338 #if (uECC_PLATFORM != uECC_arm_thumb2) 2339 ".syntax divided \n\t" 2340 #endif 2341 : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), 2342 [k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1) 2343 : [result] "r" (result), [left] "r" (left), 2344 [eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4), 2345 [eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4) 2346 : "cc", "memory" 2347 ); 2348 2349#else 2350 2351 register uint32_t *r0 __asm__("r0") = result; 2352 register const uint32_t *r1 __asm__("r1") = left; 2353 2354 __asm__ volatile ( 2355 ".syntax unified \n\t" 2356 "movs r2, #0 \n\t" /* c0 = 0 */ 2357 "movs r3, #0 \n\t" /* c1 = 0 */ 2358 "movs r4, #0 \n\t" /* c2 = 0 */ 2359 "movs r5, #0 \n\t" /* k = 0 */ 2360 2361 "push {r0} \n\t" /* keep result on the stack */ 2362 2363 "1: \n\t" /* outer loop (k < uECC_WORDS) */ 2364 "movs r6, #0 \n\t" /* r6 = i = 0 */ 2365 "b 3f \n\t" 2366 2367 "2: \n\t" /* outer loop (k >= uECC_WORDS) */ 2368 "movs r6, r5 \n\t" /* r6 = k */ 2369 "subs r6, %[eccdm1] \n\t" /* r6 = i = k - (uECC_WORDS - 1) (times 4) */ 2370 2371 "3: \n\t" /* inner loop */ 2372 "push {r2, r3, r4, r5} \n\t" /* push things, r2 (c0) is at the top of stack. 
*/ 2373 "subs r7, r5, r6 \n\t" /* r7 = k - i */ 2374 2375 "ldr r3, [r1, r7] \n\t" /* r3 = left[k - i] */ 2376 "ldr r0, [r1, r6] \n\t" /* r0 = left[i] */ 2377 2378 "lsrs r2, r0, #16 \n\t" /* r2 = a1 */ 2379 "uxth r0, r0 \n\t" /* r0 = a0 */ 2380 2381 "lsrs r4, r3, #16 \n\t" /* r4 = b1 */ 2382 "uxth r3, r3 \n\t" /* r3 = b0 */ 2383 2384 "movs r5, r2 \n\t" /* r5 = a1 */ 2385 "muls r5, r4, r5 \n\t" /* r5 = a1 * b1 */ 2386 "muls r2, r3, r2 \n\t" /* r2 = b0 * a1 */ 2387 "muls r4, r0, r4 \n\t" /* r4 = a0 * b1 */ 2388 "muls r0, r3, r0 \n\t" /* r0 = a0 * b0 */ 2389 2390 "movs r3, #0 \n\t" /* r3 = 0 */ 2391 "adds r2, r4 \n\t" /* r2 = b0 * a1 + a0 * b1 */ 2392 "adcs r3, r3 \n\t" /* r3 = carry */ 2393 "lsls r3, #16 \n\t" /* r3 = carry << 16 */ 2394 "adds r5, r3 \n\t" /* r5 = a1 * b1 + carry */ 2395 2396 "lsls r3, r2, #16 \n\t" /* r3 = (b0 * a1 + a0 * b1) << 16 */ 2397 "lsrs r2, #16 \n\t" /* r2 = (b0 * a1 + a0 * b1) >> 16 */ 2398 "adds r0, r3 \n\t" /* r0 = low word = a0 * b0 + ((b0 * a1 + a0 * b1) << 16) */ 2399 "adcs r5, r2 \n\t" /* r5 = high word = a1 * b1 + carry + ((b0 * a1 + a0 * b1) >> 16) */ 2400 2401 "movs r3, #0 \n\t" /* r3 = 0 */ 2402 "cmp r6, r7 \n\t" /* (i < k - i) ? */ 2403 "mov r7, r3 \n\t" /* r7 = 0 (does not affect condition)*/ 2404 "bge 4f \n\t" /* if i >= k - i, skip */ 2405 "lsls r5, #1 \n\t" /* high word << 1 */ 2406 "adcs r7, r3 \n\t" /* r7 = carry bit for c2 */ 2407 "lsls r0, #1 \n\t" /* low word << 1 */ 2408 "adcs r5, r3 \n\t" /* add carry from shift to high word */ 2409 2410 "4: \n\t" 2411 "pop {r2, r3, r4} \n\t" /* r2 = c0, r3 = c1, r4 = c2 */ 2412 "adds r2, r0 \n\t" /* add low word to c0 */ 2413 "adcs r3, r5 \n\t" /* add high word to c1, including carry */ 2414 "movs r0, #0 \n\t" /* r0 = 0 (does not affect carry bit) */ 2415 "adcs r4, r0 \n\t" /* add carry to c2 */ 2416 "adds r4, r7 \n\t" /* add carry from doubling (if any) */ 2417 2418 "pop {r5} \n\t" /* r5 = k */ 2419 2420 "adds r6, #4 \n\t" /* i += 4 */ 2421 "cmp r6, r5 \n\t" /* i <= k? 
*/ 2422 "bge 5f \n\t" /* if not, exit the loop */ 2423 "subs r7, r5, r6 \n\t" /* r7 = k - i */ 2424 "cmp r6, r7 \n\t" /* i <= k - i? */ 2425 "ble 3b \n\t" /* if so, continue looping */ 2426 2427 "5: \n\t" /* end inner loop */ 2428 2429 "ldr r0, [sp, #0] \n\t" /* r0 = result */ 2430 2431 "str r2, [r0, r5] \n\t" /* result[k] = c0 */ 2432 "mov r2, r3 \n\t" /* c0 = c1 */ 2433 "mov r3, r4 \n\t" /* c1 = c2 */ 2434 "movs r4, #0 \n\t" /* c2 = 0 */ 2435 "adds r5, #4 \n\t" /* k += 4 */ 2436 "cmp r5, %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */ 2437 "blt 1b \n\t" /* if not, loop back, start with i = 0 */ 2438 "cmp r5, %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */ 2439 "blt 2b \n\t" /* if not, loop back, start with i = (k + 1) - uECC_WORDS */ 2440 /* end outer loop */ 2441 2442 "str r2, [r0, r5] \n\t" /* result[uECC_WORDS * 2 - 1] = c0 */ 2443 "pop {r0} \n\t" /* pop result off the stack */ 2444 2445 ".syntax divided \n\t" 2446 : [r0] "+l" (r0), [r1] "+l" (r1) 2447 : [eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4), 2448 [eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4) 2449 : "r2", "r3", "r4", "r5", "r6", "r7", "cc", "memory" 2450 ); 2451#endif 2452} 2453#define asm_square 1 2454#endif /* !asm_square */ 2455#endif /* uECC_SQUARE_FUNC */ 2456