#!/usr/bin/env python3
"""Generate unrolled ARM assembly for a multi-word integer multiplication.

Usage: pass the operand size, in 32-bit words, as the only argument.

The generated code is written to stdout, one instruction per line, each
wrapped as a C string literal ending in a literal ``\\n\\t`` (presumably for
pasting into an inline-asm statement).

Register usage in the emitted code:
    r0            pointer into z (the result)
    r1            pointer into x
    r2            pointer into y
    r3-r5         current x words
    r6-r8         current y words
    r9-r12, r14   rotating accumulator / scratch registers

The operands are processed in blocks of three words.  An initial block of
``size % 3`` words (or three words when ``size`` is a multiple of three) is
multiplied first, followed by ``full_rows`` rows that each extend the product
by three more words.  In total ``size * size`` ``umull`` instructions are
emitted.
"""

import sys

# Register numbers holding the three current x words and y words.
RX = (3, 4, 5)
RY = (6, 7, 8)

# Accumulator registers.  Entries 0..2 hold the running column sum (low,
# middle, carry); entries 3..4 receive each umull result.  The tuple is
# rotated by one after every stored column.
ACC = (9, 10, 11, 12, 14)


def emit(line, *args, out=None):
    """Format one instruction as a C string literal and output it.

    Args:
        line: printf-style template of the instruction.
        *args: values substituted into ``line`` with the ``%`` operator.
        out: optional list; when given, the formatted line is appended to it
            instead of being printed to stdout.
    """
    text = ('"' + line + r' \n\t"') % args
    if out is None:
        print(text)
    else:
        out.append(text)


def _blank(out):
    """Output an empty separator line to stdout or to the ``out`` list."""
    if out is None:
        print("")
    else:
        out.append("")


def _rotate(regs):
    """Return ``regs`` rotated left by one position."""
    return regs[1:] + regs[:1]


def _reg_list(regs, sep):
    """Return register numbers formatted as ``rN`` names joined by ``sep``."""
    return sep.join("r%s" % reg for reg in regs)


def _emit_first_two_columns(out):
    """Emit the first two result columns of a block (fixed registers)."""
    emit("umull r11, r12, r3, r6", out=out)
    emit("stmia r0!, {r11}", out=out)
    _blank(out)

    emit("mov r10, #0", out=out)
    emit("umull r11, r9, r3, r7", out=out)
    emit("adds r12, r11", out=out)
    emit("adc r9, #0", out=out)
    emit("umull r11, r14, r4, r6", out=out)
    emit("adds r12, r11", out=out)
    emit("adcs r9, r14", out=out)
    emit("adc r10, #0", out=out)
    emit("stmia r0!, {r12}", out=out)
    _blank(out)


def _emit_column(acc, products, out, add_stored=False):
    """Emit one accumulated result column and return the rotated ``acc``.

    Args:
        acc: current accumulator register assignment (five registers).
        products: iterable of ``(x_reg, y_reg)`` pairs to multiply and add.
        out: output sink passed through to ``emit``.
        add_stored: when true, the word currently at ``[r0]`` (stored by an
            earlier block) is loaded and added before the column is stored.
    """
    emit("mov r%s, #0", acc[2], out=out)
    for x_reg, y_reg in products:
        emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_reg, y_reg, out=out)
        emit("adds r%s, r%s", acc[0], acc[3], out=out)
        emit("adcs r%s, r%s", acc[1], acc[4], out=out)
        emit("adc r%s, #0", acc[2], out=out)
    if add_stored:
        # load stored value from initial block, and add to accumulator
        emit("ldr r%s, [r0]", acc[3], out=out)
        emit("adds r%s, r%s", acc[0], acc[3], out=out)
        emit("adcs r%s, #0", acc[1], out=out)
        emit("adc r%s, #0", acc[2], out=out)
    emit("stmia r0!, {r%s}", acc[0], out=out)
    _blank(out)
    return _rotate(acc)


def _emit_last_column(acc, x_reg, y_reg, out):
    """Emit the final product of a block and store the two remaining words."""
    emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_reg, y_reg, out=out)
    emit("adds r%s, r%s", acc[0], acc[3], out=out)
    emit("adc r%s, r%s", acc[1], acc[4], out=out)
    emit("stmia r0!, {r%s}", acc[0], out=out)
    emit("stmia r0!, {r%s}", acc[1], out=out)


def _emit_initial_block(init_size, out):
    """Emit the product of the initial ``init_size`` x ``init_size`` block."""
    if init_size == 1:
        emit("umull r9, r10, r3, r6", out=out)
        emit("stmia r0!, {r9, r10}", out=out)
        return

    #### first two multiplications of initial block
    _emit_first_two_columns(out)

    #### rest of initial block, with moving accumulator registers
    acc = ACC
    if init_size == 3:
        acc = _emit_column(acc, [(RX[i], RY[2 - i]) for i in range(3)], out)
        acc = _emit_column(acc, [(RX[i + 1], RY[2 - i]) for i in range(2)], out)
    _emit_last_column(acc, RX[init_size - 1], RY[init_size - 1], out)


def _emit_row(prev_size, out):
    """Emit one full row, extending a ``prev_size``-word product by 3 words."""
    _emit_first_two_columns(out)
    acc = _emit_column(ACC, [(RX[i], RY[2 - i]) for i in range(3)], out)

    #### now we need to start shifting x and loading from z
    x_regs = RX
    for _ in range(prev_size):
        x_regs = _rotate(x_regs)
        emit("ldmia r1!, {r%s}", x_regs[2], out=out)
        acc = _emit_column(
            acc, [(x_regs[i], RY[2 - i]) for i in range(3)], out, add_stored=True
        )

    # done shifting x, start shifting y
    y_regs = RY
    for _ in range(prev_size):
        y_regs = _rotate(y_regs)
        emit("ldmia r2!, {r%s}", y_regs[2], out=out)
        acc = _emit_column(
            acc, [(x_regs[i], y_regs[2 - i]) for i in range(3)], out, add_stored=True
        )

    # done both shifts, do remaining corner
    acc = _emit_column(acc, [(x_regs[i + 1], y_regs[2 - i]) for i in range(2)], out)
    _emit_last_column(acc, x_regs[2], y_regs[2], out)
    _blank(out)


def generate(size, out=None):
    """Emit the complete multiplication routine for ``size``-word operands.

    Args:
        size: operand length in 32-bit words; must be at least 1.
        out: optional list collecting the output lines; when ``None`` the
            lines are printed to stdout.

    Returns:
        ``out`` (``None`` when printing to stdout).

    Raises:
        ValueError: if ``size`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be a positive number of 32-bit words")

    full_rows, init_size = divmod(size, 3)
    if init_size == 0:
        # A multiple of three starts with a full three-word initial block.
        full_rows -= 1
        init_size = 3

    #### set up registers
    emit("add r0, %s", (size - init_size) * 4, out=out)  # move z
    emit("add r2, %s", (size - init_size) * 4, out=out)  # move y

    emit("ldmia r1!, {%s}", _reg_list(RX[:init_size], ", "), out=out)
    emit("ldmia r2!, {%s}", _reg_list(RY[:init_size], ", "), out=out)
    _blank(out)

    _emit_initial_block(init_size, out)
    _blank(out)

    if full_rows == 0:
        # The initial block already stored the whole product.  Emitting the
        # pointer rewind and register reloads below would only move r0/r2 in
        # front of their arrays and load words outside the operands.
        return out

    #### reset y and z pointers
    emit("sub r0, %s", (2 * init_size + 3) * 4, out=out)
    emit("sub r2, %s", (init_size + 3) * 4, out=out)

    #### load y registers
    emit("ldmia r2!, {%s}", _reg_list(RY, ", "), out=out)

    #### load additional x registers
    if init_size != 3:
        emit("ldmia r1!, {%s}", _reg_list(RX[init_size:], ", "), out=out)
    _blank(out)

    prev_size = init_size
    for row in range(full_rows):
        _emit_row(prev_size, out)
        prev_size += 3

        if row < full_rows - 1:
            #### reset x, y and z pointers
            emit("sub r0, %s", (2 * prev_size + 3) * 4, out=out)
            emit("sub r1, %s", prev_size * 4, out=out)
            emit("sub r2, %s", (prev_size + 3) * 4, out=out)

            #### load x and y registers
            emit("ldmia r1!, {%s}", _reg_list(RX, ","), out=out)
            emit("ldmia r2!, {%s}", _reg_list(RY, ","), out=out)
            _blank(out)

    return out


def main(argv):
    """Run the generator for the size given in ``argv[1]``.

    Diagnostics go to stderr so they never end up in redirected output.

    Returns:
        0 on success, 1 when the size argument is missing or invalid.
    """
    if len(argv) < 2:
        print("Provide the integer size in 32-bit words", file=sys.stderr)
        return 1

    try:
        size = int(argv[1])
    except ValueError:
        print("Size must be an integer, got %r" % (argv[1],), file=sys.stderr)
        return 1

    if size < 1:
        print("Size must be at least 1, got %s" % size, file=sys.stderr)
        return 1

    generate(size)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))