1*af03003cSMatthias Ringwald#!/usr/bin/env python 2*af03003cSMatthias Ringwald 3*af03003cSMatthias Ringwaldimport sys 4*af03003cSMatthias Ringwald 5*af03003cSMatthias Ringwaldif len(sys.argv) < 2: 6*af03003cSMatthias Ringwald print "Provide the integer size in bytes" 7*af03003cSMatthias Ringwald sys.exit(1) 8*af03003cSMatthias Ringwald 9*af03003cSMatthias Ringwaldsize = int(sys.argv[1]) 10*af03003cSMatthias Ringwald 11*af03003cSMatthias Ringwaldfull_rows = size // 10 12*af03003cSMatthias Ringwaldinit_size = size % 10 13*af03003cSMatthias Ringwald 14*af03003cSMatthias Ringwaldif init_size == 0: 15*af03003cSMatthias Ringwald full_rows = full_rows - 1 16*af03003cSMatthias Ringwald init_size = 10 17*af03003cSMatthias Ringwald 18*af03003cSMatthias Ringwalddef rx(i): 19*af03003cSMatthias Ringwald return i + 2 20*af03003cSMatthias Ringwald 21*af03003cSMatthias Ringwalddef ry(i): 22*af03003cSMatthias Ringwald return i + 12 23*af03003cSMatthias Ringwald 24*af03003cSMatthias Ringwalddef emit(line, *args): 25*af03003cSMatthias Ringwald s = '"' + line + r' \n\t"' 26*af03003cSMatthias Ringwald print s % args 27*af03003cSMatthias Ringwald 28*af03003cSMatthias Ringwald#### set up registers 29*af03003cSMatthias Ringwaldemit("adiw r30, %s", size - init_size) # move z 30*af03003cSMatthias Ringwaldemit("adiw r28, %s", size - init_size) # move y 31*af03003cSMatthias Ringwald 32*af03003cSMatthias Ringwaldfor i in xrange(init_size): 33*af03003cSMatthias Ringwald emit("ld r%s, x+", rx(i)) 34*af03003cSMatthias Ringwaldfor i in xrange(init_size): 35*af03003cSMatthias Ringwald emit("ld r%s, y+", ry(i)) 36*af03003cSMatthias Ringwald 37*af03003cSMatthias Ringwaldemit("ldi r25, 0") 38*af03003cSMatthias Ringwaldprint "" 39*af03003cSMatthias Ringwaldif init_size == 1: 40*af03003cSMatthias Ringwald emit("mul r2, r12") 41*af03003cSMatthias Ringwald emit("st z+, r0") 42*af03003cSMatthias Ringwald emit("st z+, r1") 43*af03003cSMatthias Ringwaldelse: 44*af03003cSMatthias Ringwald #### first two multiplications of initial block 45*af03003cSMatthias Ringwald emit("ldi r23, 0") 46*af03003cSMatthias Ringwald emit("mul r2, r12") 47*af03003cSMatthias Ringwald emit("st z+, r0") 48*af03003cSMatthias Ringwald emit("mov r22, r1") 49*af03003cSMatthias Ringwald print "" 50*af03003cSMatthias Ringwald emit("ldi r24, 0") 51*af03003cSMatthias Ringwald emit("mul r2, r13") 52*af03003cSMatthias Ringwald emit("add r22, r0") 53*af03003cSMatthias Ringwald emit("adc r23, r1") 54*af03003cSMatthias Ringwald emit("mul r3, r12") 55*af03003cSMatthias Ringwald emit("add r22, r0") 56*af03003cSMatthias Ringwald emit("adc r23, r1") 57*af03003cSMatthias Ringwald emit("adc r24, r25") 58*af03003cSMatthias Ringwald emit("st z+, r22") 59*af03003cSMatthias Ringwald print "" 60*af03003cSMatthias Ringwald 61*af03003cSMatthias Ringwald #### rest of initial block, with moving accumulator registers 62*af03003cSMatthias Ringwald acc = [23, 24, 22] 63*af03003cSMatthias Ringwald for r in xrange(2, init_size): 64*af03003cSMatthias Ringwald emit("ldi r%s, 0", acc[2]) 65*af03003cSMatthias Ringwald for i in xrange(0, r+1): 66*af03003cSMatthias Ringwald emit("mul r%s, r%s", rx(i), ry(r - i)) 67*af03003cSMatthias Ringwald emit("add r%s, r0", acc[0]) 68*af03003cSMatthias Ringwald emit("adc r%s, r1", acc[1]) 69*af03003cSMatthias Ringwald emit("adc r%s, r25", acc[2]) 70*af03003cSMatthias Ringwald emit("st z+, r%s", acc[0]) 71*af03003cSMatthias Ringwald print "" 72*af03003cSMatthias Ringwald acc = acc[1:] + acc[:1] 73*af03003cSMatthias Ringwald for r in xrange(1, init_size-1): 74*af03003cSMatthias Ringwald emit("ldi r%s, 0", acc[2]) 75*af03003cSMatthias Ringwald for i in xrange(0, init_size-r): 76*af03003cSMatthias Ringwald emit("mul r%s, r%s", rx(r+i), ry((init_size-1) - i)) 77*af03003cSMatthias Ringwald emit("add r%s, r0", acc[0]) 78*af03003cSMatthias Ringwald emit("adc r%s, r1", acc[1]) 79*af03003cSMatthias Ringwald emit("adc r%s, r25", acc[2]) 80*af03003cSMatthias Ringwald emit("st z+, r%s", acc[0]) 81*af03003cSMatthias Ringwald print "" 82*af03003cSMatthias Ringwald acc = acc[1:] + acc[:1] 83*af03003cSMatthias Ringwald emit("mul r%s, r%s", rx(init_size-1), ry(init_size-1)) 84*af03003cSMatthias Ringwald emit("add r%s, r0", acc[0]) 85*af03003cSMatthias Ringwald emit("adc r%s, r1", acc[1]) 86*af03003cSMatthias Ringwald emit("st z+, r%s", acc[0]) 87*af03003cSMatthias Ringwald emit("st z+, r%s", acc[1]) 88*af03003cSMatthias Ringwaldprint "" 89*af03003cSMatthias Ringwald 90*af03003cSMatthias Ringwald#### reset y and z pointers 91*af03003cSMatthias Ringwaldemit("sbiw r30, %s", 2 * init_size + 10) 92*af03003cSMatthias Ringwaldemit("sbiw r28, %s", init_size + 10) 93*af03003cSMatthias Ringwald 94*af03003cSMatthias Ringwald#### load y registers 95*af03003cSMatthias Ringwaldfor i in xrange(10): 96*af03003cSMatthias Ringwald emit("ld r%s, y+", ry(i)) 97*af03003cSMatthias Ringwald 98*af03003cSMatthias Ringwald#### load additional x registers 99*af03003cSMatthias Ringwaldfor i in xrange(init_size, 10): 100*af03003cSMatthias Ringwald emit("ld r%s, x+", rx(i)) 101*af03003cSMatthias Ringwaldprint "" 102*af03003cSMatthias Ringwald 103*af03003cSMatthias Ringwaldprev_size = init_size 104*af03003cSMatthias Ringwaldfor row in xrange(full_rows): 105*af03003cSMatthias Ringwald #### do x = 0-9, y = 0-9 multiplications 106*af03003cSMatthias Ringwald emit("ldi r23, 0") 107*af03003cSMatthias Ringwald emit("mul r2, r12") 108*af03003cSMatthias Ringwald emit("st z+, r0") 109*af03003cSMatthias Ringwald emit("mov r22, r1") 110*af03003cSMatthias Ringwald print "" 111*af03003cSMatthias Ringwald emit("ldi r24, 0") 112*af03003cSMatthias Ringwald emit("mul r2, r13") 113*af03003cSMatthias Ringwald emit("add r22, r0") 114*af03003cSMatthias Ringwald emit("adc r23, r1") 115*af03003cSMatthias Ringwald emit("mul r3, r12") 116*af03003cSMatthias Ringwald emit("add r22, r0") 117*af03003cSMatthias Ringwald emit("adc r23, r1") 118*af03003cSMatthias Ringwald emit("adc r24, r25") 119*af03003cSMatthias Ringwald emit("st z+, r22") 120*af03003cSMatthias Ringwald print "" 121*af03003cSMatthias Ringwald 122*af03003cSMatthias Ringwald acc = [23, 24, 22] 123*af03003cSMatthias Ringwald for r in xrange(2, 10): 124*af03003cSMatthias Ringwald emit("ldi r%s, 0", acc[2]) 125*af03003cSMatthias Ringwald for i in xrange(0, r+1): 126*af03003cSMatthias Ringwald emit("mul r%s, r%s", rx(i), ry(r - i)) 127*af03003cSMatthias Ringwald emit("add r%s, r0", acc[0]) 128*af03003cSMatthias Ringwald emit("adc r%s, r1", acc[1]) 129*af03003cSMatthias Ringwald emit("adc r%s, r25", acc[2]) 130*af03003cSMatthias Ringwald emit("st z+, r%s", acc[0]) 131*af03003cSMatthias Ringwald print "" 132*af03003cSMatthias Ringwald acc = acc[1:] + acc[:1] 133*af03003cSMatthias Ringwald 134*af03003cSMatthias Ringwald #### now we need to start shifting x and loading from z 135*af03003cSMatthias Ringwald x_regs = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11] 136*af03003cSMatthias Ringwald for r in xrange(0, prev_size): 137*af03003cSMatthias Ringwald x_regs = x_regs[1:] + x_regs[:1] 138*af03003cSMatthias Ringwald emit("ld r%s, x+", x_regs[9]) # load next byte of left 139*af03003cSMatthias Ringwald emit("ldi r%s, 0", acc[2]) 140*af03003cSMatthias Ringwald for i in xrange(0, 10): 141*af03003cSMatthias Ringwald emit("mul r%s, r%s", x_regs[i], ry(9 - i)) 142*af03003cSMatthias Ringwald emit("add r%s, r0", acc[0]) 143*af03003cSMatthias Ringwald emit("adc r%s, r1", acc[1]) 144*af03003cSMatthias Ringwald emit("adc r%s, r25", acc[2]) 145*af03003cSMatthias Ringwald emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment) 146*af03003cSMatthias Ringwald emit("add r%s, r0", acc[0]) 147*af03003cSMatthias Ringwald emit("adc r%s, r25", acc[1]) 148*af03003cSMatthias Ringwald emit("adc r%s, r25", acc[2]) 149*af03003cSMatthias Ringwald emit("st z+, r%s", acc[0]) # store next byte (z increments) 150*af03003cSMatthias Ringwald print "" 151*af03003cSMatthias Ringwald acc = acc[1:] + acc[:1] 152*af03003cSMatthias Ringwald 153*af03003cSMatthias Ringwald # done shifting x, start shifting y 154*af03003cSMatthias Ringwald y_regs = [12, 13, 14, 15, 16, 17, 18, 19, 20, 21] 155*af03003cSMatthias Ringwald for r in xrange(0, prev_size): 156*af03003cSMatthias Ringwald y_regs = y_regs[1:] + y_regs[:1] 157*af03003cSMatthias Ringwald emit("ld r%s, y+", y_regs[9]) # load next byte of right 158*af03003cSMatthias Ringwald emit("ldi r%s, 0", acc[2]) 159*af03003cSMatthias Ringwald for i in xrange(0, 10): 160*af03003cSMatthias Ringwald emit("mul r%s, r%s", x_regs[i], y_regs[9 -i]) 161*af03003cSMatthias Ringwald emit("add r%s, r0", acc[0]) 162*af03003cSMatthias Ringwald emit("adc r%s, r1", acc[1]) 163*af03003cSMatthias Ringwald emit("adc r%s, r25", acc[2]) 164*af03003cSMatthias Ringwald emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment) 165*af03003cSMatthias Ringwald emit("add r%s, r0", acc[0]) 166*af03003cSMatthias Ringwald emit("adc r%s, r25", acc[1]) 167*af03003cSMatthias Ringwald emit("adc r%s, r25", acc[2]) 168*af03003cSMatthias Ringwald emit("st z+, r%s", acc[0]) # store next byte (z increments) 169*af03003cSMatthias Ringwald print "" 170*af03003cSMatthias Ringwald acc = acc[1:] + acc[:1] 171*af03003cSMatthias Ringwald 172*af03003cSMatthias Ringwald # done both shifts, do remaining corner 173*af03003cSMatthias Ringwald for r in xrange(1, 9): 174*af03003cSMatthias Ringwald emit("ldi r%s, 0", acc[2]) 175*af03003cSMatthias Ringwald for i in xrange(0, 10-r): 176*af03003cSMatthias Ringwald emit("mul r%s, r%s", x_regs[r+i], y_regs[9 - i]) 177*af03003cSMatthias Ringwald emit("add r%s, r0", acc[0]) 178*af03003cSMatthias Ringwald emit("adc r%s, r1", acc[1]) 179*af03003cSMatthias Ringwald emit("adc r%s, r25", acc[2]) 180*af03003cSMatthias Ringwald emit("st z+, r%s", acc[0]) 181*af03003cSMatthias Ringwald print "" 182*af03003cSMatthias Ringwald acc = acc[1:] + acc[:1] 183*af03003cSMatthias Ringwald emit("mul r%s, r%s", x_regs[9], y_regs[9]) 184*af03003cSMatthias Ringwald emit("add r%s, r0", acc[0]) 185*af03003cSMatthias Ringwald emit("adc r%s, r1", acc[1]) 186*af03003cSMatthias Ringwald emit("st z+, r%s", acc[0]) 187*af03003cSMatthias Ringwald emit("st z+, r%s", acc[1]) 188*af03003cSMatthias Ringwald print "" 189*af03003cSMatthias Ringwald 190*af03003cSMatthias Ringwald prev_size = prev_size + 10 191*af03003cSMatthias Ringwald if row < full_rows - 1: 192*af03003cSMatthias Ringwald #### reset x, y and z pointers 193*af03003cSMatthias Ringwald emit("sbiw r30, %s", 2 * prev_size + 10) 194*af03003cSMatthias Ringwald emit("sbiw r28, %s", prev_size + 10) 195*af03003cSMatthias Ringwald emit("sbiw r26, %s", prev_size) 196*af03003cSMatthias Ringwald 197*af03003cSMatthias Ringwald #### load x and y registers 198*af03003cSMatthias Ringwald for i in xrange(10): 199*af03003cSMatthias Ringwald emit("ld r%s, x+", rx(i)) 200*af03003cSMatthias Ringwald emit("ld r%s, y+", ry(i)) 201*af03003cSMatthias Ringwald print "" 202*af03003cSMatthias Ringwald 203*af03003cSMatthias Ringwaldemit("eor r1, r1") 204