xref: /btstack/3rd-party/micro-ecc/scripts/mult_avr.py (revision af03003c8ac55cf0eea9563b597879b24aee256f)
#!/usr/bin/env python
2*af03003cSMatthias Ringwald
import sys
4*af03003cSMatthias Ringwald
# The operand size (in bytes) is the only, mandatory, command-line argument.
if len(sys.argv) < 2:
    # Report on stderr: stdout carries the generated assembly and is usually
    # redirected into a file, where a usage message would go unnoticed.
    sys.stderr.write("Provide the integer size in bytes\n")
    sys.exit(1)
8*af03003cSMatthias Ringwald
# Operand size in bytes, as given on the command line.
size = int(sys.argv[1])
if size < 1:
    # A zero or negative size would fall through the arithmetic below and
    # silently generate meaningless code (e.g. size 0 gives full_rows == -1).
    sys.stderr.write("The integer size must be at least 1 byte\n")
    sys.exit(1)

# The operand is processed as one leading block of init_size bytes (1..10)
# followed by full_rows blocks of exactly 10 bytes each.
full_rows, init_size = divmod(size, 10)

if init_size == 0:
    # Size is a multiple of 10: use a full 10-byte block as the initial block
    # instead of an empty one.
    full_rows -= 1
    init_size = 10
17*af03003cSMatthias Ringwald
def rx(i):
    """Return the register number that holds byte ``i`` of the left-operand window.

    The generated code loads the left operand through the X pointer into
    consecutive registers starting at r2, so index 0..9 maps to r2..r11.
    """
    return i + 2
20*af03003cSMatthias Ringwald
def ry(i):
    """Return the register number that holds byte ``i`` of the right-operand window.

    The generated code loads the right operand through the Y pointer into
    consecutive registers starting at r12, so index 0..9 maps to r12..r21.
    """
    return i + 12
23*af03003cSMatthias Ringwald
def emit(line, *args):
    """Print one assembly instruction as a quoted string-literal line.

    ``line`` is a %-format template and ``args`` are its values. The result is
    wrapped in double quotes and terminated with the literal characters
    `` \\n\\t`` (backslash escapes, not real control characters), presumably so
    the output can be pasted into a C inline-assembly string.

    ``print(...)`` with a single parenthesised argument produces the same
    output on Python 2 and Python 3.
    """
    s = '"' + line + r' \n\t"'
    print(s % args)
27*af03003cSMatthias Ringwald
#### set up registers
# Advance the result pointer (z, r30) and the right-operand pointer (y, r28)
# by the same offset, so that the initial block works on the last init_size
# bytes of the right operand and stores at the matching result offset.
# NOTE(review): AVR adiw/sbiw take an immediate in 0..63 only; confirm the
# sizes this script is run with stay inside that range.
emit("adiw r30, %s", size - init_size) # move z
emit("adiw r28, %s", size - init_size) # move y

# Load the first init_size bytes of the left operand (via x) into r2.. and
# init_size bytes of the right operand (via y) into r12..
for i in range(init_size):
    emit("ld r%s, x+", rx(i))
for i in range(init_size):
    emit("ld r%s, y+", ry(i))

# r25 is kept at zero for the whole routine; "adc rN, r25" then adds only the
# carry flag.
emit("ldi r25, 0")
print("")
# Multiply the init_size x init_size initial block column by column: every
# output byte is the sum of all partial products that land in that column,
# accumulated in a rotating set of three registers (r22, r23, r24).
if init_size == 1:
    # Single byte times single byte: the 16-bit product in r1:r0 is the result.
    emit("mul r2, r12")
    emit("st z+, r0")
    emit("st z+, r1")
else:
    #### first two multiplications of initial block
    # Column 0 has a single product; its high byte seeds r22 for column 1.
    emit("ldi r23, 0")
    emit("mul r2, r12")
    emit("st z+, r0")
    emit("mov r22, r1")
    print("")
    # Column 1 has two products; the second one may carry into r24.
    emit("ldi r24, 0")
    emit("mul r2, r13")
    emit("add r22, r0")
    emit("adc r23, r1")
    emit("mul r3, r12")
    emit("add r22, r0")
    emit("adc r23, r1")
    emit("adc r24, r25")
    emit("st z+, r22")
    print("")

    #### rest of initial block, with moving accumulator registers
    # acc[0] is the byte stored for the current column, acc[1]/acc[2] collect
    # the carries for the next two columns. After each column the list is
    # rotated left, so the just-stored register is reused as the new top byte.
    acc = [23, 24, 22]
    # Growing columns: column r sums x[i] * y[r - i] for i = 0..r.
    for r in range(2, init_size):
        emit("ldi r%s, 0", acc[2])
        for i in range(0, r+1):
            emit("mul r%s, r%s", rx(i), ry(r - i))
            emit("add r%s, r0", acc[0])
            emit("adc r%s, r1", acc[1])
            emit("adc r%s, r25", acc[2])
        emit("st z+, r%s", acc[0])
        print("")
        acc = acc[1:] + acc[:1]
    # Shrinking columns: the lowest x byte drops out of each successive column.
    for r in range(1, init_size-1):
        emit("ldi r%s, 0", acc[2])
        for i in range(0, init_size-r):
            emit("mul r%s, r%s", rx(r+i), ry((init_size-1) - i))
            emit("add r%s, r0", acc[0])
            emit("adc r%s, r1", acc[1])
            emit("adc r%s, r25", acc[2])
        emit("st z+, r%s", acc[0])
        print("")
        acc = acc[1:] + acc[:1]
    # Last column: one product of the two top bytes; no third accumulator byte
    # is needed, the remaining two bytes are stored directly.
    emit("mul r%s, r%s", rx(init_size-1), ry(init_size-1))
    emit("add r%s, r0", acc[0])
    emit("adc r%s, r1", acc[1])
    emit("st z+, r%s", acc[0])
    emit("st z+, r%s", acc[1])
print("")
89*af03003cSMatthias Ringwald
#### reset y and z pointers
# The initial block advanced z by 2 * init_size (stored result bytes) and y by
# init_size (loaded operand bytes). Rewind both past that and 10 bytes
# further, to the start of the next (lower) 10-byte window.
emit("sbiw r30, %s", 2 * init_size + 10)
emit("sbiw r28, %s", init_size + 10)

#### load y registers
# Fill all ten right-operand registers r12..r21 from the new window.
for i in range(10):
    emit("ld r%s, y+", ry(i))

#### load additional x registers
# r2.. already hold the first init_size left-operand bytes; top the window up
# to ten registers by continuing to read through x.
for i in range(init_size, 10):
    emit("ld r%s, x+", rx(i))
print("")
102*af03003cSMatthias Ringwald
# Number of operand bytes already covered by the blocks processed so far; it
# controls how many shift steps each full row needs.
prev_size = init_size
for row in range(full_rows):
    #### do x = 0-9, y = 0-9 multiplications
    # Column 0 has a single product; its high byte seeds r22 for column 1.
    emit("ldi r23, 0")
    emit("mul r2, r12")
    emit("st z+, r0")
    emit("mov r22, r1")
    print("")
    # Column 1 has two products; the second one may carry into r24.
    emit("ldi r24, 0")
    emit("mul r2, r13")
    emit("add r22, r0")
    emit("adc r23, r1")
    emit("mul r3, r12")
    emit("add r22, r0")
    emit("adc r23, r1")
    emit("adc r24, r25")
    emit("st z+, r22")
    print("")

    # acc[0] is the byte stored for the current column, acc[1]/acc[2] collect
    # carries for the next two columns; the list rotates left after each column.
    acc = [23, 24, 22]
    for r in range(2, 10):
        emit("ldi r%s, 0", acc[2])
        for i in range(0, r+1):
            emit("mul r%s, r%s", rx(i), ry(r - i))
            emit("add r%s, r0", acc[0])
            emit("adc r%s, r1", acc[1])
            emit("adc r%s, r25", acc[2])
        emit("st z+, r%s", acc[0])
        print("")
        acc = acc[1:] + acc[:1]

    #### now we need to start shifting x and loading from z
    # x_regs is rotated left on every step, so index 9 always names the
    # register that receives the newly loaded byte (the oldest byte is dropped).
    x_regs = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
    for r in range(0, prev_size):
        x_regs = x_regs[1:] + x_regs[:1]
        emit("ld r%s, x+", x_regs[9]) # load next byte of left
        emit("ldi r%s, 0", acc[2])
        for i in range(0, 10):
            emit("mul r%s, r%s", x_regs[i], ry(9 - i))
            emit("add r%s, r0", acc[0])
            emit("adc r%s, r1", acc[1])
            emit("adc r%s, r25", acc[2])
        emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment)
        emit("add r%s, r0", acc[0])
        emit("adc r%s, r25", acc[1])
        emit("adc r%s, r25", acc[2])
        emit("st z+, r%s", acc[0]) # store next byte (z increments)
        print("")
        acc = acc[1:] + acc[:1]

    # done shifting x, start shifting y
    # Same sliding-window scheme as for x, now applied to the right operand.
    y_regs = [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
    for r in range(0, prev_size):
        y_regs = y_regs[1:] + y_regs[:1]
        emit("ld r%s, y+", y_regs[9]) # load next byte of right
        emit("ldi r%s, 0", acc[2])
        for i in range(0, 10):
            emit("mul r%s, r%s", x_regs[i], y_regs[9 -i])
            emit("add r%s, r0", acc[0])
            emit("adc r%s, r1", acc[1])
            emit("adc r%s, r25", acc[2])
        emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment)
        emit("add r%s, r0", acc[0])
        emit("adc r%s, r25", acc[1])
        emit("adc r%s, r25", acc[2])
        emit("st z+, r%s", acc[0]) # store next byte (z increments)
        print("")
        acc = acc[1:] + acc[:1]

    # done both shifts, do remaining corner
    # Shrinking columns: one fewer product per column, nothing left to load.
    for r in range(1, 9):
        emit("ldi r%s, 0", acc[2])
        for i in range(0, 10-r):
            emit("mul r%s, r%s", x_regs[r+i], y_regs[9 - i])
            emit("add r%s, r0", acc[0])
            emit("adc r%s, r1", acc[1])
            emit("adc r%s, r25", acc[2])
        emit("st z+, r%s", acc[0])
        print("")
        acc = acc[1:] + acc[:1]
    # Last column: a single product; store the two remaining accumulator bytes.
    emit("mul r%s, r%s", x_regs[9], y_regs[9])
    emit("add r%s, r0", acc[0])
    emit("adc r%s, r1", acc[1])
    emit("st z+, r%s", acc[0])
    emit("st z+, r%s", acc[1])
    print("")

    prev_size = prev_size + 10
    if row < full_rows - 1:
        #### reset x, y and z pointers
        # During this row z advanced by 2 * prev_size and x and y by prev_size
        # each (prev_size already includes this row). x goes back to where it
        # was when the row started loading; y and z go 10 bytes further back,
        # to the next lower window.
        # NOTE(review): AVR sbiw takes an immediate in 0..63 only, and
        # 2 * prev_size + 10 exceeds that once prev_size >= 27 (first reached
        # with size 37); confirm the supported sizes.
        emit("sbiw r30, %s", 2 * prev_size + 10)
        emit("sbiw r28, %s", prev_size + 10)
        emit("sbiw r26, %s", prev_size)

        #### load x and y registers
        for i in range(10):
            emit("ld r%s, x+", rx(i))
            emit("ld r%s, y+", ry(i))
        print("")

# mul leaves its result in r1:r0; clear r1 again at the end, presumably
# because the surrounding compiler-generated code expects r1 to be zero.
emit("eor r1, r1")
204