#!/usr/bin/env python3
# xref: /btstack/3rd-party/micro-ecc/scripts/square_avr.py (revision 6ccd8248590f666db07dd7add13fecb4f5664fb5)
#
# Prints, on stdout, quoted assembly lines for a multi-byte integer squaring
# routine.  The single command-line argument is the integer size in bytes.

import sys

# The size argument is mandatory.
if len(sys.argv) < 2:
    print("Provide the integer size in bytes")
    sys.exit(1)

size = int(sys.argv[1])

# Sizes above 40 bytes are rejected outright.
if size > 40:
    print("This script doesn't work with integer size %s due to laziness" % (size))
    sys.exit(1)

# init_size is the number of bytes beyond the first 20 (0 for sizes below 20).
# The remaining "size - init_size" bytes are handled by the main part of the
# script; the extra bytes are handled by the "initial block".
init_size = size - 20
if size < 20:
    init_size = 0

def rg(i):
    """Return the register number used for byte ``i`` of the main block.

    Bytes are mapped to consecutive registers starting at r2.
    """
    return i + 2

def lo(i):
    """Return the register number for low-end byte ``i`` of the initial block.

    Low-end bytes are mapped to consecutive registers starting at r2.
    """
    return i + 2

def hi(i):
    """Return the register number for high-end byte ``i`` of the initial block.

    High-end bytes are mapped to consecutive registers starting at r12.
    """
    return i + 12

def emit(line, *args):
    """Print one assembly line as a double-quoted string literal.

    ``line`` is a %-format template and ``args`` are its values.  The printed
    text is the formatted line wrapped in double quotes, followed by a space
    and the literal characters ``\\n\\t`` before the closing quote.
    """
    s = '"' + line + r' \n\t"'
    print(s % args)

#### set up registers
zero = "r25"
emit("ldi %s, 0", zero) # zero register

# Initial block: only emitted when the integer is longer than 20 bytes.
# It multiplies the first init_size bytes (read through x) with the last
# init_size bytes (read through y) and stores the partial result through z.
if init_size > 0:
    emit("movw r28, r26") # y = x
    h = (init_size + 1)//2  # number of registers kept loaded from each end

    for i in range(h):
        emit("ld r%s, x+", lo(i))
    emit("adiw r28, %s", size - init_size) # move y to other end
    for i in range(h):
        emit("ld r%s, y+", hi(i))

    emit("adiw r30, %s", size - init_size) # move z

    if init_size == 1:
        # Single byte pair: the 16-bit product in r0:r1 is the whole result.
        # NOTE(review): the operands are emitted without an "r" prefix here
        # ("mul 2, 12"), unlike every other mul in this script -- confirm the
        # assembler in use accepts that form.
        emit("mul %s, %s", lo(0), hi(0))
        emit("st z+, r0")
        emit("st z+, r1")
    else:
        #### first one
        print("")
        emit("ldi r23, 0")
        # NOTE(review): operands emitted without an "r" prefix (see above).
        emit("mul %s, %s", lo(0), hi(0))
        emit("st z+, r0")
        emit("mov r22, r1")
        print("")

        #### rest of initial block, with moving accumulator registers
        # acc holds three register numbers; after each stored byte the list is
        # rotated so the two carry registers become the low accumulator bytes.
        acc = [22, 23, 24]
        for r in range(1, h):
            emit("ldi r%s, 0", acc[2])
            for i in range(0, (r+2)//2):
                emit("mul r%s, r%s", lo(i), hi(r - i))
                emit("add r%s, r0", acc[0])
                emit("adc r%s, r1", acc[1])
                emit("adc r%s, %s", acc[2], zero)
            emit("st z+, r%s", acc[0])
            print("")
            acc = acc[1:] + acc[:1]

        # Register windows for the low and high ends.  These must be real
        # lists: they are rotated below with slice concatenation, which a
        # Python 3 range object does not support (TypeError).
        lo_r = list(range(2, 2 + h))
        hi_r = list(range(12, 12 + h))

        # now we need to start loading more from the high end
        for r in range(h, init_size):
            hi_r = hi_r[1:] + hi_r[:1]
            emit("ld r%s, y+", hi_r[h-1])

            emit("ldi r%s, 0", acc[2])
            for i in range(0, (r+2)//2):
                emit("mul r%s, r%s", lo(i), hi_r[h - 1 - i])
                emit("add r%s, r0", acc[0])
                emit("adc r%s, r1", acc[1])
                emit("adc r%s, %s", acc[2], zero)
            emit("st z+, r%s", acc[0])
            print("")
            acc = acc[1:] + acc[:1]

        # loaded all of the high end bytes; now need to start loading the rest of the low end
        for r in range(1, init_size-h):
            lo_r = lo_r[1:] + lo_r[:1]
            emit("ld r%s, x+", lo_r[h-1])

            emit("ldi r%s, 0", acc[2])
            for i in range(0, (init_size+1 - r)//2):
                emit("mul r%s, r%s", lo_r[i], hi_r[h - 1 - i])
                emit("add r%s, r0", acc[0])
                emit("adc r%s, r1", acc[1])
                emit("adc r%s, %s", acc[2], zero)
            emit("st z+, r%s", acc[0])
            print("")
            acc = acc[1:] + acc[:1]

        lo_r = lo_r[1:] + lo_r[:1]
        emit("ld r%s, x+", lo_r[h-1])

        # now we have loaded everything, and we just need to finish the last corner
        for r in range(init_size-h, init_size-1):
            emit("ldi r%s, 0", acc[2])
            for i in range(0, (init_size+1 - r)//2):
                emit("mul r%s, r%s", lo_r[i], hi_r[h - 1 - i])
                emit("add r%s, r0", acc[0])
                emit("adc r%s, r1", acc[1])
                emit("adc r%s, %s", acc[2], zero)
            emit("st z+, r%s", acc[0])
            print("")
            acc = acc[1:] + acc[:1]
            lo_r = lo_r[1:] + lo_r[:1] # make the indexing easy

        # Last product of the initial block: store both remaining bytes.
        emit("mul r%s, r%s", lo_r[0], hi_r[h - 1])
        emit("add r%s, r0", acc[0])
        emit("adc r%s, r1", acc[1])
        emit("st z+, r%s", acc[0])
        emit("st z+, r%s", acc[1])
    print("")
    emit("sbiw r26, %s", init_size) # reset x
    emit("sbiw r30, %s", size + init_size) # reset z

# TODO you could do more rows of size 20 here if your integers are larger than 40 bytes

# Number of bytes handled by the main part (20 for any size >= 20).
s = size - init_size

# Load the first s bytes through x into consecutive registers starting at r2.
for i in range(s):
    emit("ld r%s, x+", rg(i))

#### first few columns
# NOTE: this is only valid if size >= 3
# Column 0: byte0 * byte0; low byte is stored, high byte starts the carry.
print("")
emit("ldi r23, 0")
emit("mul r%s, r%s", rg(0), rg(0))
emit("st z+, r0")
emit("mov r22, r1")
print("")
# Column 1: byte0 * byte1, added twice.
emit("ldi r24, 0")
emit("mul r%s, r%s", rg(0), rg(1))
emit("add r22, r0")
emit("adc r23, r1")
emit("adc r24, %s", zero)
emit("add r22, r0")
emit("adc r23, r1")
emit("adc r24, %s", zero)
emit("st z+, r22")
print("")
# Column 2: byte0 * byte2 added twice, plus byte1 * byte1 once.
emit("ldi r22, 0")
emit("mul r%s, r%s", rg(0), rg(2))
emit("add r23, r0")
emit("adc r24, r1")
emit("adc r22, %s", zero)
emit("add r23, r0")
emit("adc r24, r1")
emit("adc r22, %s", zero)
emit("mul r%s, r%s", rg(1), rg(1))
emit("add r23, r0")
emit("adc r24, r1")
emit("adc r22, %s", zero)
emit("st z+, r23")
print("")

# acc: the three registers of the running column sum (acc[0] was just stored,
# acc[1] and acc[2] hold the carry).  old_acc: two spare registers.
acc = [23, 24, 22]
old_acc = [28, 29]
# Columns 3 .. s-1: one output byte per pass.
for i in range(3, s):
    # Zero the register that is about to become the top accumulator byte,
    # then swap roles: the carry registers become old_acc and the spare
    # registers become the upper two accumulator bytes.
    emit("ldi r%s, 0", old_acc[1])
    acc, old_acc = [acc[0], old_acc[0], old_acc[1]], [acc[1], acc[2]]

    # gather non-equal words
    emit("mul r%s, r%s", rg(0), rg(i))
    emit("mov r%s, r0", acc[0])
    emit("mov r%s, r1", acc[1])
    for j in range(1, (i+1)//2):
        emit("mul r%s, r%s", rg(j), rg(i-j))
        emit("add r%s, r0", acc[0])
        emit("adc r%s, r1", acc[1])
        emit("adc r%s, %s", acc[2], zero)
    # multiply by 2
    emit("lsl r%s", acc[0])
    emit("rol r%s", acc[1])
    emit("rol r%s", acc[2])

    # add equal word (if any): only even columns have a byte paired with itself
    if i % 2 == 0:
        emit("mul r%s, r%s", rg(i//2), rg(i//2))
        emit("add r%s, r0", acc[0])
        emit("adc r%s, r1", acc[1])
        emit("adc r%s, %s", acc[2], zero)

    # add old accumulator
    emit("add r%s, r%s", acc[0], old_acc[0])
    emit("adc r%s, r%s", acc[1], old_acc[1])
    emit("adc r%s, %s", acc[2], zero)

    # store
    emit("st z+, r%s", acc[0])
    print("")

# Sliding window over the 20 register numbers r2..r21.  This must be a real
# list: it is rotated below with slice concatenation, which a Python 3 range
# object does not support (TypeError).
regs = list(range(2, 22))
for i in range(init_size):
    # Rotate the window by one and load the next byte into its last slot.
    regs = regs[1:] + regs[:1]
    emit("ld r%s, x+", regs[19])

    # Each pass of the outer loop emits two output bytes.
    for limit in [18, 19]:
        # Zero the register that is about to become the top accumulator byte,
        # then swap the accumulator / carry register roles.
        emit("ldi r%s, 0", old_acc[1])
        acc, old_acc = [acc[0], old_acc[0], old_acc[1]], [acc[1], acc[2]]

        # gather non-equal words
        emit("mul r%s, r%s", regs[0], regs[limit])
        emit("mov r%s, r0", acc[0])
        emit("mov r%s, r1", acc[1])
        for j in range(1, (limit+1)//2):
            emit("mul r%s, r%s", regs[j], regs[limit-j])
            emit("add r%s, r0", acc[0])
            emit("adc r%s, r1", acc[1])
            emit("adc r%s, %s", acc[2], zero)

        emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment)
        emit("add r%s, r0", acc[0])
        emit("adc r%s, r25", acc[1])
        emit("adc r%s, r25", acc[2])

        # multiply by 2
        emit("lsl r%s", acc[0])
        emit("rol r%s", acc[1])
        emit("rol r%s", acc[2])

        # add equal word
        if limit == 18:
            emit("mul r%s, r%s", regs[9], regs[9])
            emit("add r%s, r0", acc[0])
            emit("adc r%s, r1", acc[1])
            emit("adc r%s, %s", acc[2], zero)

        # add old accumulator
        emit("add r%s, r%s", acc[0], old_acc[0])
        emit("adc r%s, r%s", acc[1], old_acc[1])
        emit("adc r%s, %s", acc[2], zero)

        # store
        emit("st z+, r%s", acc[0])
        print("")

# Descending columns: pair window entry i with the last entry (s - 1) and
# work inwards; each pass emits one output byte.
for i in range(1, s-3):
    # Zero the register that is about to become the top accumulator byte,
    # then swap the accumulator / carry register roles.
    emit("ldi r%s, 0", old_acc[1])
    acc, old_acc = [acc[0], old_acc[0], old_acc[1]], [acc[1], acc[2]]

    # gather non-equal words
    emit("mul r%s, r%s", regs[i], regs[s - 1])
    emit("mov r%s, r0", acc[0])
    emit("mov r%s, r1", acc[1])
    for j in range(1, (s-i)//2):
        emit("mul r%s, r%s", regs[i+j], regs[s - 1 - j])
        emit("add r%s, r0", acc[0])
        emit("adc r%s, r1", acc[1])
        emit("adc r%s, %s", acc[2], zero)
    # multiply by 2
    emit("lsl r%s", acc[0])
    emit("rol r%s", acc[1])
    emit("rol r%s", acc[2])

    # add equal word (if any): an odd number of entries in i..s-1 leaves a
    # middle entry that is paired with itself
    if (s - i) % 2 != 0:
        emit("mul r%s, r%s", regs[i + (s-i)//2], regs[i + (s-i)//2])
        emit("add r%s, r0", acc[0])
        emit("adc r%s, r1", acc[1])
        emit("adc r%s, %s", acc[2], zero)

    # add old accumulator
    emit("add r%s, r%s", acc[0], old_acc[0])
    emit("adc r%s, r%s", acc[1], old_acc[1])
    emit("adc r%s, %s", acc[2], zero)

    # store
    emit("st z+, r%s", acc[0])
    print("")

# Final corner: the last three window entries.  They are indexed relative to
# s so that they follow the same window the loop above used; for s == 20
# these are entries 17, 18 and 19.
# NOTE(review): assumes s >= 4 -- smaller sizes are not handled here.

# Third-to-last column: regs[s-3] * regs[s-1] added twice, plus regs[s-2] squared.
acc = acc[1:] + acc[:1]
emit("ldi r%s, 0", acc[2])
emit("mul r%s, r%s", regs[s - 3], regs[s - 1])
emit("add r%s, r0", acc[0])
emit("adc r%s, r1", acc[1])
emit("adc r%s, %s", acc[2], zero)
emit("add r%s, r0", acc[0])
emit("adc r%s, r1", acc[1])
emit("adc r%s, %s", acc[2], zero)
emit("mul r%s, r%s", regs[s - 2], regs[s - 2])
emit("add r%s, r0", acc[0])
emit("adc r%s, r1", acc[1])
emit("adc r%s, %s", acc[2], zero)
emit("st z+, r%s", acc[0])
print("")

# Second-to-last column: regs[s-2] * regs[s-1] added twice.
acc = acc[1:] + acc[:1]
emit("ldi r%s, 0", acc[2])
emit("mul r%s, r%s", regs[s - 2], regs[s - 1])
emit("add r%s, r0", acc[0])
emit("adc r%s, r1", acc[1])
emit("adc r%s, %s", acc[2], zero)
emit("add r%s, r0", acc[0])
emit("adc r%s, r1", acc[1])
emit("adc r%s, %s", acc[2], zero)
emit("st z+, r%s", acc[0])
print("")

# Last product: regs[s-1] squared; both remaining bytes are stored.
emit("mul r%s, r%s", regs[s - 1], regs[s - 1])
emit("add r%s, r0", acc[1])
emit("adc r%s, r1", acc[2])
emit("st z+, r%s", acc[1])

emit("st z+, r%s", acc[2])
emit("eor r1, r1")  # r1 xor r1: leave r1 cleared after the mul instructions