#!/usr/bin/env python3
# xref: /btstack/3rd-party/micro-ecc/scripts/mult_arm.py
#       (revision 6ccd8248590f666db07dd7add13fecb4f5664fb5)
#
# Prints ARM multiply instructions, one quoted string per line, for operands
# that are <size> 32-bit words long.
#
# Usage: mult_arm.py <size>

import sys

USAGE = "Provide the integer size in 32-bit words"

# The generated code goes to stdout, so usage errors are reported through
# sys.exit(message): that writes to stderr and exits with status 1, keeping
# the message out of any redirected output.
if len(sys.argv) < 2:
    sys.exit(USAGE)

try:
    size = int(sys.argv[1])
except ValueError:
    sys.exit(USAGE)

# Zero or negative sizes would make the arithmetic below produce negative
# offsets and row counts, so reject them up front.
if size < 1:
    sys.exit(USAGE)

# The operands are processed as one initial block of 1..3 words followed by
# `full_rows` blocks of exactly three words each.
full_rows, init_size = divmod(size, 3)

if init_size == 0:
    # A size divisible by three uses a full three-word initial block instead
    # of an empty one, which leaves one fewer full row.
    full_rows -= 1
    init_size = 3

def emit(line, *args):
    r"""Print one instruction as a double-quoted string fragment.

    ``line`` is a printf-style template and ``args`` are the values for its
    ``%`` placeholders.  The formatted text is wrapped in double quotes and
    followed by a space and the literal characters ``\n\t`` (backslash
    escapes, not a real newline or tab), so ``emit("add r0, %s", 8)`` prints
    ``"add r0, 8 \n\t"``.

    Raises TypeError if ``args`` does not match the placeholders in ``line``.
    """
    # Only the template needs formatting: the surrounding quote and suffix
    # contain no '%' characters.
    print('"' + line % args + r' \n\t"')

# Register numbers that hold up to three words of each operand:
# rx is filled through r1, ry through r2.
rx = [3, 4, 5]
ry = [6, 7, 8]

#### set up registers
# Skip r0 (z) and r2 (y) past the words that the full rows will handle, so the
# initial block works on the last `init_size` words of y.
emit("add r0, %s", (size - init_size) * 4) # move z
emit("add r2, %s", (size - init_size) * 4) # move y

emit("ldmia r1!, {%s}", ", ".join("r%s" % reg for reg in rx[:init_size]))
emit("ldmia r2!, {%s}", ", ".join("r%s" % reg for reg in ry[:init_size]))

print("")
if init_size == 1:
    # Single-word block: one product, both halves stored at once.
    emit("umull r9, r10, r3, r6")
    emit("stmia r0!, {r9, r10}")
else:
    #### first two multiplications of initial block
    emit("umull r11, r12, r3, r6")
    emit("stmia r0!, {r11}")
    print("")
    emit("mov r10, #0")
    emit("umull r11, r9, r3, r7")
    emit("adds r12, r11")
    emit("adc r9, #0")
    emit("umull r11, r14, r4, r6")
    emit("adds r12, r11")
    emit("adcs r9, r14")
    emit("adc r10, #0")
    emit("stmia r0!, {r12}")
    print("")

    #### rest of initial block, with moving accumulator registers
    # acc[0..2] is the running sum (low word first); acc[3], acc[4] receive
    # each umull result.  After acc[0] is stored the list is rotated by one so
    # the next word of the sum becomes the new low word.
    acc = [9, 10, 11, 12, 14]
    if init_size == 3:
        # Three products whose register indices sum to 2.
        emit("mov r%s, #0", acc[2])
        for i in range(3):
            emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i], ry[2 - i])
            emit("adds r%s, r%s", acc[0], acc[3])
            emit("adcs r%s, r%s", acc[1], acc[4])
            emit("adc r%s, #0", acc[2])
        emit("stmia r0!, {r%s}", acc[0])
        print("")
        acc = acc[1:] + acc[:1]

        # Two products whose register indices sum to 3.
        emit("mov r%s, #0", acc[2])
        for i in range(2):
            emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i + 1], ry[2 - i])
            emit("adds r%s, r%s", acc[0], acc[3])
            emit("adcs r%s, r%s", acc[1], acc[4])
            emit("adc r%s, #0", acc[2])
        emit("stmia r0!, {r%s}", acc[0])
        print("")
        acc = acc[1:] + acc[:1]

    # Final product of the block (last x word times last y word); the two
    # remaining sum words are stored without further carry propagation.
    emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[init_size-1], ry[init_size-1])
    emit("adds r%s, r%s", acc[0], acc[3])
    emit("adc r%s, r%s", acc[1], acc[4])
    emit("stmia r0!, {r%s}", acc[0])
    emit("stmia r0!, {r%s}", acc[1])
print("")

#### reset y and z pointers
# Step r0 and r2 back over what the initial block consumed plus three more
# words, so they point at the start of the first full row.
emit("sub r0, %s", (2 * init_size + 3) * 4)
emit("sub r2, %s", (init_size + 3) * 4)

#### load y registers
emit("ldmia r2!, {%s}", ", ".join("r%s" % reg for reg in ry))

#### load additional x registers
# The initial block loaded only the first `init_size` x registers; top the set
# up to three.
if init_size != 3:
    emit("ldmia r1!, {%s}", ", ".join("r%s" % reg for reg in rx[init_size:]))
print("")

# Number of operand words covered by the blocks emitted so far; every full row
# adds three.
prev_size = init_size
for row in range(full_rows):
    #### first two result words of the row (nothing is read back from z here)
    emit("umull r11, r12, r3, r6")
    emit("stmia r0!, {r11}")
    print("")
    emit("mov r10, #0")
    emit("umull r11, r9, r3, r7")
    emit("adds r12, r11")
    emit("adc r9, #0")
    emit("umull r11, r14, r4, r6")
    emit("adds r12, r11")
    emit("adcs r9, r14")
    emit("adc r10, #0")
    emit("stmia r0!, {r12}")
    print("")

    # acc[0..2] is the running sum (low word first); acc[3], acc[4] receive
    # each umull result.  The list is rotated after every stored word.
    acc = [9, 10, 11, 12, 14]
    emit("mov r%s, #0", acc[2])
    for i in range(3):
        emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i], ry[2 - i])
        emit("adds r%s, r%s", acc[0], acc[3])
        emit("adcs r%s, r%s", acc[1], acc[4])
        emit("adc r%s, #0", acc[2])
    emit("stmia r0!, {r%s}", acc[0])
    print("")
    acc = acc[1:] + acc[:1]

    #### now we need to start shifting x and loading from z
    # Each step rotates the x register window by one, loads the next x word
    # into the freed register, and multiplies the window against the fixed
    # y registers.
    x_regs = [3, 4, 5]
    for _ in range(prev_size):
        x_regs = x_regs[1:] + x_regs[:1]
        emit("ldmia r1!, {r%s}", x_regs[2])
        emit("mov r%s, #0", acc[2])
        for i in range(3):
            emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i], ry[2 - i])
            emit("adds r%s, r%s", acc[0], acc[3])
            emit("adcs r%s, r%s", acc[1], acc[4])
            emit("adc r%s, #0", acc[2])
        emit("ldr r%s, [r0]", acc[3]) # load stored value from initial block, and add to accumulator
        emit("adds r%s, r%s", acc[0], acc[3])
        emit("adcs r%s, #0", acc[1])
        emit("adc r%s, #0", acc[2])
        emit("stmia r0!, {r%s}", acc[0])
        print("")
        acc = acc[1:] + acc[:1]

    # done shifting x, start shifting y
    # Same scheme with the roles swapped: the x window stays where the loop
    # above left it while the y window advances one word per step.
    y_regs = [6, 7, 8]
    for _ in range(prev_size):
        y_regs = y_regs[1:] + y_regs[:1]
        emit("ldmia r2!, {r%s}", y_regs[2])
        emit("mov r%s, #0", acc[2])
        for i in range(3):
            emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i], y_regs[2 - i])
            emit("adds r%s, r%s", acc[0], acc[3])
            emit("adcs r%s, r%s", acc[1], acc[4])
            emit("adc r%s, #0", acc[2])
        emit("ldr r%s, [r0]", acc[3]) # load stored value from initial block, and add to accumulator
        emit("adds r%s, r%s", acc[0], acc[3])
        emit("adcs r%s, #0", acc[1])
        emit("adc r%s, #0", acc[2])
        emit("stmia r0!, {r%s}", acc[0])
        print("")
        acc = acc[1:] + acc[:1]

    # done both shifts, do remaining corner
    emit("mov r%s, #0", acc[2])
    for i in range(2):
        emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i + 1], y_regs[2 - i])
        emit("adds r%s, r%s", acc[0], acc[3])
        emit("adcs r%s, r%s", acc[1], acc[4])
        emit("adc r%s, #0", acc[2])
    emit("stmia r0!, {r%s}", acc[0])
    print("")
    acc = acc[1:] + acc[:1]

    # Last product of the row; the two remaining sum words are stored as-is.
    emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[2], y_regs[2])
    emit("adds r%s, r%s", acc[0], acc[3])
    emit("adc r%s, r%s", acc[1], acc[4])
    emit("stmia r0!, {r%s}", acc[0])
    emit("stmia r0!, {r%s}", acc[1])
    print("")

    prev_size += 3
    if row < full_rows - 1:
        #### reset x, y and z pointers
        # Only needed when another row follows.
        emit("sub r0, %s", (2 * prev_size + 3) * 4)
        emit("sub r1, %s", prev_size * 4)
        emit("sub r2, %s", (prev_size + 3) * 4)

        #### load x and y registers
        # The register lists here are joined with "," (no space), unlike the
        # earlier loads; kept as-is so the generated text does not change.
        emit("ldmia r1!, {%s}", ",".join("r%s" % reg for reg in rx))
        emit("ldmia r2!, {%s}", ",".join("r%s" % reg for reg in ry))

        print("")
189