xref: /btstack/3rd-party/micro-ecc/scripts/mult_arm.py (revision af03003c8ac55cf0eea9563b597879b24aee256f)
1*af03003cSMatthias Ringwald#!/usr/bin/env python
2*af03003cSMatthias Ringwald
3*af03003cSMatthias Ringwaldimport sys
4*af03003cSMatthias Ringwald
# Generator for an unrolled ARM multi-word multiplication routine.
#
# Given the operand size in 32-bit words, this prints the assembly one
# instruction per line on stdout.  Per the pointer comments below, r0 walks
# the result (z), r1 walks x and r2 walks y.  The operands are processed in
# blocks of three words: one initial block of 1-3 words followed by
# `full_rows` rows that each extend the product by three more words.
#
# Only constructs that behave identically on Python 2 and Python 3 are used
# (single-argument print(...), range), so the script runs under either.

rx = [3, 4, 5]  # registers that hold the current three words of x
ry = [6, 7, 8]  # registers that hold the current three words of y


def emit(line, *args):
    """Print one generated assembly line wrapped as a quoted string fragment.

    ``line`` is a %-format template and ``args`` its substitution values.
    The text is surrounded by double quotes and followed by a literal
    backslash-n backslash-t (escape sequences, not control characters) --
    presumably so the output can be pasted into a C inline-asm string.
    """
    s = '"' + line + r' \n\t"'
    print(s % args)


def _blank():
    """Print an empty separator line between groups of instructions."""
    print("")


def _rotate(regs):
    """Return ``regs`` rotated left by one position."""
    return regs[1:] + regs[:1]


def _reg_list(regs, sep=", "):
    """Format register numbers as an ``rN``-list joined with ``sep``."""
    return sep.join(["r%s" % r for r in regs])


def _split_size(size):
    """Split ``size`` words into ``(full_rows, init_size)``.

    ``init_size`` is the width (1-3) of the initial block and ``full_rows``
    the number of three-word rows that follow.  A size that is a multiple of
    three uses a full three-word initial block and one row fewer.
    """
    full_rows = size // 3
    init_size = size % 3
    if init_size == 0:
        full_rows = full_rows - 1
        init_size = 3
    return full_rows, init_size


def _emit_first_two_columns():
    """Emit the first two result words of a block (fixed register use)."""
    emit("umull r11, r12, r3, r6")
    emit("stmia r0!, {r11}")
    _blank()
    emit("mov r10, #0")
    emit("umull r11, r9, r3, r7")
    emit("adds r12, r11")
    emit("adc r9, #0")
    emit("umull r11, r14, r4, r6")
    emit("adds r12, r11")
    emit("adcs r9, r14")
    emit("adc r10, #0")
    emit("stmia r0!, {r12}")
    _blank()


def _emit_column(acc, pairs, add_stored=False):
    """Emit one result word and return the rotated accumulator list.

    ``acc`` is a list of five register numbers: acc[0..2] form the running
    accumulator (acc[2] is cleared first), acc[3] and acc[4] receive each
    ``umull`` result.  ``pairs`` lists the (x register, y register) products
    to add.  With ``add_stored`` the word currently at [r0] is added in as
    well before acc[0] is stored through r0.
    """
    emit("mov r%s, #0", acc[2])
    for x_reg, y_reg in pairs:
        emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_reg, y_reg)
        emit("adds r%s, r%s", acc[0], acc[3])
        emit("adcs r%s, r%s", acc[1], acc[4])
        emit("adc r%s, #0", acc[2])
    if add_stored:
        # load stored value from initial block, and add to accumulator
        emit("ldr r%s, [r0]", acc[3])
        emit("adds r%s, r%s", acc[0], acc[3])
        emit("adcs r%s, #0", acc[1])
        emit("adc r%s, #0", acc[2])
    emit("stmia r0!, {r%s}", acc[0])
    _blank()
    # The stored register becomes free; shift every role down by one.
    return _rotate(acc)


def _emit_last_product(acc, x_reg, y_reg):
    """Emit the final product of a block and store the last two words."""
    emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_reg, y_reg)
    emit("adds r%s, r%s", acc[0], acc[3])
    emit("adc r%s, r%s", acc[1], acc[4])
    emit("stmia r0!, {r%s}", acc[0])
    emit("stmia r0!, {r%s}", acc[1])


def _emit_initial_block(init_size):
    """Emit the product of the first ``init_size`` (1-3) words of x and y."""
    if init_size == 1:
        emit("umull r9, r10, r3, r6")
        emit("stmia r0!, {r9, r10}")
        return

    #### first two multiplications of initial block
    _emit_first_two_columns()

    #### rest of initial block, with moving accumulator registers
    acc = [9, 10, 11, 12, 14]
    if init_size == 3:
        acc = _emit_column(acc, [(rx[i], ry[2 - i]) for i in range(3)])
        acc = _emit_column(acc, [(rx[i + 1], ry[2 - i]) for i in range(2)])

    _emit_last_product(acc, rx[init_size - 1], ry[init_size - 1])


def _emit_full_row(prev_size):
    """Emit one three-word row on top of ``prev_size`` already-handled words."""
    _emit_first_two_columns()

    acc = [9, 10, 11, 12, 14]
    acc = _emit_column(acc, [(rx[i], ry[2 - i]) for i in range(3)])

    #### now we need to start shifting x and loading from z
    x_regs = [3, 4, 5]
    for _ in range(prev_size):
        x_regs = _rotate(x_regs)
        emit("ldmia r1!, {r%s}", x_regs[2])
        acc = _emit_column(
            acc, [(x_regs[i], ry[2 - i]) for i in range(3)], add_stored=True)

    # done shifting x, start shifting y
    y_regs = [6, 7, 8]
    for _ in range(prev_size):
        y_regs = _rotate(y_regs)
        emit("ldmia r2!, {r%s}", y_regs[2])
        acc = _emit_column(
            acc, [(x_regs[i], y_regs[2 - i]) for i in range(3)],
            add_stored=True)

    # done both shifts, do remaining corner
    acc = _emit_column(acc, [(x_regs[i + 1], y_regs[2 - i]) for i in range(2)])
    _emit_last_product(acc, x_regs[2], y_regs[2])
    _blank()


def generate(size):
    """Print the multiplication routine for ``size``-word operands on stdout."""
    full_rows, init_size = _split_size(size)

    #### set up registers
    emit("add r0, %s", (size - init_size) * 4)  # move z
    emit("add r2, %s", (size - init_size) * 4)  # move y

    emit("ldmia r1!, {%s}", _reg_list(rx[:init_size]))
    emit("ldmia r2!, {%s}", _reg_list(ry[:init_size]))
    _blank()

    _emit_initial_block(init_size)
    _blank()

    #### reset y and z pointers
    emit("sub r0, %s", (2 * init_size + 3) * 4)
    emit("sub r2, %s", (init_size + 3) * 4)

    #### load y registers
    emit("ldmia r2!, {%s}", _reg_list(ry))

    #### load additional x registers
    if init_size != 3:
        emit("ldmia r1!, {%s}", _reg_list(rx[init_size:]))
    _blank()

    prev_size = init_size
    for row in range(full_rows):
        _emit_full_row(prev_size)

        prev_size = prev_size + 3
        if row < full_rows - 1:
            #### reset x, y and z pointers
            emit("sub r0, %s", (2 * prev_size + 3) * 4)
            emit("sub r1, %s", prev_size * 4)
            emit("sub r2, %s", (prev_size + 3) * 4)

            #### load x and y registers
            # NOTE: this reload has always used "," without a space as the
            # separator; kept so the generated text stays byte-identical.
            emit("ldmia r1!, {%s}", _reg_list(rx, ","))
            emit("ldmia r2!, {%s}", _reg_list(ry, ","))

            _blank()


def main(argv=None):
    """Command-line entry point: ``mult_arm.py <size in 32-bit words>``.

    Exits with status 1 when the size argument is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        # stdout carries the generated assembly, so report usage on stderr.
        sys.stderr.write("Provide the integer size in 32-bit words\n")
        sys.exit(1)
    generate(int(argv[1]))


if __name__ == "__main__":
    main(sys.argv)
189