// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// ARM version of md5block.go

//go:build !purego

#include "textflag.h"

// Register definitions
//
// Symbolic names for the ARM registers used by ·block. The ROUNDn macros
// take the accumulator and constant registers as parameters, so the same
// macro body can be reused with the accumulators rotated between steps.
#define Rtable	R0	// Pointer to MD5 constants table
#define Rdata	R1	// Pointer to data to hash
#define Ra	R2	// MD5 accumulator
#define Rb	R3	// MD5 accumulator
#define Rc	R4	// MD5 accumulator
#define Rd	R5	// MD5 accumulator
#define Rc0	R6	// MD5 constant
#define Rc1	R7	// MD5 constant
#define Rc2	R8	// MD5 constant
// r9, r10 are forbidden
// r11 is OK provided you check the assembler that no synthetic instructions use it
#define Rc3	R11	// MD5 constant
#define Rt0	R12	// temporary
#define Rt1	R14	// temporary

// func block(dig *digest, p []byte)
//
// Argument layout:
//  0(FP) is *digest
//  4(FP) is p.array (struct Slice)
//  8(FP) is p.len
// 12(FP) is p.cap
//
// Stack frame
//
// The frame holds two saved pointers (8 bytes), a 16-word (64-byte)
// aligned scratch buffer, and 3 words of outgoing call arguments:
// 8 + 64 + 12 = 84 bytes.
#define p_end	end-4(SP)	// pointer to the end of data
#define p_data	data-8(SP)	// current data pointer
#define buf	buffer-(8+4*16)(SP)	// 16 words temporary buffer
		// 3 words at 4..12(R13) for called routine parameters

// func block(dig *digest, p []byte)
//
// Runs the MD5 compression function over p, 64 bytes at a time. For each
// chunk the four accumulator words at the start of *dig are loaded,
// mixed through the four MD5 rounds, added to the previous values and
// stored back.
//
// If the current data pointer is not 4-byte aligned, the chunk is first
// copied into the aligned stack buffer buf with runtime·memmove so that
// the word loads in the round macros always read aligned memory.
//
// The loop is bottom-tested: the body runs once before the end-of-data
// check, so the caller is expected to pass at least one full 64-byte
// chunk. NOTE(review): confirm against the Go callers of block.
//
// Frame: 84 bytes of locals, 16 bytes of arguments.
TEXT	·block(SB), NOSPLIT, $84-16
	MOVW	p+4(FP), Rdata	// pointer to the data
	MOVW	p_len+8(FP), Rt0	// number of bytes
	ADD	Rdata, Rt0
	MOVW	Rt0, p_end	// pointer to end of data

loop:
	MOVW	Rdata, p_data	// Save Rdata
	AND.S	$3, Rdata, Rt0	// TST $3, Rdata not working see issue 5921
	BEQ	aligned			// aligned detected - skip copy

	// Copy the unaligned source data into the aligned temporary buffer
	// memmove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers
	MOVW	$buf, Rtable	// to
	MOVW	$64, Rc0		// n
	MOVM.IB	[Rtable,Rdata,Rc0], (R13)
	BL	runtime·memmove(SB)

	// Point to the local aligned copy of the data
	MOVW	$buf, Rdata

aligned:
	// Point to the table of constants
	// A PC relative add would be cheaper than this
	// ($·table(SB) is the address of the table symbol, not its contents.)
	MOVW	$·table(SB), Rtable

	// Load up initial MD5 accumulator
	MOVW	dig+0(FP), Rc0
	MOVM.IA (Rc0), [Ra,Rb,Rc,Rd]

// Each group of four steps below first loads the next four constants
// from the table with write-back, so Rtable advances by 16 bytes per
// group and walks the whole 256-byte table over the 64 steps.

// Round 1
// a += (((c^d)&b)^d) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND1(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	EOR	Rc, Rd, Rt0		; \
	AND	Rb, Rt0			; \
	EOR	Rd, Rt0			; \
	MOVW	(index<<2)(Rdata), Rt1	; \
	ADD	Rt1, Rt0			; \
	ADD	Rconst, Rt0			; \
	ADD	Rt0, Ra			; \
	ADD	Ra@>(32-shift), Rb, Ra	;

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd,  0,	7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc,  1, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb,  2, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra,  3, 22, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd,  4,	7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc,  5, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb,  6, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra,  7, 22, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd,  8,	7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc,  9, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb, 10, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra, 11, 22, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd, 12,	7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc, 13, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb, 14, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra, 15, 22, Rc3)

// Round 2
// a += (((b^c)&d)^c) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND2(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	EOR	Rb, Rc, Rt0		; \
	AND	Rd, Rt0			; \
	EOR	Rc, Rt0			; \
	MOVW	(index<<2)(Rdata), Rt1	; \
	ADD	Rt1, Rt0			; \
	ADD	Rconst, Rt0			; \
	ADD	Rt0, Ra			; \
	ADD	Ra@>(32-shift), Rb, Ra	;

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd,  1,	5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc,  6,	9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb, 11, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra,  0, 20, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd,  5,	5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc, 10,	9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb, 15, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra,  4, 20, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd,  9,	5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc, 14,	9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb,  3, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra,  8, 20, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd, 13,	5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc,  2,	9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb,  7, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra, 12, 20, Rc3)

// Round 3
// a += (b^c^d) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND3(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	EOR	Rb, Rc, Rt0		; \
	EOR	Rd, Rt0			; \
	MOVW	(index<<2)(Rdata), Rt1	; \
	ADD	Rt1, Rt0			; \
	ADD	Rconst, Rt0			; \
	ADD	Rt0, Ra			; \
	ADD	Ra@>(32-shift), Rb, Ra	;

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd,  5,	4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc,  8, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb, 11, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra, 14, 23, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd,  1,	4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc,  4, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb,  7, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra, 10, 23, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd, 13,	4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc,  0, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb,  3, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra,  6, 23, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd,  9,	4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc, 12, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb, 15, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra,  2, 23, Rc3)

// Round 4
// a += (c^(b|^d)) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND4(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	MVN	Rd, Rt0			; \
	ORR	Rb, Rt0			; \
	EOR	Rc, Rt0			; \
	MOVW	(index<<2)(Rdata), Rt1	; \
	ADD	Rt1, Rt0			; \
	ADD	Rconst, Rt0			; \
	ADD	Rt0, Ra			; \
	ADD	Ra@>(32-shift), Rb, Ra	;

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd,  0,	6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc,  7, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb, 14, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra,  5, 21, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd, 12,	6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc,  3, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb, 10, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra,  1, 21, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd,  8,	6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc, 15, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb,  6, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra, 13, 21, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd,  4,	6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc, 11, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb,  2, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra,  9, 21, Rc3)

	// Reload the accumulator values this chunk started from; the constant
	// registers are free again now that all 64 steps are done.
	MOVW	dig+0(FP), Rt0
	MOVM.IA (Rt0), [Rc0,Rc1,Rc2,Rc3]

	// Add them to the freshly mixed values
	ADD	Rc0, Ra
	ADD	Rc1, Rb
	ADD	Rc2, Rc
	ADD	Rc3, Rd

	// Store the updated accumulators back into *dig
	MOVM.IA [Ra,Rb,Rc,Rd], (Rt0)

	// Advance the saved (original, possibly unaligned) data pointer by one
	// chunk and loop while it is still below the end pointer.
	MOVW	p_data, Rdata
	MOVW	p_end, Rt0
	ADD	$64, Rdata
	CMP	Rt0, Rdata
	BLO	loop

	RET

// MD5 constants table
//
// 64 32-bit words, 16 per round, stored in the order ·block consumes
// them: it reads the table sequentially, four words at a time.

	// Round 1
	DATA	·table+0x00(SB)/4, $0xd76aa478
	DATA	·table+0x04(SB)/4, $0xe8c7b756
	DATA	·table+0x08(SB)/4, $0x242070db
	DATA	·table+0x0c(SB)/4, $0xc1bdceee
	DATA	·table+0x10(SB)/4, $0xf57c0faf
	DATA	·table+0x14(SB)/4, $0x4787c62a
	DATA	·table+0x18(SB)/4, $0xa8304613
	DATA	·table+0x1c(SB)/4, $0xfd469501
	DATA	·table+0x20(SB)/4, $0x698098d8
	DATA	·table+0x24(SB)/4, $0x8b44f7af
	DATA	·table+0x28(SB)/4, $0xffff5bb1
	DATA	·table+0x2c(SB)/4, $0x895cd7be
	DATA	·table+0x30(SB)/4, $0x6b901122
	DATA	·table+0x34(SB)/4, $0xfd987193
	DATA	·table+0x38(SB)/4, $0xa679438e
	DATA	·table+0x3c(SB)/4, $0x49b40821
	// Round 2
	DATA	·table+0x40(SB)/4, $0xf61e2562
	DATA	·table+0x44(SB)/4, $0xc040b340
	DATA	·table+0x48(SB)/4, $0x265e5a51
	DATA	·table+0x4c(SB)/4, $0xe9b6c7aa
	DATA	·table+0x50(SB)/4, $0xd62f105d
	DATA	·table+0x54(SB)/4, $0x02441453
	DATA	·table+0x58(SB)/4, $0xd8a1e681
	DATA	·table+0x5c(SB)/4, $0xe7d3fbc8
	DATA	·table+0x60(SB)/4, $0x21e1cde6
	DATA	·table+0x64(SB)/4, $0xc33707d6
	DATA	·table+0x68(SB)/4, $0xf4d50d87
	DATA	·table+0x6c(SB)/4, $0x455a14ed
	DATA	·table+0x70(SB)/4, $0xa9e3e905
	DATA	·table+0x74(SB)/4, $0xfcefa3f8
	DATA	·table+0x78(SB)/4, $0x676f02d9
	DATA	·table+0x7c(SB)/4, $0x8d2a4c8a
	// Round 3
	DATA	·table+0x80(SB)/4, $0xfffa3942
	DATA	·table+0x84(SB)/4, $0x8771f681
	DATA	·table+0x88(SB)/4, $0x6d9d6122
	DATA	·table+0x8c(SB)/4, $0xfde5380c
	DATA	·table+0x90(SB)/4, $0xa4beea44
	DATA	·table+0x94(SB)/4, $0x4bdecfa9
	DATA	·table+0x98(SB)/4, $0xf6bb4b60
	DATA	·table+0x9c(SB)/4, $0xbebfbc70
	DATA	·table+0xa0(SB)/4, $0x289b7ec6
	DATA	·table+0xa4(SB)/4, $0xeaa127fa
	DATA	·table+0xa8(SB)/4, $0xd4ef3085
	DATA	·table+0xac(SB)/4, $0x04881d05
	DATA	·table+0xb0(SB)/4, $0xd9d4d039
	DATA	·table+0xb4(SB)/4, $0xe6db99e5
	DATA	·table+0xb8(SB)/4, $0x1fa27cf8
	DATA	·table+0xbc(SB)/4, $0xc4ac5665
	// Round 4
	DATA	·table+0xc0(SB)/4, $0xf4292244
	DATA	·table+0xc4(SB)/4, $0x432aff97
	DATA	·table+0xc8(SB)/4, $0xab9423a7
	DATA	·table+0xcc(SB)/4, $0xfc93a039
	DATA	·table+0xd0(SB)/4, $0x655b59c3
	DATA	·table+0xd4(SB)/4, $0x8f0ccc92
	DATA	·table+0xd8(SB)/4, $0xffeff47d
	DATA	·table+0xdc(SB)/4, $0x85845dd1
	DATA	·table+0xe0(SB)/4, $0x6fa87e4f
	DATA	·table+0xe4(SB)/4, $0xfe2ce6e0
	DATA	·table+0xe8(SB)/4, $0xa3014314
	DATA	·table+0xec(SB)/4, $0x4e0811a1
	DATA	·table+0xf0(SB)/4, $0xf7537e82
	DATA	·table+0xf4(SB)/4, $0xbd3af235
	DATA	·table+0xf8(SB)/4, $0x2ad7d2bb
	DATA	·table+0xfc(SB)/4, $0xeb86d391
	// Global definition: 64 words * 4 bytes = 256 bytes.
	// RODATA comes from textflag.h and has the same value as the literal 8
	// it replaces.
	GLOBL	·table(SB), RODATA, $256
