// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
@ byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that the latter performs sub-optimally, nothing was done
@ about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
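
@ For reference, each of the 64 rounds below implements the FIPS 180-4
@ round function (a sketch; the a..h state lives in r4..r11):
@	T1 = h + Sigma1(e) + Ch(e,f,g) + K256[i] + W[i]
@	T2 = Sigma0(a) + Maj(a,b,c)
@	h=g; g=f; f=e; e=d+T1; d=c; c=b; b=a; a=T1+T2
@ with Ch(e,f,g)=(e&f)^(~e&g), Maj(a,b,c)=(a&b)^(a&c)^(b&c),
@ Sigma1(e)=ror(e,6)^ror(e,11)^ror(e,25) and
@ Sigma0(a)=ror(a,2)^ror(a,13)^ror(a,22). The eor/ror sequences tagged
@ Sigma0/Sigma1/Ch/Maj in the comments are algebraic refactorings of
@ these, e.g. Sigma1(e) is computed as ror(e ^ ror(e,5) ^ ror(e,19),6),
@ and Maj is deferred into the next round as "h+=Maj(a,b,c) from the past".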

#ifndef __KERNEL__
# include <ring-core/arm_arch.h>
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)
.arch	armv7-a

.text
#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

.hidden	OPENSSL_armcap_P
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.Lsha256_block_data_order
#endif
.align	5

.globl	sha256_block_data_order
.hidden	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ sha256_block_data_order
#else
	adr	r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
#ifdef	__APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
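@ Frame layout from here (a sketch, derived from the pushes above):
@ sp+#0*4..#15*4 hold the X[16] message window, sp+#16*4 the ctx pointer
@ (r0), sp+#17*4 the input pointer (r1), sp+#18*4 the input end (r2);
@ the saved r4-r11,lr sit above that for the final ldmia.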
.Loop:
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
.Lrounds_16_xx:
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 31<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	sha256_block_data_order,.-sha256_block_data_order
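
@ The NEON path below vectorizes the message schedule four words at a
@ time: vext pulls W[i+1..i+4] and W[i+9..i+12] out of the q0-q3 window,
@ and the vshr/vsli pairs build the sigma0/sigma1 rotations (a ror by n
@ is a logical right shift by n combined with a left-shift-insert by
@ 32-n, e.g. vshr #7 + vsli #25 gives ror#7). The round arithmetic
@ itself stays in the integer registers, interleaved with the vector
@ schedule; this interleaving is what the dual-issue speedup above
@ comes from.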
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.type	sha256_block_data_order_neon,%function
.align	5
.skip	16
sha256_block_data_order_neon:
.LNEON:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	sub	r11,sp,#16*4+16
	adr	r14,K256
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!

	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12
	eor	r3,r5,r6
	b	.L_00_48

.align	4
.L_00_48:
	vext.8	q8,q0,q1,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q2,q3,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q0,q0,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d7,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d7,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d7,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q0,q0,q9
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d7,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d7,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d0,d0,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d0,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d0,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d0,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	vshr.u32	d24,d0,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d0,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d1,d1,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q0
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
2274	eor	r3,r7,r8
2275	eor	r0,r0,r7,ror#20
2276	vadd.i32	q3,q3,q9
2277	add	r6,r6,r2
2278	ldr	r2,[sp,#56]
2279	veor	d25,d25,d24
2280	and	r12,r12,r3
2281	add	r10,r10,r6
2282	vshr.u32	d24,d5,#19
2283	add	r6,r6,r0,ror#2
2284	eor	r12,r12,r8
2285	vsli.32	d24,d5,#13
2286	add	r5,r5,r2
2287	eor	r2,r11,r4
2288	veor	d25,d25,d24
2289	eor	r0,r10,r10,ror#5
2290	add	r6,r6,r12
2291	vadd.i32	d6,d6,d25
2292	and	r2,r2,r10
2293	eor	r12,r0,r10,ror#19
2294	vshr.u32	d24,d6,#17
2295	eor	r0,r6,r6,ror#11
2296	eor	r2,r2,r4
2297	vsli.32	d24,d6,#15
2298	add	r5,r5,r12,ror#6
2299	eor	r12,r6,r7
2300	vshr.u32	d25,d6,#10
2301	eor	r0,r0,r6,ror#20
2302	add	r5,r5,r2
2303	veor	d25,d25,d24
2304	ldr	r2,[sp,#60]
2305	and	r3,r3,r12
2306	vshr.u32	d24,d6,#19
2307	add	r9,r9,r5
2308	add	r5,r5,r0,ror#2
2309	eor	r3,r3,r7
2310	vld1.32	{q8},[r14,:128]!
2311	add	r4,r4,r2
2312	vsli.32	d24,d6,#13
2313	eor	r2,r10,r11
2314	eor	r0,r9,r9,ror#5
2315	veor	d25,d25,d24
2316	add	r5,r5,r3
2317	and	r2,r2,r9
2318	vadd.i32	d7,d7,d25
2319	eor	r3,r0,r9,ror#19
2320	eor	r0,r5,r5,ror#11
2321	vadd.i32	q8,q8,q3
2322	eor	r2,r2,r11
2323	add	r4,r4,r3,ror#6
2324	eor	r3,r5,r6
2325	eor	r0,r0,r5,ror#20
2326	add	r4,r4,r2
2327	ldr	r2,[r14]
2328	and	r12,r12,r3
2329	add	r8,r8,r4
2330	vst1.32	{q8},[r1,:128]!
2331	add	r4,r4,r0,ror#2
2332	eor	r12,r12,r6
2333	teq	r2,#0				@ check for K256 terminator
2334	ldr	r2,[sp,#0]
2335	sub	r1,r1,#64
2336	bne	.L_00_48
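@ Each pass through .L_00_48 retires 16 scalar rounds while the interleaved
@ NEON instructions expand the next 16 message-schedule words and add the
@ round constants, storing W+K back onto the 64-byte stack ring at r1 for
@ the following pass. r14 walks the constant table, so the zero word that
@ terminates it makes the "teq r2,#0" above fall through once rounds 0..47
@ are done. As a rough C sketch of the schedule update computed by the
@ vshr/vsli/veor chains (illustrative only, not part of this file):
@
@	#define ROTR(x,n) (((x)>>(n)) | ((x)<<(32-(n))))
@	uint32_t s0 = ROTR(w[t-15],7) ^ ROTR(w[t-15],18) ^ (w[t-15]>>3);
@	uint32_t s1 = ROTR(w[t-2],17) ^ ROTR(w[t-2],19) ^ (w[t-2]>>10);
@	w[t] = w[t-16] + s0 + w[t-7] + s1;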

	ldr	r1,[sp,#68]
	ldr	r0,[sp,#72]
	sub	r14,r14,#256	@ rewind r14
	teq	r1,r0
	it	eq
	subeq	r1,r1,#64		@ avoid SEGV
	vld1.8	{q0},[r1]!		@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]
	mov	r1,sp
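@ Final 16 rounds: the loads above already fetched what will be the next
@ input block. When r1 has reached the limit kept at [sp,#72], it is first
@ backed up by 64 bytes ("avoid SEGV") so the four vld1.8 re-read the last
@ block instead of touching memory past the input; the saved pointer at
@ [sp,#68] is only advanced (strne) while more data remains, and the eq
@ case exits once this block is finished.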
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}
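@ Accumulate: r2 points at the hash context (loaded from [sp,#64] above),
@ so the eight working registers r4-r11 are folded back into the stored
@ state and written out, i.e. roughly "for (i=0;i<8;i++) h[i] += work[i];"
@ in C terms.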

	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	.L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif
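@ INST emits one ARMv8 crypto instruction as raw bytes, since this file's
@ baseline assembler target predates the sha256* mnemonics. In ARM mode
@ the four bytes are simply the little-endian A32 encoding as written; in
@ Thumb-2 mode the two halfwords trade places and d|0xc rewrites the A32
@ Advanced-SIMD prefix byte 0xf3 to its T32 counterpart 0xff
@ (0xf3 | 0x0c = 0xff).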

.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]
	sub	r3,r3,#256+32
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8
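@ ARMv8 path: q0/q1 hold the eight state words loaded from the context at
@ r0, r2 marks the end of the input and r3 walks the constant table
@ (rewound every iteration). Each .Loop_v8 pass hashes one 64-byte block
@ with the sha256h/sha256h2 pipeline, interleaved with sha256su0/sha256su1
@ schedule updates. One quad-round in ACLE intrinsic terms, as an
@ illustrative sketch only (names per arm_neon.h, not code from this
@ file):
@
@	uint32x4_t wk  = vaddq_u32(k0_3, w0_3);	// vadd.i32 q12,q12,q8
@	uint32x4_t old = ABCD;			// vmov q2,q0
@	ABCD = vsha256hq_u32(ABCD, EFGH, wk);	// sha256h
@	EFGH = vsha256h2q_u32(EFGH, old, wk);	// sha256h2
@	w0_3 = vsha256su1q_u32(vsha256su0q_u32(w0_3, w4_7),
@	                       w8_11, w12_15);	// sha256su0/su1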

.align	4
.Loop_v8:
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
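@ The last four quad-rounds below finish rounds 48..63; the message
@ schedule is fully expanded by this point, so only the K+W add and the
@ sha256h/sha256h2 pair remain.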
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

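@ Merge this block's result into the running state saved in q14/q15
@ ("offload" above), then loop while input remains.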
	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne
	bne	.Loop_v8

	vst1.32	{q0,q1},[r0]

	bx	lr		@ interworking return
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
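@ The .byte string below spells out
@ "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"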
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
