// Original source:
//	http://www.zorinaq.com/papers/md5-amd64.html
//	http://www.zorinaq.com/papers/md5-amd64.tar.bz2
//
// MD5 adapted for s390x using Go's assembler for
// s390x, based on md5block_amd64.s implementation by
// the Go authors.
//
// Author: Marc Bevand <bevand_m (at) epita.fr>
// Licence: I hereby disclaim the copyright on this code and place it
// in the public domain.

//go:build !purego

#include "textflag.h"

// func block(dig *digest, p []byte)
TEXT ·block(SB),NOSPLIT,$16-32
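	// Register usage:
	//	R1    - pointer to the digest state (reused as a scratch register inside the loop)
	//	R6    - pointer to the current 64-byte message block
	//	R7    - pointer just past the last complete block
	//	R2-R5 - MD5 state words a, b, c, d
	//	R8    - current message word, loaded byte-reversed (MOVWBR) since MD5 words are little-endian
	//	R9    - scratch register used by the round macros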
	MOVD	dig+0(FP), R1
	MOVD	p+8(FP), R6
	MOVD	p_len+16(FP), R5
	AND	$-64, R5
	LAY	(R6)(R5*1), R7

	LMY	0(R1), R2, R5
	CMPBEQ	R6, R7, end

loop:
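	// Save the state at the start of this block; it is added back
	// into the working state after the 64 rounds below.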
	STMY	R2, R5, tmp-16(SP)

	MOVWBR	0(R6), R8
	MOVWZ	R5, R9

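// ROUND1 performs one step of round 1:
//	a = b + ((a + F(b,c,d) + message word + const) <<< shift)
// where F(b,c,d) = (b AND c) OR (NOT b AND d), computed here as
// d XOR (b AND (c XOR d)). The current message word is already in R8;
// the word at offset index*4 is preloaded into R8 for the next step.
// R9 enters holding d and leaves holding c, which becomes d in the
// following step.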
#define ROUND1(a, b, c, d, index, const, shift) \
	XOR	c, R9; \
	ADD	$const, a; \
	ADD	R8, a; \
	MOVWBR	(index*4)(R6), R8; \
	AND	b, R9; \
	XOR	d, R9; \
	ADD	R9, a; \
	RLL	$shift, a; \
	MOVWZ	c, R9; \
	ADD	b, a

	ROUND1(R2,R3,R4,R5, 1,0xd76aa478, 7);
	ROUND1(R5,R2,R3,R4, 2,0xe8c7b756,12);
	ROUND1(R4,R5,R2,R3, 3,0x242070db,17);
	ROUND1(R3,R4,R5,R2, 4,0xc1bdceee,22);
	ROUND1(R2,R3,R4,R5, 5,0xf57c0faf, 7);
	ROUND1(R5,R2,R3,R4, 6,0x4787c62a,12);
	ROUND1(R4,R5,R2,R3, 7,0xa8304613,17);
	ROUND1(R3,R4,R5,R2, 8,0xfd469501,22);
	ROUND1(R2,R3,R4,R5, 9,0x698098d8, 7);
	ROUND1(R5,R2,R3,R4,10,0x8b44f7af,12);
	ROUND1(R4,R5,R2,R3,11,0xffff5bb1,17);
	ROUND1(R3,R4,R5,R2,12,0x895cd7be,22);
	ROUND1(R2,R3,R4,R5,13,0x6b901122, 7);
	ROUND1(R5,R2,R3,R4,14,0xfd987193,12);
	ROUND1(R4,R5,R2,R3,15,0xa679438e,17);
	ROUND1(R3,R4,R5,R2, 0,0x49b40821,22);

	MOVWBR	(1*4)(R6), R8
	MOVWZ	R5, R9
	MOVWZ	R5, R1

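// ROUND2 performs one step of round 2:
//	a = b + ((a + G(b,c,d) + message word + const) <<< shift)
// where G(b,c,d) = (b AND d) OR (c AND NOT d). R9 enters holding d and
// is inverted to NOT d; R1 enters holding d and is ANDed with b. Both
// leave holding c for the next step.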
#define ROUND2(a, b, c, d, index, const, shift) \
	XOR	$0xffffffff, R9; \ // NOTW R9
	ADD	$const, a; \
	ADD	R8, a; \
	MOVWBR	(index*4)(R6), R8; \
	AND	b, R1; \
	AND	c, R9; \
	OR	R9, R1; \
	MOVWZ	c, R9; \
	ADD	R1, a; \
	MOVWZ	c, R1; \
	RLL	$shift,	a; \
	ADD	b, a

	ROUND2(R2,R3,R4,R5, 6,0xf61e2562, 5);
	ROUND2(R5,R2,R3,R4,11,0xc040b340, 9);
	ROUND2(R4,R5,R2,R3, 0,0x265e5a51,14);
	ROUND2(R3,R4,R5,R2, 5,0xe9b6c7aa,20);
	ROUND2(R2,R3,R4,R5,10,0xd62f105d, 5);
	ROUND2(R5,R2,R3,R4,15, 0x2441453, 9);
	ROUND2(R4,R5,R2,R3, 4,0xd8a1e681,14);
	ROUND2(R3,R4,R5,R2, 9,0xe7d3fbc8,20);
	ROUND2(R2,R3,R4,R5,14,0x21e1cde6, 5);
	ROUND2(R5,R2,R3,R4, 3,0xc33707d6, 9);
	ROUND2(R4,R5,R2,R3, 8,0xf4d50d87,14);
	ROUND2(R3,R4,R5,R2,13,0x455a14ed,20);
	ROUND2(R2,R3,R4,R5, 2,0xa9e3e905, 5);
	ROUND2(R5,R2,R3,R4, 7,0xfcefa3f8, 9);
	ROUND2(R4,R5,R2,R3,12,0x676f02d9,14);
	ROUND2(R3,R4,R5,R2, 0,0x8d2a4c8a,20);

	MOVWBR	(5*4)(R6), R8
	MOVWZ	R4, R9

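// ROUND3 performs one step of round 3:
//	a = b + ((a + H(b,c,d) + message word + const) <<< shift)
// where H(b,c,d) = b XOR c XOR d. R9 enters holding c and leaves
// holding b, which becomes c in the following step.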
#define ROUND3(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	MOVWBR	(index*4)(R6), R8; \
	XOR	d, R9; \
	XOR	b, R9; \
	ADD	R9, a; \
	RLL	$shift, a; \
	MOVWZ	b, R9; \
	ADD	b, a

	ROUND3(R2,R3,R4,R5, 8,0xfffa3942, 4);
	ROUND3(R5,R2,R3,R4,11,0x8771f681,11);
	ROUND3(R4,R5,R2,R3,14,0x6d9d6122,16);
	ROUND3(R3,R4,R5,R2, 1,0xfde5380c,23);
	ROUND3(R2,R3,R4,R5, 4,0xa4beea44, 4);
	ROUND3(R5,R2,R3,R4, 7,0x4bdecfa9,11);
	ROUND3(R4,R5,R2,R3,10,0xf6bb4b60,16);
	ROUND3(R3,R4,R5,R2,13,0xbebfbc70,23);
	ROUND3(R2,R3,R4,R5, 0,0x289b7ec6, 4);
	ROUND3(R5,R2,R3,R4, 3,0xeaa127fa,11);
	ROUND3(R4,R5,R2,R3, 6,0xd4ef3085,16);
	ROUND3(R3,R4,R5,R2, 9, 0x4881d05,23);
	ROUND3(R2,R3,R4,R5,12,0xd9d4d039, 4);
	ROUND3(R5,R2,R3,R4,15,0xe6db99e5,11);
	ROUND3(R4,R5,R2,R3, 2,0x1fa27cf8,16);
	ROUND3(R3,R4,R5,R2, 0,0xc4ac5665,23);

	MOVWBR	(0*4)(R6), R8
	MOVWZ	$0xffffffff, R9
	XOR	R5, R9

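// ROUND4 performs one step of round 4:
//	a = b + ((a + I(b,c,d) + message word + const) <<< shift)
// where I(b,c,d) = c XOR (b OR NOT d). R9 enters holding NOT d and
// leaves holding NOT c, which becomes NOT d in the following step.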
#define ROUND4(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	MOVWBR	(index*4)(R6), R8; \
	OR	b, R9; \
	XOR	c, R9; \
	ADD	R9, a; \
	MOVWZ	$0xffffffff, R9; \
	RLL	$shift,	a; \
	XOR	c, R9; \
	ADD	b, a

	ROUND4(R2,R3,R4,R5, 7,0xf4292244, 6);
	ROUND4(R5,R2,R3,R4,14,0x432aff97,10);
	ROUND4(R4,R5,R2,R3, 5,0xab9423a7,15);
	ROUND4(R3,R4,R5,R2,12,0xfc93a039,21);
	ROUND4(R2,R3,R4,R5, 3,0x655b59c3, 6);
	ROUND4(R5,R2,R3,R4,10,0x8f0ccc92,10);
	ROUND4(R4,R5,R2,R3, 1,0xffeff47d,15);
	ROUND4(R3,R4,R5,R2, 8,0x85845dd1,21);
	ROUND4(R2,R3,R4,R5,15,0x6fa87e4f, 6);
	ROUND4(R5,R2,R3,R4, 6,0xfe2ce6e0,10);
	ROUND4(R4,R5,R2,R3,13,0xa3014314,15);
	ROUND4(R3,R4,R5,R2, 4,0x4e0811a1,21);
	ROUND4(R2,R3,R4,R5,11,0xf7537e82, 6);
	ROUND4(R5,R2,R3,R4, 2,0xbd3af235,10);
	ROUND4(R4,R5,R2,R3, 9,0x2ad7d2bb,15);
	ROUND4(R3,R4,R5,R2, 0,0xeb86d391,21);

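	// Add the state saved at the top of the loop back into the
	// working state.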
	MOVWZ	tmp-16(SP), R1
	ADD	R1, R2
	MOVWZ	tmp-12(SP), R1
	ADD	R1, R3
	MOVWZ	tmp-8(SP), R1
	ADD	R1, R4
	MOVWZ	tmp-4(SP), R1
	ADD	R1, R5

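	// Advance to the next 64-byte block and loop while complete
	// blocks remain.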
	LA	64(R6), R6
	CMPBLT	R6, R7, loop

end:
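	// Store the final state back into the digest.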
	MOVD	dig+0(FP), R1
	STMY	R2, R5, 0(R1)
	RET