1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "go_asm.h"
6#include "go_tls.h"
7#include "tls_arm64.h"
8#include "funcdata.h"
9#include "textflag.h"
10
// rt0_go bootstraps the runtime on the initial thread.
// Entry state: RSP = OS-provided stack, R0 = argc, R1 = argv.
// It wires up g0 and m0, calls _cgo_init when one is linked in, runs
// check/args/osinit/schedinit, queues the goroutine whose entry point is
// stored in runtime·mainPC, and enters mstart. The code after the mstart
// call only exists to keep debugger hooks alive and to crash deliberately.
TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
	// Carve out 32 bytes of scratch and park argc/argv in it.
	SUB	$32, RSP
	MOVW	R0, 8(RSP)	// argc
	MOVD	R1, 16(RSP)	// argv

#ifdef TLS_darwin
	// Set up TLS before anything consults g.
	MOVD	ZR, g	// start from a nil g rather than junk
	SUB	$32, RSP
	MRS_TPIDR_R0
	AND	$~7, R0	// clear the low three bits
	MOVD	R0, 16(RSP)	// arg2: TLS base
	MOVD	$runtime·tls_g(SB), R2
	MOVD	R2, 8(RSP)	// arg1: &tlsg
	BL	·tlsinit(SB)
	ADD	$32, RSP
#endif

	// Describe the current OS stack as g0's stack:
	// hi = RSP, lo = RSP - 64KiB, both guards = lo.
	// _cgo_init may update stackguard.
	MOVD	$runtime·g0(SB), g
	MOVD	RSP, R7
	MOVD	$(-64*1024)(R7), R0
	MOVD	R7, (g_stack+stack_hi)(g)
	MOVD	R0, (g_stack+stack_lo)(g)
	MOVD	R0, g_stackguard0(g)
	MOVD	R0, g_stackguard1(g)

	// When _cgo_init is present, call it following the gcc ABI.
	MOVD	_cgo_init(SB), R12
	CBZ	R12, g0ready

#ifdef GOOS_android
	MRS_TPIDR_R0			// fetch TLS base pointer
	MOVD	R0, R3			// arg 3: TLS base pointer
	MOVD	$runtime·tls_g(SB), R2	// arg 2: &tls_g
#else
	MOVD	$0, R2			// arg 2: unused with the platform's own TLS
#endif
	MOVD	$setg_gcc<>(SB), R1	// arg 1: setg
	MOVD	g, R0			// arg 0: G
	SUB	$16, RSP		// leave 16 bytes so sp-8 is free for a saved fp
	CALL	(R12)
	ADD	$16, RSP

g0ready:
	BL	runtime·save_g(SB)
	// Refresh both guards from stack.lo now that _cgo_init has had its turn.
	MOVD	(g_stack+stack_lo)(g), R0
	ADD	$const_stackGuard, R0
	MOVD	R0, g_stackguard0(g)
	MOVD	R0, g_stackguard1(g)

	// Cross-link m0 and g0.
	MOVD	$runtime·m0(SB), R0
	MOVD	g, m_g0(R0)	// m0.g0 = g0
	MOVD	R0, g_m(g)	// g0.m = m0

	BL	runtime·check(SB)

#ifdef GOOS_windows
	BL	runtime·wintls(SB)
#endif

	// Copy argc/argv into place for runtime·args, then run OS and
	// scheduler initialisation.
	MOVW	8(RSP), R0	// argc
	MOVW	R0, -8(RSP)
	MOVD	16(RSP), R0	// argv
	MOVD	R0, 0(RSP)
	BL	runtime·args(SB)
	BL	runtime·osinit(SB)
	BL	runtime·schedinit(SB)

	// Queue the first goroutine; newproc receives &mainPC.
	MOVD	$runtime·mainPC(SB), R0	// entry
	SUB	$16, RSP
	MOVD	R0, 8(RSP)	// arg
	MOVD	$0, 0(RSP)	// dummy LR
	BL	runtime·newproc(SB)
	ADD	$16, RSP

	// Run this M.
	BL	runtime·mstart(SB)

	// Reference debugCallV2 and debugPinnerV1 so dead-code elimination
	// keeps them; they are intended to be called by debuggers.
	MOVD	$runtime·debugPinnerV1<ABIInternal>(SB), R0
	MOVD	$runtime·debugCallV2<ABIInternal>(SB), R0

	// Crash on purpose: store through a nil pointer.
	MOVD	$0, R0
	MOVD	R0, (R0)	// boom
	UNDEF

// mainPC is an 8-byte read-only word holding the address of runtime·main.
DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8
110
// BREAK expands to the breakpoint instruction for the target OS.
// On Windows ARM64 the BRK must carry the immediate 0xf000;
// see go.dev/issues/53837. Everywhere else a bare BRK is used.
#define BREAK	\
#ifdef GOOS_windows	\
	BRK	$0xf000 	\
#else 				\
	BRK 			\
#endif 				\


// breakpoint executes one breakpoint instruction and returns.
TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
	BREAK
	RET
124
// asminit does nothing on this architecture; it returns immediately.
TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
	RET
127
// mstart is the top frame of an M: it calls mstart0, which is not
// expected to come back.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
	CALL	runtime·mstart0(SB)
	RET	// not reached
131
132/*
133 *  go-routine
134 */
135
// void gogo(Gobuf*)
// Resume execution from the state saved in a Gobuf (longjmp-style).
// Loads R5 = buf and R6 = buf.g, then hands off to gogo<>.
TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
	MOVD	buf+0(FP), R5
	MOVD	gobuf_g(R5), R6
	MOVD	(R6), R4	// dereference g so that a nil g faults here
	JMP	gogo<>(SB)
143
// gogo<> completes the switch begun by gogo.
// In: R5 = Gobuf to resume from, R6 = the g that becomes current.
// It restores SP, BP, LR, the return value (R0) and the closure context
// (R26) from the Gobuf, zeroes those Gobuf fields, and jumps to gobuf_pc.
TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0
	MOVD	R6, g
	BL	runtime·save_g(SB)

	// Reload the saved machine state.
	MOVD	gobuf_sp(R5), R0
	MOVD	R0, RSP
	MOVD	gobuf_bp(R5), R29
	MOVD	gobuf_lr(R5), LR
	MOVD	gobuf_ret(R5), R0
	MOVD	gobuf_ctxt(R5), R26

	// The Gobuf has been consumed; clear what was just read.
	MOVD	ZR, gobuf_sp(R5)
	MOVD	ZR, gobuf_bp(R5)
	MOVD	ZR, gobuf_ret(R5)
	MOVD	ZR, gobuf_lr(R5)
	MOVD	ZR, gobuf_ctxt(R5)

	CMP	ZR, ZR	// leave the flags as "equal"; the stack-split check needs this
	MOVD	gobuf_pc(R5), R6
	JMP	(R6)
162
// void mcall(fn func(*g))
// Switch to m->g0's stack and call fn(g), where g is the goroutine that
// called mcall. fn must never return; to keep running g it should
// gogo(&g->sched).
// In (ABIInternal): R0 = fn closure.
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
	MOVD	R0, R26	// closure context

	// Record the caller's SP, BP and PC in g->sched; sched.lr is cleared.
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	R29, (g_sched+gobuf_bp)(g)
	MOVD	LR, (g_sched+gobuf_pc)(g)
	MOVD	ZR, (g_sched+gobuf_lr)(g)

	// Make m->g0 current, keeping the calling g in R3.
	MOVD	g, R3
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	CMP	g, R3
	BNE	switched
	B	runtime·badmcall(SB)	// the caller was already g0

switched:
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP	// sp = m->g0->sched.sp
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD	R3, R0	// arg = calling g
	MOVD	ZR, -16(RSP)	// dummy LR
	SUB	$16, RSP
	MOVD	(R26), R4	// code pointer
	CALL	(R4)
	B	runtime·badmcall2(SB)	// fn returned
195
// systemstack_switch is a placeholder frame that systemstack leaves at
// the bottom of the G stack. It has to be a different routine from the
// one sitting at the top of the system stack, because that one ends the
// stack walk (see topofstack()). The body is never meant to run.
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	UNDEF
	CALL	(LR)	// a call instruction keeps this function from being a leaf
	RET
205
// func systemstack(fn func())
// Run fn on the system stack.
//   - g is gsignal or g0: already there, so fn is tail-called.
//   - g is m->curg:       switch to g0, call fn, switch back.
//   - anything else:      badsystemstack, then abort.
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVD	fn+0(FP), R3	// R3 = fn
	MOVD	R3, R26	// closure context
	MOVD	g_m(g), R4	// R4 = m

	MOVD	m_gsignal(R4), R5	// R5 = gsignal
	CMP	g, R5
	BEQ	direct

	MOVD	m_g0(R4), R5	// R5 = g0
	CMP	g, R5
	BEQ	direct

	MOVD	m_curg(R4), R6
	CMP	g, R6
	BEQ	tog0

	// g is none of gsignal, g0 or curg.
	// The indirect call hides this from the linker's nosplit analysis.
	MOVD	$runtime·badsystemstack(SB), R3
	CALL	(R3)
	B	runtime·abort(SB)

tog0:
	// Save our state in g->sched, pretending to be systemstack_switch
	// in case the G stack gets scanned.
	BL	gosave_systemstack_switch<>(SB)

	// Make g0 (still in R5) current and adopt its saved SP/BP.
	MOVD	R5, g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R3
	MOVD	R3, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29

	// Call fn through its closure.
	MOVD	(R26), R3	// code pointer
	CALL	(R3)

	// Return to m->curg, restoring and then clearing its saved SP/BP.
	MOVD	g_m(g), R3
	MOVD	m_curg(R3), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD	ZR, (g_sched+gobuf_sp)(g)
	MOVD	ZR, (g_sched+gobuf_bp)(g)
	RET

direct:
	// Already on an m stack. Tail-calling fn keeps tracebacks from
	// stopping at an intermediate systemstack frame, so undo our own
	// frame first.
	MOVD	(R26), R3	// code pointer
	MOVD.P	16(RSP), R30	// restore LR
	SUB	$8, RSP, R29	// restore FP
	JMP	(R3)
265
// func switchToCrashStack0(fn func())
// Make gcrash the current g (and the current m's g0), move onto its
// stack, and call fn. fn should never return; if it does, abort.
// In (ABIInternal): R0 = fn closure.
TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R26	// context register
	MOVD	g_m(g), R1	// curm

	// g = &gcrash, linked both ways with curm.
	MOVD	$runtime·gcrash(SB), g
	BL	runtime·save_g(SB)	// clobbers R0
	MOVD	R1, g_m(g)	// g.m = curm
	MOVD	g, m_g0(R1)	// curm.g0 = g

	// RSP = gcrash.stack.hi - 32.
	MOVD	(g_stack+stack_hi)(g), R1
	SUB	$(4*8), R1
	MOVD	R1, RSP

	// Call fn through its closure.
	MOVD	(R26), R0
	BL	(R0)

	// fn came back, which must not happen.
	BL	runtime·abort(SB)
	UNDEF
289
290/*
291 * support for morestack
292 */
293
// morestack is called from a function prologue when more stack is needed.
// The caller has already loaded:
//   R3 = the prologue's LR (R30)
//
// Traceback treats morestack on a g0 as the top of a stack (for example,
// morestack calling newstack calling the scheduler calling newm calling
// gc), so an argument size must be recorded; hence it takes no arguments.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), R4	// R4 = g0, checked against g below

	// Called from f: store f's context in g->sched.
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	R29, (g_sched+gobuf_bp)(g)
	MOVD	LR, (g_sched+gobuf_pc)(g)
	MOVD	R3, (g_sched+gobuf_lr)(g)
	MOVD	R26, (g_sched+gobuf_ctxt)(g)

	// The scheduler stack (m->g0) cannot grow.
	CMP	g, R4
	BNE	notg0
	BL	runtime·badmorestackg0(SB)
	B	runtime·abort(SB)

notg0:
	// The signal stack (m->gsignal) cannot grow either.
	MOVD	m_gsignal(R8), R4
	CMP	g, R4
	BNE	notgsignal
	BL	runtime·badmorestackgsignal(SB)
	B	runtime·abort(SB)

notgsignal:
	// Describe f's caller in m->morebuf.
	MOVD	R3, (m_morebuf+gobuf_pc)(R8)	// f's caller's PC
	MOVD	RSP, R0
	MOVD	R0, (m_morebuf+gobuf_sp)(R8)	// f's caller's RSP
	MOVD	g, (m_morebuf+gobuf_g)(R8)

	// Switch to m->g0's stack and call newstack there.
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD.W	$0, -16(RSP)	// open a call frame on g0 (saved LR slot; stays 16-aligned)
	BL	runtime·newstack(SB)

	// Not reached. The UNDEF keeps newstack's return PC inside this
	// function instead of at the start of the next one.
	UNDEF
347
// morestack_noctxt is morestack for callers without a closure context:
// it zeroes R26 and jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
	// Force SPWRITE. SP is not really modified here, but this function
	// is entered with a special convention: the caller passes LR in a
	// register (R3) instead of saving it on the stack, which the
	// unwinder does not understand. Marking the function SPWRITE makes
	// unwinding stop here. (See issue 54332.)
	MOVD	RSP, RSP

	MOVW	$0, R26	// no context
	JMP	runtime·morestack(SB)
358
// spillArgs stores return values from registers to a *internal/abi.RegArgs in R20.
// Layout written: R0-R15 into 8-byte slots 0-15, then F0-F15 into
// 8-byte slots 16-31.
TEXT ·spillArgs(SB),NOSPLIT,$0-0
	// Integer registers, two per store.
	STP	(R0, R1), (0*8)(R20)
	STP	(R2, R3), (2*8)(R20)
	STP	(R4, R5), (4*8)(R20)
	STP	(R6, R7), (6*8)(R20)
	STP	(R8, R9), (8*8)(R20)
	STP	(R10, R11), (10*8)(R20)
	STP	(R12, R13), (12*8)(R20)
	STP	(R14, R15), (14*8)(R20)
	// Floating-point registers, two per store.
	FSTPD	(F0, F1), (16*8)(R20)
	FSTPD	(F2, F3), (18*8)(R20)
	FSTPD	(F4, F5), (20*8)(R20)
	FSTPD	(F6, F7), (22*8)(R20)
	FSTPD	(F8, F9), (24*8)(R20)
	FSTPD	(F10, F11), (26*8)(R20)
	FSTPD	(F12, F13), (28*8)(R20)
	FSTPD	(F14, F15), (30*8)(R20)
	RET
378
// unspillArgs loads args into registers from a *internal/abi.RegArgs in R20.
// Layout read: 8-byte slots 0-15 into R0-R15, then 8-byte slots 16-31
// into F0-F15 (the mirror image of spillArgs).
TEXT ·unspillArgs(SB),NOSPLIT,$0-0
	// Integer registers, two per load.
	LDP	(0*8)(R20), (R0, R1)
	LDP	(2*8)(R20), (R2, R3)
	LDP	(4*8)(R20), (R4, R5)
	LDP	(6*8)(R20), (R6, R7)
	LDP	(8*8)(R20), (R8, R9)
	LDP	(10*8)(R20), (R10, R11)
	LDP	(12*8)(R20), (R12, R13)
	LDP	(14*8)(R20), (R14, R15)
	// Floating-point registers, two per load.
	FLDPD	(16*8)(R20), (F0, F1)
	FLDPD	(18*8)(R20), (F2, F3)
	FLDPD	(20*8)(R20), (F4, F5)
	FLDPD	(22*8)(R20), (F6, F7)
	FLDPD	(24*8)(R20), (F8, F9)
	FLDPD	(26*8)(R20), (F10, F11)
	FLDPD	(28*8)(R20), (F12, F13)
	FLDPD	(30*8)(R20), (F14, F15)
	RET
398
// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// Frames cannot be variable-sized, so a small family of fixed-frame
// functions (runtime·call16 ... runtime·call1073741824) is used instead,
// which encodes a few bits of the size in the pc. reflectcall picks the
// first one whose frame is at least frameSize.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH(NAME, MAXSIZE): if R16 (frameSize) <= MAXSIZE, jump to NAME;
// otherwise fall through to the next DISPATCH. Clobbers R27.
// Note: can't just "B NAME(SB)" - bad inlining results.
#define DISPATCH(NAME,MAXSIZE)		\
	MOVD	$MAXSIZE, R27;		\
	CMP	R27, R16;		\
	BGT	3(PC);			\
	MOVD	$NAME(SB), R27;	\
	B	(R27)

TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48
	MOVWU	frameSize+32(FP), R16
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	// frameSize exceeds the largest available frame.
	MOVD	$runtime·badreflectcall(SB), R0
	JMP	(R0)
444
// CALLFN(NAME, MAXSIZE) defines one fixed-frame reflectcall target with a
// MAXSIZE-byte frame. The generated function:
//   1. copies stackArgsSize bytes from stackArgs to 8(RSP), 16 bytes at
//      a time and then byte by byte for the remainder;
//   2. loads the argument registers from regArgs via unspillArgs;
//   3. calls f through its closure (context in R26);
//   4. saves the result registers back to regArgs via spillArgs;
//   5. hands the stack results past stackRetOffset to callRet<>.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
	NO_LOCAL_POINTERS;			\
	/* R3 = source, R4 = byte count, R5 = destination on our stack */	\
	MOVD	stackArgs+16(FP), R3;			\
	MOVWU	stackArgsSize+24(FP), R4;		\
	ADD	$8, RSP, R5;			\
	BIC	$0xf, R4, R6;			\
	CBZ	R6, 6(PC);			\
	/* R6 = (argsize&~15) != 0: turn it into the end address */	\
	ADD	R6, R5, R6;			\
	/* bulk copy, 16 bytes per iteration */	\
	LDP.P	16(R3), (R7, R8);		\
	STP.P	(R7, R8), 16(R5);		\
	CMP	R5, R6;				\
	BNE	-3(PC);				\
	AND	$0xf, R4, R6;			\
	CBZ	R6, 6(PC);			\
	/* R6 = (argsize&15) != 0: turn it into the end address */	\
	ADD	R6, R5, R6;			\
	/* tail copy, 1 byte per iteration */	\
	MOVBU.P	1(R3), R7;			\
	MOVBU.P	R7, 1(R5);			\
	CMP	R5, R6;				\
	BNE	-3(PC);				\
	/* load the register arguments */	\
	MOVD	regArgs+40(FP), R20;		\
	CALL	·unspillArgs(SB);		\
	/* call f through its closure */	\
	MOVD	f+8(FP), R26;			\
	MOVD	(R26), R20;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	BL	(R20);				\
	/* save register results, then copy stack results back */	\
	MOVD	regArgs+40(FP), R20;		\
	CALL	·spillArgs(SB);		\
	MOVD	stackArgsType+0(FP), R7;		\
	MOVD	stackArgs+16(FP), R3;			\
	MOVWU	stackArgsSize+24(FP), R4;			\
	MOVWU	stackRetOffset+28(FP), R6;		\
	ADD	$8, RSP, R5;			\
	ADD	R6, R5; 			\
	ADD	R6, R3;				\
	SUB	R6, R4;				\
	BL	callRet<>(SB);			\
	RET
491
// callRet copies return values back at the end of call*. It is a separate
// function so that it can allocate stack space for the arguments to
// reflectcallmove. It does not follow the Go ABI; its inputs arrive in
// registers and are stored to the outgoing argument area in this order:
//   8(RSP)=R7, 16(RSP)=R3, 24(RSP)=R5, 32(RSP)=R4, 40(RSP)=R20
TEXT callRet<>(SB), NOSPLIT, $48-0
	NO_LOCAL_POINTERS
	STP	(R7, R3), 8(RSP)
	STP	(R5, R4), 24(RSP)
	MOVD	R20, 40(RSP)
	CALL	runtime·reflectcallmove(SB)
	RET
503
// Instantiate one fixed-frame call target per size class. Each expansion
// defines the symbol that the matching DISPATCH line in reflectcall jumps
// to (·callN in this package is runtime·callN). The macro must be invoked
// as CALLFN(·callN, N), opening parenthesis and package dot included.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
531
// func memhash32(p unsafe.Pointer, h uintptr) uintptr
// Hash the 4 bytes at p with seed h.
// In (ABIInternal): R0 = p, R1 = h. Out: R0 = hash.
// Tail-calls memhash32Fallback when useAeshash is zero.
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, fallback
	MOVD	$runtime·aeskeysched+0(SB), R3

	// V0 = {S[0]: seed, S[1]: data, rest zero}; V2 = first 16 key bytes.
	VEOR	V0.B16, V0.B16, V0.B16
	VLD1	(R3), [V2.B16]
	VLD1	(R0), V0.S[1]
	VMOV	R1, V0.S[0]

	// Scramble: AESE, AESMC, AESE, AESMC, AESE.
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16

	VMOV	V0.D[0], R0	// result = low 64 bits
	RET
fallback:
	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
553
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
// Hash the 8 bytes at p with seed h.
// In (ABIInternal): R0 = p, R1 = h. Out: R0 = hash.
// Tail-calls memhash64Fallback when useAeshash is zero.
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, fallback
	MOVD	$runtime·aeskeysched+0(SB), R3

	// V0 = {D[0]: seed, D[1]: data}; V2 = first 16 key bytes.
	VEOR	V0.B16, V0.B16, V0.B16
	VLD1	(R3), [V2.B16]
	VLD1	(R0), V0.D[1]
	VMOV	R1, V0.D[0]

	// Scramble: AESE, AESMC, AESE, AESMC, AESE.
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16

	VMOV	V0.D[0], R0	// result = low 64 bits
	RET
fallback:
	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
575
// func memhash(p unsafe.Pointer, h, size uintptr) uintptr
// In (ABIInternal): R0 = p, R1 = h, R2 = size, which is exactly the
// register layout aeshashbody<> expects, so it is tail-called directly.
// Tail-calls memhashFallback when useAeshash is zero.
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, fallback
	JMP	aeshashbody<>(SB)
fallback:
	JMP	runtime·memhashFallback<ABIInternal>(SB)
583
// func strhash(p unsafe.Pointer, h uintptr) uintptr
// In (ABIInternal): R0 = p (pointer to a string header), R1 = h.
// Unpacks the header into R0 = data, R2 = length and tail-calls
// aeshashbody<>. Tail-calls strhashFallback when useAeshash is zero.
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, fallback
	LDP	(R0), (R0, R2)	// string data / length
	JMP	aeshashbody<>(SB)
fallback:
	JMP	runtime·strhashFallback<ABIInternal>(SB)
592
// AESHASH_ROUND applies AESE followed by AESMC to STATE, keyed by KEY.
// Both arguments are bare vector register names (V0, V30, ...).
#define AESHASH_ROUND(KEY, STATE)	\
	AESE	KEY.B16, STATE.B16;	\
	AESMC	STATE.B16, STATE.B16

// AESHASH_ROUND8_LO_KEYS: one AESE+AESMC round on V8-V15,
// where V(8+i) is keyed by V(i).
#define AESHASH_ROUND8_LO_KEYS	\
	AESE	V0.B16, V8.B16;	\
	AESMC	V8.B16, V8.B16;	\
	AESE	V1.B16, V9.B16;	\
	AESMC	V9.B16, V9.B16;	\
	AESE	V2.B16, V10.B16;	\
	AESMC	V10.B16, V10.B16;	\
	AESE	V3.B16, V11.B16;	\
	AESMC	V11.B16, V11.B16;	\
	AESE	V4.B16, V12.B16;	\
	AESMC	V12.B16, V12.B16;	\
	AESE	V5.B16, V13.B16;	\
	AESMC	V13.B16, V13.B16;	\
	AESE	V6.B16, V14.B16;	\
	AESMC	V14.B16, V14.B16;	\
	AESE	V7.B16, V15.B16;	\
	AESMC	V15.B16, V15.B16

// AESHASH_ROUND8_HI_KEYS: one AESE+AESMC round on V0-V7,
// where V(i) is keyed by V(8+i).
#define AESHASH_ROUND8_HI_KEYS	\
	AESE	V8.B16, V0.B16;	\
	AESMC	V0.B16, V0.B16;	\
	AESE	V9.B16, V1.B16;	\
	AESMC	V1.B16, V1.B16;	\
	AESE	V10.B16, V2.B16;	\
	AESMC	V2.B16, V2.B16;	\
	AESE	V11.B16, V3.B16;	\
	AESMC	V3.B16, V3.B16;	\
	AESE	V12.B16, V4.B16;	\
	AESMC	V4.B16, V4.B16;	\
	AESE	V13.B16, V5.B16;	\
	AESMC	V5.B16, V5.B16;	\
	AESE	V14.B16, V6.B16;	\
	AESMC	V6.B16, V6.B16;	\
	AESE	V15.B16, V7.B16;	\
	AESMC	V7.B16, V7.B16

// aeshashbody<> is the AES-instruction hash shared by memhash and strhash.
// R0: data
// R1: seed data
// R2: length
// At return, R0 = return value
// V30 holds {seed, length}; V0 and, for longer inputs, V1-V7 are seeds
// derived from runtime·aeskeysched. The length selects one of the size
// classes 0-15, 16, 17-32, 33-64, 65-128 and 129+.
TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
	VEOR	V30.B16, V30.B16, V30.B16
	VMOV	R1, V30.D[0]
	VMOV	R2, V30.D[1]	// fold the length into the seed

	MOVD	$runtime·aeskeysched+0(SB), R4
	VLD1.P	16(R4), [V0.B16]
	AESHASH_ROUND(V30, V0)

	// Dispatch on length (unsigned comparisons).
	CMP	$16, R2
	BLO	aes0to15
	BEQ	aes16
	CMP	$32, R2
	BLS	aes17to32
	CMP	$64, R2
	BLS	aes33to64
	CMP	$128, R2
	BLS	aes65to128
	B	aes129plus

aes0to15:
	CBZ	R2, aes0
	// Assemble the input in V2 piecewise, one piece per set bit of the
	// length: 8 bytes, then 4, then 2, then 1.
	VEOR	V2.B16, V2.B16, V2.B16
	TBZ	$3, R2, less_than_8
	VLD1.P	8(R0), V2.D[0]

less_than_8:
	TBZ	$2, R2, less_than_4
	VLD1.P	4(R0), V2.S[2]

less_than_4:
	TBZ	$1, R2, less_than_2
	VLD1.P	2(R0), V2.H[6]

less_than_2:
	TBZ	$0, R2, done
	VLD1	(R0), V2.B[14]
done:
	// Three full rounds on V2 keyed by V0; also the tail of aes16.
	AESHASH_ROUND(V0, V2)
	AESHASH_ROUND(V0, V2)
	AESHASH_ROUND(V0, V2)

	VMOV	V2.D[0], R0
	RET

aes0:
	// Empty input: the hash is the scrambled seed itself.
	VMOV	V0.D[0], R0
	RET

aes16:
	VLD1	(R0), [V2.B16]
	B	done

aes17to32:
	// Make a second seed.
	VLD1	(R4), [V1.B16]
	AESHASH_ROUND(V30, V1)
	// Load the first 16 and the last 16 bytes (they may overlap).
	SUB	$16, R2, R10
	VLD1.P	(R0)(R10), [V2.B16]
	VLD1	(R0), [V3.B16]

	AESHASH_ROUND(V0, V2)
	AESHASH_ROUND(V1, V3)

	AESHASH_ROUND(V0, V2)
	AESHASH_ROUND(V1, V3)

	AESE	V0.B16, V2.B16
	AESE	V1.B16, V3.B16

	VEOR	V3.B16, V2.B16, V2.B16

	VMOV	V2.D[0], R0
	RET

aes33to64:
	// Three more seeds.
	VLD1	(R4), [V1.B16, V2.B16, V3.B16]
	AESHASH_ROUND(V30, V1)
	AESHASH_ROUND(V30, V2)
	AESHASH_ROUND(V30, V3)
	// Load the first 32 and the last 32 bytes (they may overlap).
	SUB	$32, R2, R10

	VLD1.P	(R0)(R10), [V4.B16, V5.B16]
	VLD1	(R0), [V6.B16, V7.B16]

	AESHASH_ROUND(V0, V4)
	AESHASH_ROUND(V1, V5)
	AESHASH_ROUND(V2, V6)
	AESHASH_ROUND(V3, V7)

	AESHASH_ROUND(V0, V4)
	AESHASH_ROUND(V1, V5)
	AESHASH_ROUND(V2, V6)
	AESHASH_ROUND(V3, V7)

	AESE	V0.B16, V4.B16
	AESE	V1.B16, V5.B16
	AESE	V2.B16, V6.B16
	AESE	V3.B16, V7.B16

	VEOR	V6.B16, V4.B16, V4.B16
	VEOR	V7.B16, V5.B16, V5.B16
	VEOR	V5.B16, V4.B16, V4.B16

	VMOV	V4.D[0], R0
	RET

aes65to128:
	// Seven more seeds.
	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
	AESHASH_ROUND(V30, V1)
	AESHASH_ROUND(V30, V2)
	AESHASH_ROUND(V30, V3)
	AESHASH_ROUND(V30, V4)
	AESHASH_ROUND(V30, V5)
	AESHASH_ROUND(V30, V6)
	AESHASH_ROUND(V30, V7)

	// Load the first 64 and the last 64 bytes (they may overlap).
	SUB	$64, R2, R10
	VLD1.P	(R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
	VLD1	(R0), [V12.B16, V13.B16, V14.B16, V15.B16]

	AESHASH_ROUND8_LO_KEYS

	AESHASH_ROUND8_LO_KEYS

	AESE	V0.B16, V8.B16
	AESE	V1.B16, V9.B16
	AESE	V2.B16, V10.B16
	AESE	V3.B16, V11.B16
	AESE	V4.B16, V12.B16
	AESE	V5.B16, V13.B16
	AESE	V6.B16, V14.B16
	AESE	V7.B16, V15.B16

	// Fold the eight lanes down to one.
	VEOR	V12.B16, V8.B16, V8.B16
	VEOR	V13.B16, V9.B16, V9.B16
	VEOR	V14.B16, V10.B16, V10.B16
	VEOR	V15.B16, V11.B16, V11.B16
	VEOR	V10.B16, V8.B16, V8.B16
	VEOR	V11.B16, V9.B16, V9.B16
	VEOR	V9.B16, V8.B16, V8.B16

	VMOV	V8.D[0], R0
	RET

aes129plus:
	PRFM	(R0), PLDL1KEEP
	// Seven more seeds.
	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
	AESHASH_ROUND(V30, V1)
	AESHASH_ROUND(V30, V2)
	AESHASH_ROUND(V30, V3)
	AESHASH_ROUND(V30, V4)
	AESHASH_ROUND(V30, V5)
	AESHASH_ROUND(V30, V6)
	AESHASH_ROUND(V30, V7)
	// Begin with the final 128 bytes of the input in V8-V15.
	ADD	R0, R2, R10
	SUB	$128, R10, R10
	VLD1.P	64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
	VLD1	(R10), [V12.B16, V13.B16, V14.B16, V15.B16]
	// R2 = (length-1)/128 = number of loop iterations.
	SUB	$1, R2, R2
	LSR	$7, R2, R2

aesloop:
	// Mix the block currently held in V8-V15 into the state V0-V7 ...
	AESHASH_ROUND8_HI_KEYS

	// ... then load the next 128 bytes from the front and mix them too.
	VLD1.P	64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
	AESHASH_ROUND(V8, V0)
	AESHASH_ROUND(V9, V1)
	AESHASH_ROUND(V10, V2)
	AESHASH_ROUND(V11, V3)

	VLD1.P	64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
	AESHASH_ROUND(V12, V4)
	AESHASH_ROUND(V13, V5)
	AESHASH_ROUND(V14, V6)
	AESHASH_ROUND(V15, V7)
	SUB	$1, R2, R2
	CBNZ	R2, aesloop

	// Finish: two full rounds and one AESE keyed by the last block.
	AESHASH_ROUND8_HI_KEYS

	AESHASH_ROUND8_HI_KEYS

	AESE	V8.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESE	V12.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESE	V15.B16, V7.B16

	// Fold the eight lanes down to one.
	VEOR	V0.B16, V1.B16, V0.B16
	VEOR	V2.B16, V3.B16, V2.B16
	VEOR	V4.B16, V5.B16, V4.B16
	VEOR	V6.B16, V7.B16, V6.B16
	VEOR	V0.B16, V2.B16, V0.B16
	VEOR	V4.B16, V6.B16, V4.B16
	VEOR	V4.B16, V0.B16, V0.B16

	VMOV	V0.D[0], R0
	RET

#undef AESHASH_ROUND
#undef AESHASH_ROUND8_LO_KEYS
#undef AESHASH_ROUND8_HI_KEYS
916
// procyield executes YIELD in a loop, decrementing the 32-bit cycles
// argument once per iteration until it reaches zero.
// NOTE(review): the count is decremented before it is tested, so
// cycles == 0 would wrap and spin for 2^32 iterations; callers are
// presumably expected to pass a non-zero count. Confirm against callers.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVWU	cycles+0(FP), R0
spin:
	YIELD
	SUBW	$1, R0
	CBNZ	R0, spin
	RET
924
// gosave_systemstack_switch<> saves the caller's state into g->sched,
// but records a fake PC inside systemstack_switch instead of the real one.
// It must only be called from functions with no locals ($0), otherwise
// unwinding from systemstack_switch is incorrect.
// Smashes R0. Aborts if g->sched.ctxt is not already zero.
TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
	MOVD	$runtime·systemstack_switch(SB), R0
	ADD	$8, R0	// step past the prologue
	MOVD	R0, (g_sched+gobuf_pc)(g)
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	R29, (g_sched+gobuf_bp)(g)
	MOVD	ZR, (g_sched+gobuf_lr)(g)
	MOVD	ZR, (g_sched+gobuf_ret)(g)
	// ctxt must be zero here. See func save.
	MOVD	(g_sched+gobuf_ctxt)(g), R0
	CBZ	R0, ctxt_ok
	BL	runtime·abort(SB)
ctxt_ok:
	RET
944
// func asmcgocall_no_g(fn, arg unsafe.Pointer)
// Call fn(arg) aligned appropriately for the gcc ABI.
// Runs on a system stack; there may be no g yet (during needm), so g is
// not touched here.
TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
	MOVD	fn+0(FP), R1
	MOVD	arg+8(FP), R0
	SUB	$16, RSP	// step over the frame pointer saved below RSP
	CALL	(R1)
	ADD	$16, RSP	// undo the adjustment above
	RET
955
// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI, and return fn's 32-bit result.
// See cgocall.go for more details.
//
// Two paths:
//   - on a user goroutine: save state, switch to m->g0, call fn, switch back;
//   - no g, or already on g0/gsignal (label nosave): call fn in place.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	MOVD	fn+0(FP), R1
	MOVD	arg+8(FP), R0

	MOVD	RSP, R2		// save original stack pointer
	CBZ	g, nosave
	MOVD	g, R4

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already. Or we might already
	// be on the m->gsignal stack.
	MOVD	g_m(g), R8
	MOVD	m_gsignal(R8), R3
	CMP	R3, g
	BEQ	nosave
	MOVD	m_g0(R8), R3
	CMP	R3, g
	BEQ	nosave

	// Switch to system stack (R3 = g0).
	MOVD	R0, R9	// gosave_systemstack_switch<> and save_g might clobber R0
	BL	gosave_systemstack_switch<>(SB)
	MOVD	R3, g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD	R9, R0

	// Now on a scheduling stack (a pthread-created stack).
	// Reserve 16 bytes for two words of our own: the old g and the
	// depth of the old stack pointer below the old g's stack.hi.
	MOVD	RSP, R13
	SUB	$16, R13
	MOVD	R13, RSP
	MOVD	R4, 0(RSP)	// save old g on stack
	MOVD	(g_stack+stack_hi)(R4), R4
	SUB	R2, R4
	MOVD	R4, 8(RSP)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
	BL	(R1)
	MOVD	R0, R9		// keep fn's result across save_g

	// Restore g and the stack pointer; fn's result returns to R0.
	MOVD	0(RSP), g
	BL	runtime·save_g(SB)
	MOVD	(g_stack+stack_hi)(g), R5
	MOVD	8(RSP), R6
	SUB	R6, R5
	MOVD	R9, R0
	MOVD	R5, RSP

	MOVW	R0, ret+16(FP)
	RET

nosave:
	// Running on a system stack, perhaps even without a g.
	// Having no g can happen during thread creation or thread teardown
	// (see needm/dropm on Solaris, for example).
	// This code is like the above sequence but without saving/restoring g
	// and without worrying about the stack moving out from under us
	// (because we're on a system stack, not a goroutine stack).
	// The above code could be used directly if already on a system stack,
	// but then the only path through this code would be a rare case on Solaris.
	// Using this code for all "already on system stack" calls exercises it more,
	// which should help keep it correct.
	MOVD	RSP, R13
	SUB	$16, R13
	MOVD	R13, RSP
	MOVD	$0, R4
	MOVD	R4, 0(RSP)	// Where above code stores g, in case someone looks during debugging.
	MOVD	R2, 8(RSP)	// Save original stack pointer.
	BL	(R1)
	// Restore stack pointer.
	MOVD	8(RSP), R2
	MOVD	R2, RSP
	// ret is a 4-byte int32 occupying the last bytes of the 20-byte
	// argument area, so it is stored with MOVW, the same way the
	// stack-switching path stores it. A MOVD here would write 8 bytes.
	MOVW	R0, ret+16(FP)
	RET
1038
// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
// See cgocall.go for more details.
//
// Outline:
//   fn == nil: frame is a saved g; restore it and only run dropm.
//   otherwise: load g from TLS, borrowing an m via needAndBindM when
//   there is none; switch from m->g0 to m->curg; call
//   cgocallbackg(fn, frame, ctxt); switch back; then decide whether the
//   borrowed m is dropped.
TEXT ·cgocallback(SB),NOSPLIT,$24-24
	NO_LOCAL_POINTERS

	// A nil fn means "skip cgocallbackg and just dropm"; frame then
	// carries the saved g. Used to dropm while a thread is exiting.
	MOVD	fn+0(FP), R1
	CBNZ	R1, loadg
	// Restore the g from frame.
	MOVD	frame+8(FP), g
	B	dropm

loadg:
	// Fetch g from thread-local storage.
	BL	runtime·load_g(SB)

	// A nil g means Go did not create this thread, or (on pthread
	// platforms) the thread has never called into Go. Borrow an m with
	// needm for temporary use. We are on the thread stack here, with
	// plenty of room, but the linker cannot know that, so the call is
	// made indirectly to hide it from the linker's analysis.
	CBZ	g, needm

	MOVD	g_m(g), R8
	MOVD	R8, savedm-8(SP)
	B	havem

needm:
	MOVD	g, savedm-8(SP)	// g is zero, so is m.
	MOVD	$runtime·needAndBindM(SB), R0
	CALL	(R0)

	// Set m->g0->sched.sp = SP so that a panic during the function we
	// are about to run has a valid SP for the g0 stack.
	// The code after havem saves this SP on the stack and then writes
	// the same SP back to m->sched.sp. That looks redundant, but on an
	// unrecovered panic unwindm restores g->sched.sp from that stack
	// slot and systemstack then uses it. Without this store the
	// restored SP would be uninitialized (typically 0) and unusable.
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), R3
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(R3)
	MOVD	R29, (g_sched+gobuf_bp)(R3)

havem:
	// There is now a valid m (R8) and we are running on its m->g0.
	// Stash the current m->g0->sched.sp on our stack, then point it at
	// our SP in preparation for the switch back from m->curg.
	// NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
	// Beware that the frame size is actually 32+16.
	MOVD	m_g0(R8), R3
	MOVD	(g_sched+gobuf_sp)(R3), R4
	MOVD	R4, savedsp-16(SP)
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(R3)

	// Switch to the m->curg stack and call runtime.cgocallbackg.
	// We take over m->curg without resuming what it had been running,
	// so m->curg->sched has to be preserved for later restoration.
	// sched.sp is easy: calling runtime.cgocallbackg leaves SP
	// unchanged upon return. sched.pc is pushed onto the curg stack,
	// under a frame the same size as cgocallback's g0 frame. After the
	// switch, that pushed PC looks like cgocallback's return PC, so
	// tracebacks continue seamlessly into the earlier calls.
	MOVD	m_curg(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R4	// R4 = curg's saved SP
	MOVD	(g_sched+gobuf_pc)(g), R5
	MOVD	R5, -48(R4)
	MOVD	(g_sched+gobuf_bp)(g), R5
	MOVD	R5, -56(R4)
	// Collect our arguments in registers before leaving this stack.
	MOVD	fn+0(FP), R1
	MOVD	frame+8(FP), R2
	MOVD	ctxt+16(FP), R3
	MOVD	$-48(R4), R0	// maintain 16-byte SP alignment
	MOVD	R0, RSP	// switch stack
	MOVD	R1, 8(RSP)
	MOVD	R2, 16(RSP)
	MOVD	R3, 24(RSP)
	MOVD	$runtime·cgocallbackg(SB), R0
	CALL	(R0)	// indirect call to bypass nosplit check. We're on a different stack now.

	// Restore g->sched (== m->curg->sched) from the saved values.
	MOVD	0(RSP), R5
	MOVD	R5, (g_sched+gobuf_pc)(g)
	MOVD	RSP, R4
	ADD	$48, R4, R4
	MOVD	R4, (g_sched+gobuf_sp)(g)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so there is nothing to restore for it.)
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	savedsp-16(SP), R4
	MOVD	R4, (g_sched+gobuf_sp)(g)

	// If the m on entry was nil, needm above borrowed an m,
	// 1. for the duration of the call on non-pthread platforms,
	// 2. or for the lifetime of the C thread on pthread platforms.
	// If the m on entry wasn't nil,
	// 1. the thread might be a Go thread,
	// 2. or it wasn't the first call from a C thread on pthread platforms,
	//    since then we skip dropm to reuse the m in the first call.
	MOVD	savedm-8(SP), R6
	CBNZ	R6, droppedm

	// Once a pthread key has been created, keep the m for the next call.
	MOVD	_cgo_pthread_key_created(SB), R6
	// A nil _cgo_pthread_key_created pointer means cgo is disabled: need dropm.
	CBZ	R6, dropm
	MOVD	(R6), R6
	CBNZ	R6, droppedm

dropm:
	MOVD	$runtime·dropm(SB), R0
	CALL	(R0)
droppedm:

	// Done!
	RET
1175
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
//
// The result is left in R0. R27 and g (R28) are spilled to the frame on
// entry and reloaded before returning, so both hold their entry values
// when this function returns.
TEXT _cgo_topofstack(SB),NOSPLIT,$24
	// g (R28) and REGTMP (R27) might be clobbered by load_g. They
	// are callee-save in the gcc calling convention, so save them.
	MOVD	R27, savedR27-8(SP)
	MOVD	g, saveG-16(SP)

	BL	runtime·load_g(SB)
	MOVD	g_m(g), R0
	MOVD	m_curg(R0), R0
	MOVD	(g_stack+stack_hi)(R0), R0

	// Reload both registers from the slots written on entry.
	MOVD	saveG-16(SP), g
	MOVD	savedR27-8(SP), R27
	RET
1192
// void setg(G*); set g. for use by needm.
//
// Loads the gg argument into the g register and then calls
// runtime·save_g.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVD	gg+0(FP), g
	// This only happens if iscgo, so jump straight to save_g
	BL	runtime·save_g(SB)
	RET
1199
// void setg_gcc(G*); set g called from gcc
//
// The new g arrives in R0. R27 is spilled to the frame around the call to
// runtime·save_g and reloaded afterwards, so it holds its entry value on
// return.
TEXT setg_gcc<>(SB),NOSPLIT,$8
	MOVD	R0, g
	MOVD	R27, savedR27-8(SP)
	BL	runtime·save_g(SB)
	MOVD	savedR27-8(SP), R27
	RET
1207
// emptyfunc does nothing and returns immediately.
// It is declared with no TEXT flags (0), i.e. it is not NOSPLIT.
TEXT runtime·emptyfunc(SB),0,$0-0
	RET
1210
// abort zeroes R0 and then loads through it, i.e. reads address 0.
// UNDEF follows the load, presumably as a backstop in case the load
// does not fault (confirm against the runtime's crash handling).
// There is no RET: the function is not written to return.
TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	ZR, R0
	MOVD	(R0), R0
	UNDEF
1215
// return0 is a stub: it leaves zero in R0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVD	ZR, R0	// R0 = 0
	RET
1219
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
//
// The first instruction is a no-op, so a return that lands just past it
// (assuming PCQuantum covers exactly that one instruction; confirm)
// continues directly into the call to goexit1.
// There is no RET because goexit1 does not return.
TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
	MOVD	R0, R0	// NOP
	BL	runtime·goexit1(SB)	// does not return
1225
// This is called from .init_array and follows the platform, not Go, ABI.
//
// R0 holds the moduledata being added. It is stored into the next field
// of the module currently referenced by runtime·lastmoduledatap, and
// runtime·lastmoduledatap is then updated to R0.
// 16 bytes are reserved on RSP by hand to spill and reload R27.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	SUB	$0x10, RSP
	MOVD	R27, 8(RSP) // The access to global variables below implicitly uses R27, which is callee-save
	MOVD	runtime·lastmoduledatap(SB), R1
	MOVD	R0, moduledata_next(R1)	// lastmoduledatap->next = R0
	MOVD	R0, runtime·lastmoduledatap(SB)	// lastmoduledatap = R0
	MOVD	8(RSP), R27
	ADD	$0x10, RSP
	RET
1236
// checkASM stores 1 into its single-byte result and returns.
// R3 is used as scratch.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVW	$1, R3
	MOVB	R3, ret+0(FP)
	RET
1241
// gcWriteBarrier informs the GC about heap pointer writes.
//
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
// number of bytes of buffer needed in R25, and returns a pointer
// to the buffer space in R25.
// It clobbers condition codes.
// It does not clobber any general-purpose registers except R27,
// but may clobber others (e.g., floating point registers)
// The act of CALLing gcWriteBarrier will clobber R30 (LR).
//
// Frame use (200 bytes): R0 and R1 are always spilled to
// 184(RSP)..199(RSP). On the flush path only, R2-R15 and R19-R26 are
// spilled to 8(RSP)..183(RSP).
TEXT gcWriteBarrier<>(SB),NOSPLIT,$200
	// Save the registers clobbered by the fast path.
	STP	(R0, R1), 184(RSP)
retry:
	// R0 = g.m.p; R1 = p.wbBuf.next; R27 = p.wbBuf.end.
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	(p_wbBuf+wbBuf_next)(R0), R1
	MOVD	(p_wbBuf+wbBuf_end)(R0), R27
	// Increment wbBuf.next position.
	ADD	R25, R1
	// Is the buffer full?
	// Unsigned test: take the flush path when the new next (R1) is above end (R27).
	CMP	R27, R1
	BHI	flush
	// Commit to the larger buffer.
	MOVD	R1, (p_wbBuf+wbBuf_next)(R0)
	// Make return value (the original next position)
	SUB	R25, R1, R25
	// Restore registers.
	LDP	184(RSP), (R0, R1)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// R0 and R1 already saved
	STP	(R2, R3), 1*8(RSP)
	STP	(R4, R5), 3*8(RSP)
	STP	(R6, R7), 5*8(RSP)
	STP	(R8, R9), 7*8(RSP)
	STP	(R10, R11), 9*8(RSP)
	STP	(R12, R13), 11*8(RSP)
	STP	(R14, R15), 13*8(RSP)
	// R16, R17 may be clobbered by linker trampoline
	// R18 is unused.
	STP	(R19, R20), 15*8(RSP)
	STP	(R21, R22), 17*8(RSP)
	STP	(R23, R24), 19*8(RSP)
	// R25 still holds the requested byte count here; saving it keeps
	// the request available to the retry after the flush.
	STP	(R25, R26), 21*8(RSP)
	// R27 is temp register.
	// R28 is g.
	// R29 is frame pointer (unused).
	// R30 is LR, which was saved by the prologue.
	// R31 is SP.

	CALL	runtime·wbBufFlush(SB)
	LDP	1*8(RSP), (R2, R3)
	LDP	3*8(RSP), (R4, R5)
	LDP	5*8(RSP), (R6, R7)
	LDP	7*8(RSP), (R8, R9)
	LDP	9*8(RSP), (R10, R11)
	LDP	11*8(RSP), (R12, R13)
	LDP	13*8(RSP), (R14, R15)
	LDP	15*8(RSP), (R19, R20)
	LDP	17*8(RSP), (R21, R22)
	LDP	19*8(RSP), (R23, R24)
	LDP	21*8(RSP), (R25, R26)
	// R0 and R1 are not reloaded here: retry overwrites them and the
	// fast path restores them from 184(RSP) before returning.
	JMP	retry
1308
// gcWriteBarrier1 sets the byte count in R25 to 8 and tail-jumps to gcWriteBarrier.
TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$8, R25
	JMP	gcWriteBarrier<>(SB)
// gcWriteBarrier2 sets the byte count in R25 to 16 and tail-jumps to gcWriteBarrier.
TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$16, R25
	JMP	gcWriteBarrier<>(SB)
// gcWriteBarrier3 sets the byte count in R25 to 24 and tail-jumps to gcWriteBarrier.
TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$24, R25
	JMP	gcWriteBarrier<>(SB)
// gcWriteBarrier4 sets the byte count in R25 to 32 and tail-jumps to gcWriteBarrier.
TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$32, R25
	JMP	gcWriteBarrier<>(SB)
// gcWriteBarrier5 sets the byte count in R25 to 40 and tail-jumps to gcWriteBarrier.
TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$40, R25
	JMP	gcWriteBarrier<>(SB)
// gcWriteBarrier6 sets the byte count in R25 to 48 and tail-jumps to gcWriteBarrier.
TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$48, R25
	JMP	gcWriteBarrier<>(SB)
// gcWriteBarrier7 sets the byte count in R25 to 56 and tail-jumps to gcWriteBarrier.
TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$56, R25
	JMP	gcWriteBarrier<>(SB)
// gcWriteBarrier8 sets the byte count in R25 to 64 and tail-jumps to gcWriteBarrier.
TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$64, R25
	JMP	gcWriteBarrier<>(SB)
1333
// Read-only 20-byte message that debugCallV2 reports (pointer and length
// at SP+8 and SP+16) when the requested argument frame is larger than the
// biggest debugCall stub.
DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
1336
1337// debugCallV2 is the entry point for debugger-injected function
1338// calls on running goroutines. It informs the runtime that a
1339// debug call has been injected and creates a call frame for the
1340// debugger to fill in.
1341//
1342// To inject a function call, a debugger should:
1343// 1. Check that the goroutine is in state _Grunning and that
1344//    there are at least 288 bytes free on the stack.
1345// 2. Set SP as SP-16.
1346// 3. Store the current LR in (SP) (using the SP after step 2).
1347// 4. Store the current PC in the LR register.
1348// 5. Write the desired argument frame size at SP-16
1349// 6. Save all machine registers (including flags and fpsimd registers)
1350//    so they can be restored later by the debugger.
1351// 7. Set the PC to debugCallV2 and resume execution.
1352//
1353// If the goroutine is in state _Grunnable, then it's not generally
1354// safe to inject a call because it may return out via other runtime
1355// operations. Instead, the debugger should unwind the stack to find
1356// the return to non-runtime code, add a temporary breakpoint there,
1357// and inject the call once that breakpoint is hit.
1358//
1359// If the goroutine is in any other state, it's not safe to inject a call.
1360//
1361// This function communicates back to the debugger by setting R20 and
1362// invoking BRK to raise a breakpoint signal. Note that the signal PC of
1363// the signal triggered by the BRK instruction is the PC where the signal
1364// is trapped, not the next PC, so to resume execution, the debugger needs
1365// to set the signal PC to PC+4. See the comments in the implementation for
1366// the protocol the debugger is expected to follow. InjectDebugCall in the
1367// runtime tests demonstrates this protocol.
1368//
1369// The debugger must ensure that any pointers passed to the function
1370// obey escape analysis requirements. Specifically, it must not pass
1371// a stack pointer to an escaping argument. debugCallV2 cannot check
1372// this invariant.
1373//
1374// This is ABIInternal because Go code injects its PC directly into new
1375// goroutine stacks.
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
	// NOFRAME: the frame is opened by hand. R29 and R30 are stored
	// first so that, once RSP has been lowered by 272, they sit at
	// -8(RSP) and 0(RSP); the restore path reloads them from there.
	STP	(R29, R30), -280(RSP)
	SUB	$272, RSP, RSP
	// R29 now addresses the slot holding the saved R29.
	SUB	$8, RSP, R29
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	STP	(R27, g), (30*8)(RSP)
	STP	(R25, R26), (28*8)(RSP)
	STP	(R23, R24), (26*8)(RSP)
	STP	(R21, R22), (24*8)(RSP)
	STP	(R19, R20), (22*8)(RSP)
	STP	(R16, R17), (20*8)(RSP)
	STP	(R14, R15), (18*8)(RSP)
	STP	(R12, R13), (16*8)(RSP)
	STP	(R10, R11), (14*8)(RSP)
	STP	(R8, R9), (12*8)(RSP)
	STP	(R6, R7), (10*8)(RSP)
	STP	(R4, R5), (8*8)(RSP)
	STP	(R2, R3), (6*8)(RSP)
	STP	(R0, R1), (4*8)(RSP)

	// Perform a safe-point check.
	// The argument goes in 8(RSP); the result is read back from
	// 16(RSP) and 24(RSP). A zero at 16(RSP) means the check passed.
	MOVD	R30, 8(RSP) // Caller's PC
	CALL	runtime·debugCallCheck(SB)
	MOVD	16(RSP), R0
	CBZ	R0, good

	// The safety check failed. Put the reason string at the top
	// of the stack.
	// The two result words move down one slot: 16(RSP) -> 8(RSP) and
	// 24(RSP) -> 16(RSP).
	MOVD	R0, 8(RSP)
	MOVD	24(RSP), R0
	MOVD	R0, 16(RSP)

	// Set R20 to 8 and invoke BRK. The debugger should get the
	// reason a call can't be injected from SP+8 and resume execution.
	MOVD	$8, R20
	BREAK
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set R20 to 0 and
	// invoke BRK. The debugger should write the argument
	// frame for the call at SP+8, set up argument registers,
	// set the LR as the signal PC + 4, set the PC to the function
	// to call, set R26 to point to the closure (if a closure call),
	// and resume execution.
	//
	// If the function returns, this will set R20 to 1 and invoke
	// BRK. The debugger can then inspect any return value saved
	// on the stack at SP+8 and in registers. To resume execution,
	// the debugger should restore the LR from (SP).
	//
	// If the function panics, this will set R20 to 2 and invoke BRK.
	// The interface{} value of the panic will be at SP+8. The debugger
	// can inspect the panic value and resume execution again.
	//
	// DEBUG_CALL_DISPATCH expects the requested frame size in R0. When
	// R0 is greater than MAXSIZE, BGT 5(PC) skips the remaining four
	// instructions of the expansion and falls into whatever follows.
	// Otherwise the address of NAME is passed to debugCallWrap at
	// 8(RSP) and control continues at restore.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMP	$MAXSIZE, R0;			\
	BGT	5(PC);				\
	MOVD	$NAME(SB), R0;			\
	MOVD	R0, 8(RSP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	// 256(RSP) is 16 bytes below the RSP this function was entered
	// with (272-byte frame), the slot the debugger fills with the
	// frame size.
	MOVD	256(RSP), R0 // the argument frame size
	// Sizes are tried in increasing order, so the first stub that is
	// large enough is the one used.
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVD	$debugCallFrameTooLarge<>(SB), R0
	MOVD	R0, 8(RSP)
	MOVD	$20, R0
	MOVD	R0, 16(RSP) // length of debugCallFrameTooLarge string
	MOVD	$8, R20
	BREAK
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set R20 to 16 and invoke BRK. The debugger should restore
	// all registers except for PC and RSP and resume execution.
	MOVD	$16, R20
	BREAK
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	// This also reloads R20, which was used above as the status code.
	LDP	(30*8)(RSP), (R27, g)
	LDP	(28*8)(RSP), (R25, R26)
	LDP	(26*8)(RSP), (R23, R24)
	LDP	(24*8)(RSP), (R21, R22)
	LDP	(22*8)(RSP), (R19, R20)
	LDP	(20*8)(RSP), (R16, R17)
	LDP	(18*8)(RSP), (R14, R15)
	LDP	(16*8)(RSP), (R12, R13)
	LDP	(14*8)(RSP), (R10, R11)
	LDP	(12*8)(RSP), (R8, R9)
	LDP	(10*8)(RSP), (R6, R7)
	LDP	(8*8)(RSP), (R4, R5)
	LDP	(6*8)(RSP), (R2, R3)
	LDP	(4*8)(RSP), (R0, R1)

	// R29 gets its entry value back; R27 receives the value R30 held
	// on entry (the PC to resume at), overwriting the R27 just reloaded.
	LDP	-8(RSP), (R29, R27)
	ADD	$288, RSP, RSP // Add 16 more bytes, see saveSigContext
	// -16(RSP) is now the address RSP held on entry, where the
	// debugger stored the original LR.
	MOVD	-16(RSP), R30 // restore old lr
	JMP	(R27)
1497
// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
//
// Each expansion defines a WRAPPER function NAME with a MAXSIZE-byte
// frame. It sets R20 to 0 and executes BREAK, then sets R20 to 1 and
// executes BREAK again, then returns. debugCallV2's comments describe
// what the debugger does at each of those two stops.
#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	NO_LOCAL_POINTERS;		\
	MOVD	$0, R20;		\
	BREAK;		\
	MOVD	$1, R20;		\
	BREAK;		\
	RET
// One stub per frame size tried by debugCallV2's dispatch sequence.
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)
1520
// func debugCallPanicked(val interface{})
//
// Copies the two words of val into this function's own frame at
// 8(RSP) and 16(RSP), sets R20 to 2 and executes BREAK so the debugger
// can read the panic value, then returns once execution is resumed.
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack at SP+8.
	MOVD	val_type+0(FP), R0
	MOVD	R0, 8(RSP)
	MOVD	val_data+8(FP), R0
	MOVD	R0, 16(RSP)
	MOVD	$2, R20
	BREAK
	RET
1531
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments is allocated
// in the caller's stack frame. Where needed, these stubs move the arguments into
// R0 and R1 and then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
1537//
1538// Defined as ABIInternal since the compiler generates ABIInternal
1539// calls to it directly and it does not use the stack-based Go ABI.
// panicIndex tail-jumps to goPanicIndex without touching any register.
TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
	JMP	runtime·goPanicIndex<ABIInternal>(SB)
// panicIndexU tail-jumps to goPanicIndexU without touching any register.
TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
	JMP	runtime·goPanicIndexU<ABIInternal>(SB)
// panicSliceAlen copies R1 into R0 and R2 into R1, then tail-jumps to
// goPanicSliceAlen.
TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	JMP	runtime·goPanicSliceAlen<ABIInternal>(SB)
// panicSliceAlenU copies R1 into R0 and R2 into R1, then tail-jumps to
// goPanicSliceAlenU.
TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	JMP	runtime·goPanicSliceAlenU<ABIInternal>(SB)
// panicSliceAcap copies R1 into R0 and R2 into R1, then tail-jumps to
// goPanicSliceAcap.
TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	JMP	runtime·goPanicSliceAcap<ABIInternal>(SB)
// panicSliceAcapU copies R1 into R0 and R2 into R1, then tail-jumps to
// goPanicSliceAcapU.
TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	JMP	runtime·goPanicSliceAcapU<ABIInternal>(SB)
// panicSliceB tail-jumps to goPanicSliceB without touching any register.
TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
	JMP	runtime·goPanicSliceB<ABIInternal>(SB)
// panicSliceBU tail-jumps to goPanicSliceBU without touching any register.
TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
	JMP	runtime·goPanicSliceBU<ABIInternal>(SB)
// panicSlice3Alen copies R2 into R0 and R3 into R1, then tail-jumps to
// goPanicSlice3Alen.
TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	JMP	runtime·goPanicSlice3Alen<ABIInternal>(SB)
// panicSlice3AlenU copies R2 into R0 and R3 into R1, then tail-jumps to
// goPanicSlice3AlenU.
TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	JMP	runtime·goPanicSlice3AlenU<ABIInternal>(SB)
// panicSlice3Acap copies R2 into R0 and R3 into R1, then tail-jumps to
// goPanicSlice3Acap.
TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	JMP	runtime·goPanicSlice3Acap<ABIInternal>(SB)
// panicSlice3AcapU copies R2 into R0 and R3 into R1, then tail-jumps to
// goPanicSlice3AcapU.
TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	JMP	runtime·goPanicSlice3AcapU<ABIInternal>(SB)
// panicSlice3B copies R1 into R0 and R2 into R1, then tail-jumps to
// goPanicSlice3B.
TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	JMP	runtime·goPanicSlice3B<ABIInternal>(SB)
// panicSlice3BU copies R1 into R0 and R2 into R1, then tail-jumps to
// goPanicSlice3BU.
TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	JMP	runtime·goPanicSlice3BU<ABIInternal>(SB)
// panicSlice3C tail-jumps to goPanicSlice3C without touching any register.
TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
	JMP	runtime·goPanicSlice3C<ABIInternal>(SB)
// panicSlice3CU tail-jumps to goPanicSlice3CU without touching any register.
TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
	JMP	runtime·goPanicSlice3CU<ABIInternal>(SB)
// panicSliceConvert copies R2 into R0 and R3 into R1, then tail-jumps to
// goPanicSliceConvert.
TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	JMP	runtime·goPanicSliceConvert<ABIInternal>(SB)
1596
// getfp returns the current value of R29, the frame pointer register, in R0.
// It is NOFRAME, so R29 is read as the caller left it.
TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVD R29, R0
	RET
1600