xref: /aosp_15_r20/external/pcre/src/sljit/sljitNativeARM_T2_32.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi /*
2*22dc650dSSadaf Ebrahimi  *    Stack-less Just-In-Time compiler
3*22dc650dSSadaf Ebrahimi  *
4*22dc650dSSadaf Ebrahimi  *    Copyright Zoltan Herczeg ([email protected]). All rights reserved.
5*22dc650dSSadaf Ebrahimi  *
6*22dc650dSSadaf Ebrahimi  * Redistribution and use in source and binary forms, with or without modification, are
7*22dc650dSSadaf Ebrahimi  * permitted provided that the following conditions are met:
8*22dc650dSSadaf Ebrahimi  *
9*22dc650dSSadaf Ebrahimi  *   1. Redistributions of source code must retain the above copyright notice, this list of
10*22dc650dSSadaf Ebrahimi  *      conditions and the following disclaimer.
11*22dc650dSSadaf Ebrahimi  *
12*22dc650dSSadaf Ebrahimi  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13*22dc650dSSadaf Ebrahimi  *      of conditions and the following disclaimer in the documentation and/or other materials
14*22dc650dSSadaf Ebrahimi  *      provided with the distribution.
15*22dc650dSSadaf Ebrahimi  *
16*22dc650dSSadaf Ebrahimi  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17*22dc650dSSadaf Ebrahimi  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18*22dc650dSSadaf Ebrahimi  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19*22dc650dSSadaf Ebrahimi  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20*22dc650dSSadaf Ebrahimi  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21*22dc650dSSadaf Ebrahimi  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22*22dc650dSSadaf Ebrahimi  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23*22dc650dSSadaf Ebrahimi  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24*22dc650dSSadaf Ebrahimi  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25*22dc650dSSadaf Ebrahimi  */
26*22dc650dSSadaf Ebrahimi 
sljit_get_platform_name(void)27*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28*22dc650dSSadaf Ebrahimi {
29*22dc650dSSadaf Ebrahimi #ifdef __SOFTFP__
30*22dc650dSSadaf Ebrahimi 	return "ARM-Thumb2" SLJIT_CPUINFO " ABI:softfp";
31*22dc650dSSadaf Ebrahimi #else
32*22dc650dSSadaf Ebrahimi 	return "ARM-Thumb2" SLJIT_CPUINFO " ABI:hardfp";
33*22dc650dSSadaf Ebrahimi #endif
34*22dc650dSSadaf Ebrahimi }
35*22dc650dSSadaf Ebrahimi 
/* Basic instruction type, backend-private temporary registers and the tables
   that translate sljit register indices into machine register numbers. */
36*22dc650dSSadaf Ebrahimi /* Length of an instruction word. */
37*22dc650dSSadaf Ebrahimi typedef sljit_u32 sljit_ins;
38*22dc650dSSadaf Ebrahimi 
39*22dc650dSSadaf Ebrahimi /* Last register + 1. */
/* The TMP_* values are indices into reg_map / freg_map, exactly like the
   public register indices (e.g. RDN3(TMP_REG1) looks up reg_map[TMP_REG1]). */
40*22dc650dSSadaf Ebrahimi #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
41*22dc650dSSadaf Ebrahimi #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
42*22dc650dSSadaf Ebrahimi #define TMP_PC		(SLJIT_NUMBER_OF_REGISTERS + 4)
43*22dc650dSSadaf Ebrahimi 
44*22dc650dSSadaf Ebrahimi #define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
45*22dc650dSSadaf Ebrahimi #define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
46*22dc650dSSadaf Ebrahimi 
47*22dc650dSSadaf Ebrahimi /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
/* reg_map[i] is the register number that the RD3/RN3/RM4/RD4/... macros shift
   into instruction fields for sljit register index i. The 17 initializers
   fill the array exactly only when SLJIT_NUMBER_OF_REGISTERS is 12 -
   presumably the value configured for this target (TODO confirm). */
48*22dc650dSSadaf Ebrahimi static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
49*22dc650dSSadaf Ebrahimi 	0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
50*22dc650dSSadaf Ebrahimi };
51*22dc650dSSadaf Ebrahimi 
/* freg_map[i] is the four bit register number that VM4/VD4/VN4 place into
   the main register field of a floating point instruction. The table holds
   the same sequence of numbers twice (after the leading unused entry). */
52*22dc650dSSadaf Ebrahimi static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
53*22dc650dSSadaf Ebrahimi 	0,
54*22dc650dSSadaf Ebrahimi 	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
55*22dc650dSSadaf Ebrahimi 	7, 6,
56*22dc650dSSadaf Ebrahimi 	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
57*22dc650dSSadaf Ebrahimi 	7, 6
58*22dc650dSSadaf Ebrahimi };
59*22dc650dSSadaf Ebrahimi 
/* freg_ebit_map[i] is the additional single bit that VM4/VD4/VN4 store apart
   from the four bit field (at bit 5, 22 and 7 respectively). It is 0 for the
   first half of the indices and 1 for the second half, so the two halves of
   freg_map address different registers. Presumably the second half serves
   the SLJIT_F64_SECOND() register indices - confirm against the sljit headers. */
60*22dc650dSSadaf Ebrahimi static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
61*22dc650dSSadaf Ebrahimi 	0,
62*22dc650dSSadaf Ebrahimi 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63*22dc650dSSadaf Ebrahimi 	0, 0,
64*22dc650dSSadaf Ebrahimi 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65*22dc650dSSadaf Ebrahimi 	1, 1
66*22dc650dSSadaf Ebrahimi };
67*22dc650dSSadaf Ebrahimi 
/* Extracts the 'bits' wide field of 'src' whose lowest bit is at position
   'from' and returns it moved so that its lowest bit is at position 'to'.
   All other bits of the result are zero; the result has type sljit_ins.
   Every parameter is parenthesized, so arbitrary constant expressions
   (e.g. 12 + 16) may be passed. The mask is computed in sljit_ins to avoid
   signed shifts. 'bits' must be smaller than the width of sljit_ins. */
#define COPY_BITS(src, from, to, bits) \
	(((from) >= (to) ? ((sljit_ins)(src) >> ((from) - (to))) : ((sljit_ins)(src) << ((to) - (from)))) \
		& ((((sljit_ins)1 << (bits)) - 1) << (to)))

/* Two's complement negation of an unsigned immediate. */
#define NEGATE(uimm) ((sljit_uw)-(sljit_sw)(uimm))

/* Thumb16 encodings. */
/* Each macro looks up the machine register number in reg_map and shifts it
   to the position of the named instruction field. */
#define RD3(rd) ((sljit_ins)reg_map[rd])
#define RN3(rn) ((sljit_ins)reg_map[rn] << 3)
#define RM3(rm) ((sljit_ins)reg_map[rm] << 6)
#define RDN3(rdn) ((sljit_ins)reg_map[rdn] << 8)
/* Immediate fields: IMM3 starts at bit 6, IMM8 at bit 0. The value is not
   masked, the caller must pass an immediate that fits. */
#define IMM3(imm) ((sljit_ins)(imm) << 6)
#define IMM8(imm) ((sljit_ins)(imm))

/* Thumb16 helpers. */
/* Register pair of the 16 bit forms that accept all 16 registers: rn is
   placed from bit 3 upwards, the low three bits of rd at bits 2-0 and the
   fourth bit of rd at bit 7. */
#define SET_REGS44(rd, rn) \
	(((sljit_ins)reg_map[rn] << 3) | ((sljit_ins)reg_map[rd] & 0x7) | (((sljit_ins)reg_map[rd] & 0x8) << 4))
/* True when all listed registers map to machine registers 0-7. */
#define IS_2_LO_REGS(reg1, reg2) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7)
#define IS_3_LO_REGS(reg1, reg2, reg3) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7)

/* Thumb32 encodings. */
#define RM4(rm) ((sljit_ins)reg_map[rm])
#define RD4(rd) ((sljit_ins)reg_map[rd] << 8)
#define RT4(rt) ((sljit_ins)reg_map[rt] << 12)
#define RN4(rn) ((sljit_ins)reg_map[rn] << 16)

/* Floating point register fields: four bits from freg_map plus the separate
   extra bit from freg_ebit_map. */
#define VM4(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
#define VD4(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
#define VN4(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))

/* Splits a five bit immediate: bits 4-2 go to bits 14-12, bits 1-0 go to
   bits 7-6 of the instruction. */
#define IMM5(imm) \
	(COPY_BITS(imm, 2, 12, 3) | (((sljit_ins)(imm) & 0x3) << 6))
/* Splits a twelve bit immediate: bit 11 goes to bit 26, bits 10-8 go to
   bits 14-12, the low eight bits stay in place. */
#define IMM12(imm) \
	(COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | ((sljit_ins)(imm) & 0xff))
103*22dc650dSSadaf Ebrahimi 
104*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
105*22dc650dSSadaf Ebrahimi /*  Instruction forms                                                    */
106*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
107*22dc650dSSadaf Ebrahimi 
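108*22dc650dSSadaf Ebrahimi /* Naming: a dot '.' in the assembler mnemonic is written as '_' (e.g. ADD.W becomes ADD_W).
109*22dc650dSSadaf Ebrahimi    A trailing I marks the immediate form (possibly followed by the number of immediate bits, e.g. ADDSI3). */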
110*22dc650dSSadaf Ebrahimi #define ADCI		0xf1400000
111*22dc650dSSadaf Ebrahimi #define ADCS		0x4140
112*22dc650dSSadaf Ebrahimi #define ADC_W		0xeb400000
113*22dc650dSSadaf Ebrahimi #define ADD		0x4400
114*22dc650dSSadaf Ebrahimi #define ADDS		0x1800
115*22dc650dSSadaf Ebrahimi #define ADDSI3		0x1c00
116*22dc650dSSadaf Ebrahimi #define ADDSI8		0x3000
117*22dc650dSSadaf Ebrahimi #define ADDWI		0xf2000000
118*22dc650dSSadaf Ebrahimi #define ADD_SP		0x4485
119*22dc650dSSadaf Ebrahimi #define ADD_SP_I	0xb000
120*22dc650dSSadaf Ebrahimi #define ADD_W		0xeb000000
121*22dc650dSSadaf Ebrahimi #define ADD_WI		0xf1000000
122*22dc650dSSadaf Ebrahimi #define ANDI		0xf0000000
123*22dc650dSSadaf Ebrahimi #define ANDS		0x4000
124*22dc650dSSadaf Ebrahimi #define AND_W		0xea000000
125*22dc650dSSadaf Ebrahimi #define ASRS		0x4100
126*22dc650dSSadaf Ebrahimi #define ASRSI		0x1000
127*22dc650dSSadaf Ebrahimi #define ASR_W		0xfa40f000
128*22dc650dSSadaf Ebrahimi #define ASR_WI		0xea4f0020
129*22dc650dSSadaf Ebrahimi #define BCC		0xd000
130*22dc650dSSadaf Ebrahimi #define BICI		0xf0200000
131*22dc650dSSadaf Ebrahimi #define BKPT		0xbe00
132*22dc650dSSadaf Ebrahimi #define BLX		0x4780
133*22dc650dSSadaf Ebrahimi #define BX		0x4700
134*22dc650dSSadaf Ebrahimi #define CLZ		0xfab0f080
135*22dc650dSSadaf Ebrahimi #define CMNI_W		0xf1100f00
136*22dc650dSSadaf Ebrahimi #define CMP		0x4280
137*22dc650dSSadaf Ebrahimi #define CMPI		0x2800
138*22dc650dSSadaf Ebrahimi #define CMPI_W		0xf1b00f00
139*22dc650dSSadaf Ebrahimi #define CMP_X		0x4500
140*22dc650dSSadaf Ebrahimi #define CMP_W		0xebb00f00
141*22dc650dSSadaf Ebrahimi #define EORI		0xf0800000
142*22dc650dSSadaf Ebrahimi #define EORS		0x4040
143*22dc650dSSadaf Ebrahimi #define EOR_W		0xea800000
144*22dc650dSSadaf Ebrahimi #define IT		0xbf00
145*22dc650dSSadaf Ebrahimi #define LDR		0xf8d00000
146*22dc650dSSadaf Ebrahimi #define LDR_SP		0x9800
147*22dc650dSSadaf Ebrahimi #define LDRD		0xe9500000
148*22dc650dSSadaf Ebrahimi #define LDREX		0xe8500f00
149*22dc650dSSadaf Ebrahimi #define LDREXB		0xe8d00f4f
150*22dc650dSSadaf Ebrahimi #define LDREXH		0xe8d00f5f
151*22dc650dSSadaf Ebrahimi #define LDRI		0xf8500800
152*22dc650dSSadaf Ebrahimi #define LSLS		0x4080
153*22dc650dSSadaf Ebrahimi #define LSLSI		0x0000
154*22dc650dSSadaf Ebrahimi #define LSL_W		0xfa00f000
155*22dc650dSSadaf Ebrahimi #define LSL_WI		0xea4f0000
156*22dc650dSSadaf Ebrahimi #define LSRS		0x40c0
157*22dc650dSSadaf Ebrahimi #define LSRSI		0x0800
158*22dc650dSSadaf Ebrahimi #define LSR_W		0xfa20f000
159*22dc650dSSadaf Ebrahimi #define LSR_WI		0xea4f0010
160*22dc650dSSadaf Ebrahimi #define MLA		0xfb000000
161*22dc650dSSadaf Ebrahimi #define MOV		0x4600
162*22dc650dSSadaf Ebrahimi #define MOVS		0x0000
163*22dc650dSSadaf Ebrahimi #define MOVSI		0x2000
164*22dc650dSSadaf Ebrahimi #define MOVT		0xf2c00000
165*22dc650dSSadaf Ebrahimi #define MOVW		0xf2400000
166*22dc650dSSadaf Ebrahimi #define MOV_W		0xea4f0000
167*22dc650dSSadaf Ebrahimi #define MOV_WI		0xf04f0000
168*22dc650dSSadaf Ebrahimi #define MUL		0xfb00f000
169*22dc650dSSadaf Ebrahimi #define MVNS		0x43c0
170*22dc650dSSadaf Ebrahimi #define MVN_W		0xea6f0000
171*22dc650dSSadaf Ebrahimi #define MVN_WI		0xf06f0000
172*22dc650dSSadaf Ebrahimi #define NOP		0xbf00
173*22dc650dSSadaf Ebrahimi #define ORNI		0xf0600000
174*22dc650dSSadaf Ebrahimi #define ORRI		0xf0400000
175*22dc650dSSadaf Ebrahimi #define ORRS		0x4300
176*22dc650dSSadaf Ebrahimi #define ORR_W		0xea400000
177*22dc650dSSadaf Ebrahimi #define POP		0xbc00
178*22dc650dSSadaf Ebrahimi #define POP_W		0xe8bd0000
179*22dc650dSSadaf Ebrahimi #define PUSH		0xb400
180*22dc650dSSadaf Ebrahimi #define PUSH_W		0xe92d0000
181*22dc650dSSadaf Ebrahimi #define REV		0xba00
182*22dc650dSSadaf Ebrahimi #define REV_W		0xfa90f080
183*22dc650dSSadaf Ebrahimi #define REV16		0xba40
184*22dc650dSSadaf Ebrahimi #define REV16_W		0xfa90f090
185*22dc650dSSadaf Ebrahimi #define RBIT		0xfa90f0a0
186*22dc650dSSadaf Ebrahimi #define RORS		0x41c0
187*22dc650dSSadaf Ebrahimi #define ROR_W		0xfa60f000
188*22dc650dSSadaf Ebrahimi #define ROR_WI		0xea4f0030
189*22dc650dSSadaf Ebrahimi #define RSB_WI		0xf1c00000
190*22dc650dSSadaf Ebrahimi #define RSBSI		0x4240
191*22dc650dSSadaf Ebrahimi #define SBCI		0xf1600000
192*22dc650dSSadaf Ebrahimi #define SBCS		0x4180
193*22dc650dSSadaf Ebrahimi #define SBC_W		0xeb600000
194*22dc650dSSadaf Ebrahimi #define SDIV		0xfb90f0f0
195*22dc650dSSadaf Ebrahimi #define SMULL		0xfb800000
196*22dc650dSSadaf Ebrahimi #define STR_SP		0x9000
197*22dc650dSSadaf Ebrahimi #define STRD		0xe9400000
198*22dc650dSSadaf Ebrahimi #define STREX		0xe8400000
199*22dc650dSSadaf Ebrahimi #define STREXB		0xe8c00f40
200*22dc650dSSadaf Ebrahimi #define STREXH		0xe8c00f50
201*22dc650dSSadaf Ebrahimi #define SUBS		0x1a00
202*22dc650dSSadaf Ebrahimi #define SUBSI3		0x1e00
203*22dc650dSSadaf Ebrahimi #define SUBSI8		0x3800
204*22dc650dSSadaf Ebrahimi #define SUB_W		0xeba00000
205*22dc650dSSadaf Ebrahimi #define SUBWI		0xf2a00000
206*22dc650dSSadaf Ebrahimi #define SUB_SP_I	0xb080
207*22dc650dSSadaf Ebrahimi #define SUB_WI		0xf1a00000
208*22dc650dSSadaf Ebrahimi #define SXTB		0xb240
209*22dc650dSSadaf Ebrahimi #define SXTB_W		0xfa4ff080
210*22dc650dSSadaf Ebrahimi #define SXTH		0xb200
211*22dc650dSSadaf Ebrahimi #define SXTH_W		0xfa0ff080
212*22dc650dSSadaf Ebrahimi #define TST		0x4200
213*22dc650dSSadaf Ebrahimi #define TSTI		0xf0000f00
214*22dc650dSSadaf Ebrahimi #define TST_W		0xea000f00
215*22dc650dSSadaf Ebrahimi #define UDIV		0xfbb0f0f0
216*22dc650dSSadaf Ebrahimi #define UMULL		0xfba00000
217*22dc650dSSadaf Ebrahimi #define UXTB		0xb2c0
218*22dc650dSSadaf Ebrahimi #define UXTB_W		0xfa5ff080
219*22dc650dSSadaf Ebrahimi #define UXTH		0xb280
220*22dc650dSSadaf Ebrahimi #define UXTH_W		0xfa1ff080
221*22dc650dSSadaf Ebrahimi #define VABS_F32	0xeeb00ac0
222*22dc650dSSadaf Ebrahimi #define VADD_F32	0xee300a00
223*22dc650dSSadaf Ebrahimi #define VAND		0xef000110
224*22dc650dSSadaf Ebrahimi #define VCMP_F32	0xeeb40a40
225*22dc650dSSadaf Ebrahimi #define VCVT_F32_S32	0xeeb80ac0
226*22dc650dSSadaf Ebrahimi #define VCVT_F32_U32	0xeeb80a40
227*22dc650dSSadaf Ebrahimi #define VCVT_F64_F32	0xeeb70ac0
228*22dc650dSSadaf Ebrahimi #define VCVT_S32_F32	0xeebd0ac0
229*22dc650dSSadaf Ebrahimi #define VDIV_F32	0xee800a00
230*22dc650dSSadaf Ebrahimi #define VDUP		0xee800b10
231*22dc650dSSadaf Ebrahimi #define VDUP_s		0xffb00c00
232*22dc650dSSadaf Ebrahimi #define VEOR		0xff000110
233*22dc650dSSadaf Ebrahimi #define VLD1		0xf9200000
234*22dc650dSSadaf Ebrahimi #define VLD1_r		0xf9a00c00
235*22dc650dSSadaf Ebrahimi #define VLD1_s		0xf9a00000
236*22dc650dSSadaf Ebrahimi #define VLDR_F32	0xed100a00
237*22dc650dSSadaf Ebrahimi #define VMOV_F32	0xeeb00a40
238*22dc650dSSadaf Ebrahimi #define VMOV		0xee000a10
239*22dc650dSSadaf Ebrahimi #define VMOV2		0xec400a10
240*22dc650dSSadaf Ebrahimi #define VMOV_i		0xef800010
241*22dc650dSSadaf Ebrahimi #define VMOV_s		0xee000b10
242*22dc650dSSadaf Ebrahimi #define VMOVN		0xffb20200
243*22dc650dSSadaf Ebrahimi #define VMRS		0xeef1fa10
244*22dc650dSSadaf Ebrahimi #define VMUL_F32	0xee200a00
245*22dc650dSSadaf Ebrahimi #define VNEG_F32	0xeeb10a40
246*22dc650dSSadaf Ebrahimi #define VORR		0xef200110
247*22dc650dSSadaf Ebrahimi #define VPOP		0xecbd0b00
248*22dc650dSSadaf Ebrahimi #define VPUSH		0xed2d0b00
249*22dc650dSSadaf Ebrahimi #define VSHLL		0xef800a10
250*22dc650dSSadaf Ebrahimi #define VSHR		0xef800010
251*22dc650dSSadaf Ebrahimi #define VSRA		0xef800110
252*22dc650dSSadaf Ebrahimi #define VST1		0xf9000000
253*22dc650dSSadaf Ebrahimi #define VST1_s		0xf9800000
254*22dc650dSSadaf Ebrahimi #define VSTR_F32	0xed000a00
255*22dc650dSSadaf Ebrahimi #define VSUB_F32	0xee300a40
256*22dc650dSSadaf Ebrahimi 
257*22dc650dSSadaf Ebrahimi #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
258*22dc650dSSadaf Ebrahimi 
/* Argument check helper: returns non-zero when 'fr' is a floating point
   register index that may be used with the current compiler settings.
   Accepted are the first compiler->fscratches indices starting at SLJIT_FR0,
   the last compiler->fsaveds indices ending at SLJIT_FS0 and the temporary
   float registers. When 'is_32' is set, indices at or above
   SLJIT_F64_SECOND(SLJIT_FR0) are first reduced by SLJIT_F64_SECOND(0).
   Always returns 0 while compiler->scratches is -1. */
static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
{
	if (compiler->scratches == -1)
		return 0;

	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
		fr -= SLJIT_F64_SECOND(0);

	/* Scratch float registers. */
	if (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
		return 1;

	/* Saved float registers. */
	if (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0)
		return 1;

	/* Temporary float registers. */
	return fr >= SLJIT_TMP_FREGISTER_BASE
		&& fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS);
}
271*22dc650dSSadaf Ebrahimi 
272*22dc650dSSadaf Ebrahimi #endif /* SLJIT_ARGUMENT_CHECKS */
273*22dc650dSSadaf Ebrahimi 
push_inst16(struct sljit_compiler * compiler,sljit_ins inst)274*22dc650dSSadaf Ebrahimi static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst)
275*22dc650dSSadaf Ebrahimi {
276*22dc650dSSadaf Ebrahimi 	sljit_u16 *ptr;
277*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(!(inst & 0xffff0000));
278*22dc650dSSadaf Ebrahimi 
279*22dc650dSSadaf Ebrahimi 	ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16));
280*22dc650dSSadaf Ebrahimi 	FAIL_IF(!ptr);
281*22dc650dSSadaf Ebrahimi 	*ptr = (sljit_u16)(inst);
282*22dc650dSSadaf Ebrahimi 	compiler->size++;
283*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
284*22dc650dSSadaf Ebrahimi }
285*22dc650dSSadaf Ebrahimi 
push_inst32(struct sljit_compiler * compiler,sljit_ins inst)286*22dc650dSSadaf Ebrahimi static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst)
287*22dc650dSSadaf Ebrahimi {
288*22dc650dSSadaf Ebrahimi 	sljit_u16 *ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins));
289*22dc650dSSadaf Ebrahimi 	FAIL_IF(!ptr);
290*22dc650dSSadaf Ebrahimi 	*ptr++ = (sljit_u16)(inst >> 16);
291*22dc650dSSadaf Ebrahimi 	*ptr = (sljit_u16)(inst);
292*22dc650dSSadaf Ebrahimi 	compiler->size += 2;
293*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
294*22dc650dSSadaf Ebrahimi }
295*22dc650dSSadaf Ebrahimi 
/* Emits a MOVW / MOVT instruction pair that loads the full 32 bit constant
   'imm' into register 'dst': MOVW receives the low 16 bits, MOVT the high
   16 bits. Each 16 bit half is scattered into the instruction as a 4 bit
   field at bit 16, one bit at bit 26, a 3 bit field at bit 12 and the low
   8 bits in place. Returns the result of the second push_inst32() call, or
   leaves early through FAIL_IF when the first one fails. */
static sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
	sljit_ins low_half_ins = MOVW | RD4(dst)
		| COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff);
	sljit_ins high_half_ins = MOVT | RD4(dst)
		| COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm >> 16) & 0xff);

	FAIL_IF(push_inst32(compiler, low_half_ins));
	return push_inst32(compiler, high_half_ins);
}
303*22dc650dSSadaf Ebrahimi 
304*22dc650dSSadaf Ebrahimi /* Dst must be in bits[11-8] */
set_imm32_const(sljit_u16 * inst,sljit_ins dst,sljit_uw new_imm)305*22dc650dSSadaf Ebrahimi static void set_imm32_const(sljit_u16 *inst, sljit_ins dst, sljit_uw new_imm)
306*22dc650dSSadaf Ebrahimi {
307*22dc650dSSadaf Ebrahimi 	inst[0] = (sljit_u16)((MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1));
308*22dc650dSSadaf Ebrahimi 	inst[1] = (sljit_u16)(dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff));
309*22dc650dSSadaf Ebrahimi 	inst[2] = (sljit_u16)((MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1));
310*22dc650dSSadaf Ebrahimi 	inst[3] = (sljit_u16)(dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16));
311*22dc650dSSadaf Ebrahimi }
312*22dc650dSSadaf Ebrahimi 
modify_imm32_const(sljit_u16 * inst,sljit_uw new_imm)313*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm)
314*22dc650dSSadaf Ebrahimi {
315*22dc650dSSadaf Ebrahimi 	sljit_ins dst = inst[1] & 0x0f00;
316*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00));
317*22dc650dSSadaf Ebrahimi 	set_imm32_const(inst, dst, new_imm);
318*22dc650dSSadaf Ebrahimi }
319*22dc650dSSadaf Ebrahimi 
detect_jump_type(struct sljit_jump * jump,sljit_u16 * code_ptr,sljit_u16 * code,sljit_sw executable_offset)320*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_u16* detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset)
321*22dc650dSSadaf Ebrahimi {
322*22dc650dSSadaf Ebrahimi 	sljit_sw diff;
323*22dc650dSSadaf Ebrahimi 
324*22dc650dSSadaf Ebrahimi 	if (jump->flags & SLJIT_REWRITABLE_JUMP)
325*22dc650dSSadaf Ebrahimi 		goto exit;
326*22dc650dSSadaf Ebrahimi 
327*22dc650dSSadaf Ebrahimi 	if (jump->flags & JUMP_ADDR) {
328*22dc650dSSadaf Ebrahimi 		/* Branch to ARM code is not optimized yet. */
329*22dc650dSSadaf Ebrahimi 		if (!(jump->u.target & 0x1))
330*22dc650dSSadaf Ebrahimi 			goto exit;
331*22dc650dSSadaf Ebrahimi 		diff = (sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset;
332*22dc650dSSadaf Ebrahimi 	} else {
333*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(jump->u.label != NULL);
334*22dc650dSSadaf Ebrahimi 		diff = (sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2);
335*22dc650dSSadaf Ebrahimi 	}
336*22dc650dSSadaf Ebrahimi 
337*22dc650dSSadaf Ebrahimi 	if (jump->flags & IS_COND) {
338*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(!(jump->flags & IS_BL));
339*22dc650dSSadaf Ebrahimi 		/* Size of the prefix IT instruction. */
340*22dc650dSSadaf Ebrahimi 		diff += SSIZE_OF(u16);
341*22dc650dSSadaf Ebrahimi 		if (diff <= 0xff && diff >= -0x100) {
342*22dc650dSSadaf Ebrahimi 			jump->flags |= PATCH_TYPE1;
343*22dc650dSSadaf Ebrahimi 			jump->addr = (sljit_uw)(code_ptr - 1);
344*22dc650dSSadaf Ebrahimi 			return code_ptr - 1;
345*22dc650dSSadaf Ebrahimi 		}
346*22dc650dSSadaf Ebrahimi 		if (diff <= 0xfffff && diff >= -0x100000) {
347*22dc650dSSadaf Ebrahimi 			jump->flags |= PATCH_TYPE2;
348*22dc650dSSadaf Ebrahimi 			jump->addr = (sljit_uw)(code_ptr - 1);
349*22dc650dSSadaf Ebrahimi 			return code_ptr;
350*22dc650dSSadaf Ebrahimi 		}
351*22dc650dSSadaf Ebrahimi 		diff -= SSIZE_OF(u16);
352*22dc650dSSadaf Ebrahimi 	} else if (jump->flags & IS_BL) {
353*22dc650dSSadaf Ebrahimi 		/* Branch and link. */
354*22dc650dSSadaf Ebrahimi 		if (diff <= 0xffffff && diff >= -0x1000000) {
355*22dc650dSSadaf Ebrahimi 			jump->flags |= PATCH_TYPE5;
356*22dc650dSSadaf Ebrahimi 			return code_ptr + 1;
357*22dc650dSSadaf Ebrahimi 		}
358*22dc650dSSadaf Ebrahimi 		goto exit;
359*22dc650dSSadaf Ebrahimi 	} else if (diff <= 0x7ff && diff >= -0x800) {
360*22dc650dSSadaf Ebrahimi 		jump->flags |= PATCH_TYPE3;
361*22dc650dSSadaf Ebrahimi 		return code_ptr;
362*22dc650dSSadaf Ebrahimi 	}
363*22dc650dSSadaf Ebrahimi 
364*22dc650dSSadaf Ebrahimi 	if (diff <= 0xffffff && diff >= -0x1000000) {
365*22dc650dSSadaf Ebrahimi 		jump->flags |= PATCH_TYPE4;
366*22dc650dSSadaf Ebrahimi 		return code_ptr + 1;
367*22dc650dSSadaf Ebrahimi 	}
368*22dc650dSSadaf Ebrahimi 
369*22dc650dSSadaf Ebrahimi exit:
370*22dc650dSSadaf Ebrahimi 	code_ptr[4] = code_ptr[0];
371*22dc650dSSadaf Ebrahimi 
372*22dc650dSSadaf Ebrahimi 	if (jump->flags & IS_COND) {
373*22dc650dSSadaf Ebrahimi 		code_ptr[3] = code_ptr[-1];
374*22dc650dSSadaf Ebrahimi 		jump->addr = (sljit_uw)(code_ptr - 1);
375*22dc650dSSadaf Ebrahimi 	}
376*22dc650dSSadaf Ebrahimi 
377*22dc650dSSadaf Ebrahimi 	return code_ptr + 4;
378*22dc650dSSadaf Ebrahimi }
379*22dc650dSSadaf Ebrahimi 
mov_addr_get_length(struct sljit_jump * jump,sljit_u16 * code_ptr,sljit_u16 * code,sljit_sw executable_offset)380*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset)
381*22dc650dSSadaf Ebrahimi {
382*22dc650dSSadaf Ebrahimi 	sljit_uw addr;
383*22dc650dSSadaf Ebrahimi 	sljit_sw diff;
384*22dc650dSSadaf Ebrahimi 	SLJIT_UNUSED_ARG(executable_offset);
385*22dc650dSSadaf Ebrahimi 
386*22dc650dSSadaf Ebrahimi 	if (jump->flags & JUMP_ADDR)
387*22dc650dSSadaf Ebrahimi 		addr = jump->u.target;
388*22dc650dSSadaf Ebrahimi 	else
389*22dc650dSSadaf Ebrahimi 		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);
390*22dc650dSSadaf Ebrahimi 
391*22dc650dSSadaf Ebrahimi 	/* The pc+4 offset is represented by the 2 * SSIZE_OF(sljit_u16) below. */
392*22dc650dSSadaf Ebrahimi 	diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
393*22dc650dSSadaf Ebrahimi 
394*22dc650dSSadaf Ebrahimi 	/* Note: ADR with imm8 does not set the last bit (Thumb2 flag). */
395*22dc650dSSadaf Ebrahimi 
396*22dc650dSSadaf Ebrahimi 	if (diff <= 0xffd + 2 * SSIZE_OF(u16) && diff >= -0xfff + 2 * SSIZE_OF(u16)) {
397*22dc650dSSadaf Ebrahimi 		jump->flags |= PATCH_TYPE6;
398*22dc650dSSadaf Ebrahimi 		return 1;
399*22dc650dSSadaf Ebrahimi 	}
400*22dc650dSSadaf Ebrahimi 
401*22dc650dSSadaf Ebrahimi 	return 3;
402*22dc650dSSadaf Ebrahimi }
403*22dc650dSSadaf Ebrahimi 
/* Writes the final machine code of a resolved jump or address load (mov_addr)
   at jump->addr. The form is selected by bits 4-7 of jump->flags (presumably
   the PATCH_TYPEn value recorded earlier - confirm against the PATCH_TYPE
   definitions):
     0     full 32 bit constant written by set_imm32_const()
     6     pc-relative ADDWI / SUBWI form, mov_addr only
     1-3   'B' instruction, encodings T1, T3 and T2
     4, 5  'B' encoding T4 and 'BL' encoding T1
   The target is jump->u.target for JUMP_ADDR jumps, otherwise the address
   stored in the label. executable_offset is applied to the instruction
   address when relative distances are computed. */
generate_jump_or_mov_addr(struct sljit_jump * jump,sljit_sw executable_offset)404*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset)
405*22dc650dSSadaf Ebrahimi {
406*22dc650dSSadaf Ebrahimi 	sljit_s32 type = (jump->flags >> 4) & 0xf;
407*22dc650dSSadaf Ebrahimi 	sljit_u16 *jump_inst = (sljit_u16*)jump->addr;
408*22dc650dSSadaf Ebrahimi 	sljit_sw diff;
409*22dc650dSSadaf Ebrahimi 	sljit_ins ins;
410*22dc650dSSadaf Ebrahimi 
	/* diff starts out as the absolute target address. */
411*22dc650dSSadaf Ebrahimi 	diff = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr);
412*22dc650dSSadaf Ebrahimi 
413*22dc650dSSadaf Ebrahimi 	if (SLJIT_UNLIKELY(type == 0)) {
	/* Absolute form. For mov_addr the halfword currently stored at
	   jump->addr is passed on as the destination field (presumably it
	   already holds the register in bits[11-8] - TODO confirm with the
	   emitter); plain jumps load the address into TMP_REG1. */
414*22dc650dSSadaf Ebrahimi 		ins = (jump->flags & JUMP_MOV_ADDR) ? *jump_inst : RDN3(TMP_REG1);
415*22dc650dSSadaf Ebrahimi 		set_imm32_const((sljit_u16*)jump->addr, ins, (sljit_uw)diff);
416*22dc650dSSadaf Ebrahimi 		return;
417*22dc650dSSadaf Ebrahimi 	}
418*22dc650dSSadaf Ebrahimi 
419*22dc650dSSadaf Ebrahimi 	if (SLJIT_UNLIKELY(type == 6)) {
420*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(jump->flags & JUMP_MOV_ADDR);
	/* Make diff relative to the executable address two halfwords past
	   the instruction, rounded down to a multiple of four. */
421*22dc650dSSadaf Ebrahimi 		diff -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_inst + 2, executable_offset) & ~(sljit_sw)0x3;
422*22dc650dSSadaf Ebrahimi 
423*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(diff <= 0xfff && diff >= -0xfff);
424*22dc650dSSadaf Ebrahimi 
	/* Add for positive distances, subtract the magnitude otherwise. */
425*22dc650dSSadaf Ebrahimi 		ins = ADDWI >> 16;
426*22dc650dSSadaf Ebrahimi 		if (diff <= 0) {
427*22dc650dSSadaf Ebrahimi 			diff = -diff;
428*22dc650dSSadaf Ebrahimi 			ins = SUBWI >> 16;
429*22dc650dSSadaf Ebrahimi 		}
430*22dc650dSSadaf Ebrahimi 
	/* Order matters: the second halfword is built from the old content
	   of jump_inst[0], which the following statement overwrites. The
	   0xf in the first halfword fills its low four bits (the base
	   register field, i.e. pc). */
431*22dc650dSSadaf Ebrahimi 		jump_inst[1] = (sljit_u16)(jump_inst[0] | COPY_BITS(diff, 8, 12, 3) | (diff & 0xff));
432*22dc650dSSadaf Ebrahimi 		jump_inst[0] = (sljit_u16)(ins | 0xf | COPY_BITS(diff, 11, 10, 1));
433*22dc650dSSadaf Ebrahimi 		return;
434*22dc650dSSadaf Ebrahimi 	}
435*22dc650dSSadaf Ebrahimi 
	/* Branch forms: the target must have its lowest bit set. diff becomes
	   the distance from jump->addr + 4 bytes, counted in halfwords. */
436*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT((diff & 0x1) != 0 && !(jump->flags & JUMP_MOV_ADDR));
437*22dc650dSSadaf Ebrahimi 	diff = (diff - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1;
438*22dc650dSSadaf Ebrahimi 
439*22dc650dSSadaf Ebrahimi 	switch (type) {
440*22dc650dSSadaf Ebrahimi 	case 1:
441*22dc650dSSadaf Ebrahimi 		/* Encoding T1 of 'B' instruction */
442*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(diff <= 0x7f && diff >= -0x80 && (jump->flags & IS_COND));
	/* Bits 8-11 of jump->flags are copied unchanged into the
	   instruction (the condition field of this encoding). */
443*22dc650dSSadaf Ebrahimi 		jump_inst[0] = (sljit_u16)(0xd000 | (jump->flags & 0xf00) | ((sljit_ins)diff & 0xff));
444*22dc650dSSadaf Ebrahimi 		return;
445*22dc650dSSadaf Ebrahimi 	case 2:
446*22dc650dSSadaf Ebrahimi 		/* Encoding T3 of 'B' instruction */
447*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(diff <= 0x7ffff && diff >= -0x80000 && (jump->flags & IS_COND));
448*22dc650dSSadaf Ebrahimi 		jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1));
449*22dc650dSSadaf Ebrahimi 		jump_inst[1] = (sljit_u16)(0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | ((sljit_ins)diff & 0x7ff));
450*22dc650dSSadaf Ebrahimi 		return;
451*22dc650dSSadaf Ebrahimi 	case 3:
452*22dc650dSSadaf Ebrahimi 		/* Encoding T2 of 'B' instruction */
453*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(diff <= 0x3ff && diff >= -0x400 && !(jump->flags & IS_COND));
454*22dc650dSSadaf Ebrahimi 		jump_inst[0] = (sljit_u16)(0xe000 | (diff & 0x7ff));
455*22dc650dSSadaf Ebrahimi 		return;
456*22dc650dSSadaf Ebrahimi 	}
457*22dc650dSSadaf Ebrahimi 
	/* Only the two long branch forms (types 4 and 5) get here. */
458*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(diff <= 0x7fffff && diff >= -0x800000);
459*22dc650dSSadaf Ebrahimi 
460*22dc650dSSadaf Ebrahimi 	/* Really complex instruction form for branches. Negate with sign bit. */
	/* Bits 21 and 22 are replaced by the inverse of their exclusive or
	   with the sign bit (bit 23). */
461*22dc650dSSadaf Ebrahimi 	diff ^= ((diff >> 2) & 0x600000) ^ 0x600000;
462*22dc650dSSadaf Ebrahimi 
463*22dc650dSSadaf Ebrahimi 	jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(diff, 11, 0, 10) | COPY_BITS(diff, 23, 10, 1));
464*22dc650dSSadaf Ebrahimi 	jump_inst[1] = (sljit_u16)((diff & 0x7ff) | COPY_BITS(diff, 22, 13, 1) | COPY_BITS(diff, 21, 11, 1));
465*22dc650dSSadaf Ebrahimi 
466*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(type == 4 || type == 5);
467*22dc650dSSadaf Ebrahimi 
468*22dc650dSSadaf Ebrahimi 	/* The others have a common form. */
469*22dc650dSSadaf Ebrahimi 	if (type == 4) /* Encoding T4 of 'B' instruction */
470*22dc650dSSadaf Ebrahimi 		jump_inst[1] |= 0x9000;
471*22dc650dSSadaf Ebrahimi 	else /* Encoding T1 of 'BL' instruction */
472*22dc650dSSadaf Ebrahimi 		jump_inst[1] |= 0xd000;
473*22dc650dSSadaf Ebrahimi }
474*22dc650dSSadaf Ebrahimi 
/* Estimates how short every jump and address load (mov_addr) of 'compiler'
   can be encoded and shrinks the recorded positions accordingly. Only the
   bookkeeping is updated: label->size, const_->addr and jump->addr are
   reduced by the savings accumulated before them, the size chosen for each
   jump is or-ed into jump->flags at JUMP_SIZE_SHIFT, and compiler->size is
   reduced by the total saving.
   NOTE(review): next_min_addr, next_label_size, next_jump_addr and
   next_const_addr are not declared here; presumably SLJIT_NEXT_DEFINE_TYPES
   declares them and SLJIT_GET_NEXT_MIN() selects the smallest of the pending
   positions - confirm in the common sljit sources. */
reduce_code_size(struct sljit_compiler * compiler)475*22dc650dSSadaf Ebrahimi static void reduce_code_size(struct sljit_compiler *compiler)
476*22dc650dSSadaf Ebrahimi {
477*22dc650dSSadaf Ebrahimi 	struct sljit_label *label;
478*22dc650dSSadaf Ebrahimi 	struct sljit_jump *jump;
479*22dc650dSSadaf Ebrahimi 	struct sljit_const *const_;
480*22dc650dSSadaf Ebrahimi 	SLJIT_NEXT_DEFINE_TYPES;
481*22dc650dSSadaf Ebrahimi 	sljit_uw total_size;
482*22dc650dSSadaf Ebrahimi 	sljit_uw size_reduce = 0;
483*22dc650dSSadaf Ebrahimi 	sljit_sw diff;
484*22dc650dSSadaf Ebrahimi 
485*22dc650dSSadaf Ebrahimi 	label = compiler->labels;
486*22dc650dSSadaf Ebrahimi 	jump = compiler->jumps;
487*22dc650dSSadaf Ebrahimi 	const_ = compiler->consts;
488*22dc650dSSadaf Ebrahimi 	SLJIT_NEXT_INIT_TYPES();
489*22dc650dSSadaf Ebrahimi 
	/* Each iteration handles the items recorded at next_min_addr; the
	   loop ends when that value becomes SLJIT_MAX_ADDRESS. */
490*22dc650dSSadaf Ebrahimi 	while (1) {
491*22dc650dSSadaf Ebrahimi 		SLJIT_GET_NEXT_MIN();
492*22dc650dSSadaf Ebrahimi 
493*22dc650dSSadaf Ebrahimi 		if (next_min_addr == SLJIT_MAX_ADDRESS)
494*22dc650dSSadaf Ebrahimi 			break;
495*22dc650dSSadaf Ebrahimi 
	/* A label: move it back by the savings collected so far. There is
	   no 'continue' here, so a const or jump at the same position is
	   handled in the same iteration. */
496*22dc650dSSadaf Ebrahimi 		if (next_min_addr == next_label_size) {
497*22dc650dSSadaf Ebrahimi 			label->size -= size_reduce;
498*22dc650dSSadaf Ebrahimi 
499*22dc650dSSadaf Ebrahimi 			label = label->next;
500*22dc650dSSadaf Ebrahimi 			next_label_size = SLJIT_GET_NEXT_SIZE(label);
501*22dc650dSSadaf Ebrahimi 		}
502*22dc650dSSadaf Ebrahimi 
	/* A constant: only its position changes. */
503*22dc650dSSadaf Ebrahimi 		if (next_min_addr == next_const_addr) {
504*22dc650dSSadaf Ebrahimi 			const_->addr -= size_reduce;
505*22dc650dSSadaf Ebrahimi 			const_ = const_->next;
506*22dc650dSSadaf Ebrahimi 			next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
507*22dc650dSSadaf Ebrahimi 			continue;
508*22dc650dSSadaf Ebrahimi 		}
509*22dc650dSSadaf Ebrahimi 
510*22dc650dSSadaf Ebrahimi 		if (next_min_addr != next_jump_addr)
511*22dc650dSSadaf Ebrahimi 			continue;
512*22dc650dSSadaf Ebrahimi 
	/* A jump or mov_addr: adjust its position, then pick its size. */
513*22dc650dSSadaf Ebrahimi 		jump->addr -= size_reduce;
514*22dc650dSSadaf Ebrahimi 		if (!(jump->flags & JUMP_MOV_ADDR)) {
515*22dc650dSSadaf Ebrahimi 			total_size = JUMP_MAX_SIZE;
516*22dc650dSSadaf Ebrahimi 
	/* Rewritable jumps and jumps to fixed addresses keep the
	   maximum size. */
517*22dc650dSSadaf Ebrahimi 			if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) {
518*22dc650dSSadaf Ebrahimi 				/* Unit size: instruction. */
	/* The byte limits below are divided by SSIZE_OF(u16)
	   because diff is counted in these units. */
519*22dc650dSSadaf Ebrahimi 				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2;
520*22dc650dSSadaf Ebrahimi 
521*22dc650dSSadaf Ebrahimi 				if (jump->flags & IS_COND) {
	/* Conditional limits are tested with diff one unit
	   larger; the increment is undone afterwards. */
522*22dc650dSSadaf Ebrahimi 					diff++;
523*22dc650dSSadaf Ebrahimi 
524*22dc650dSSadaf Ebrahimi 					if (diff <= (0xff / SSIZE_OF(u16)) && diff >= (-0x100 / SSIZE_OF(u16)))
525*22dc650dSSadaf Ebrahimi 						total_size = 0;
526*22dc650dSSadaf Ebrahimi 					else if (diff <= (0xfffff / SSIZE_OF(u16)) && diff >= (-0x100000 / SSIZE_OF(u16)))
527*22dc650dSSadaf Ebrahimi 						total_size = 1;
528*22dc650dSSadaf Ebrahimi 					diff--;
529*22dc650dSSadaf Ebrahimi 				} else if (!(jump->flags & IS_BL) && diff <= (0x7ff / SSIZE_OF(u16)) && diff >= (-0x800 / SSIZE_OF(u16)))
530*22dc650dSSadaf Ebrahimi 					total_size = 1;
531*22dc650dSSadaf Ebrahimi 
	/* No short form matched: try the long branch range. */
532*22dc650dSSadaf Ebrahimi 				if (total_size == JUMP_MAX_SIZE && diff <= (0xffffff / SSIZE_OF(u16)) && diff >= (-0x1000000 / SSIZE_OF(u16)))
533*22dc650dSSadaf Ebrahimi 					total_size = 2;
534*22dc650dSSadaf Ebrahimi 			}
535*22dc650dSSadaf Ebrahimi 
536*22dc650dSSadaf Ebrahimi 			size_reduce += JUMP_MAX_SIZE - total_size;
537*22dc650dSSadaf Ebrahimi 		} else {
538*22dc650dSSadaf Ebrahimi 			/* Real size minus 1. Unit size: instruction. */
539*22dc650dSSadaf Ebrahimi 			total_size = 3;
540*22dc650dSSadaf Ebrahimi 
	/* Only label targets can use the shorter form. */
541*22dc650dSSadaf Ebrahimi 			if (!(jump->flags & JUMP_ADDR)) {
542*22dc650dSSadaf Ebrahimi 				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
543*22dc650dSSadaf Ebrahimi 
544*22dc650dSSadaf Ebrahimi 				if (diff <= (0xffd / SSIZE_OF(u16)) && diff >= (-0xfff / SSIZE_OF(u16)))
545*22dc650dSSadaf Ebrahimi 					total_size = 1;
546*22dc650dSSadaf Ebrahimi 			}
547*22dc650dSSadaf Ebrahimi 
548*22dc650dSSadaf Ebrahimi 			size_reduce += 3 - total_size;
549*22dc650dSSadaf Ebrahimi 		}
550*22dc650dSSadaf Ebrahimi 
	/* Store the chosen size in the flags and advance to the next jump. */
551*22dc650dSSadaf Ebrahimi 		jump->flags |= total_size << JUMP_SIZE_SHIFT;
552*22dc650dSSadaf Ebrahimi 		jump = jump->next;
553*22dc650dSSadaf Ebrahimi 		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
554*22dc650dSSadaf Ebrahimi 	}
555*22dc650dSSadaf Ebrahimi 
556*22dc650dSSadaf Ebrahimi 	compiler->size -= size_reduce;
557*22dc650dSSadaf Ebrahimi }
558*22dc650dSSadaf Ebrahimi 
sljit_generate_code(struct sljit_compiler * compiler,sljit_s32 options,void * exec_allocator_data)559*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
560*22dc650dSSadaf Ebrahimi {
561*22dc650dSSadaf Ebrahimi 	struct sljit_memory_fragment *buf;
562*22dc650dSSadaf Ebrahimi 	sljit_u16 *code;
563*22dc650dSSadaf Ebrahimi 	sljit_u16 *code_ptr;
564*22dc650dSSadaf Ebrahimi 	sljit_u16 *buf_ptr;
565*22dc650dSSadaf Ebrahimi 	sljit_u16 *buf_end;
566*22dc650dSSadaf Ebrahimi 	sljit_uw half_count;
567*22dc650dSSadaf Ebrahimi 	SLJIT_NEXT_DEFINE_TYPES;
568*22dc650dSSadaf Ebrahimi 	sljit_sw addr;
569*22dc650dSSadaf Ebrahimi 	sljit_sw executable_offset;
570*22dc650dSSadaf Ebrahimi 
571*22dc650dSSadaf Ebrahimi 	struct sljit_label *label;
572*22dc650dSSadaf Ebrahimi 	struct sljit_jump *jump;
573*22dc650dSSadaf Ebrahimi 	struct sljit_const *const_;
574*22dc650dSSadaf Ebrahimi 
575*22dc650dSSadaf Ebrahimi 	CHECK_ERROR_PTR();
576*22dc650dSSadaf Ebrahimi 	CHECK_PTR(check_sljit_generate_code(compiler));
577*22dc650dSSadaf Ebrahimi 
578*22dc650dSSadaf Ebrahimi 	reduce_code_size(compiler);
579*22dc650dSSadaf Ebrahimi 
580*22dc650dSSadaf Ebrahimi 	code = (sljit_u16*)allocate_executable_memory(compiler->size * sizeof(sljit_u16), options, exec_allocator_data, &executable_offset);
581*22dc650dSSadaf Ebrahimi 	PTR_FAIL_WITH_EXEC_IF(code);
582*22dc650dSSadaf Ebrahimi 
583*22dc650dSSadaf Ebrahimi 	reverse_buf(compiler);
584*22dc650dSSadaf Ebrahimi 	buf = compiler->buf;
585*22dc650dSSadaf Ebrahimi 
586*22dc650dSSadaf Ebrahimi 	code_ptr = code;
587*22dc650dSSadaf Ebrahimi 	half_count = 0;
588*22dc650dSSadaf Ebrahimi 	label = compiler->labels;
589*22dc650dSSadaf Ebrahimi 	jump = compiler->jumps;
590*22dc650dSSadaf Ebrahimi 	const_ = compiler->consts;
591*22dc650dSSadaf Ebrahimi 	SLJIT_NEXT_INIT_TYPES();
592*22dc650dSSadaf Ebrahimi 	SLJIT_GET_NEXT_MIN();
593*22dc650dSSadaf Ebrahimi 
594*22dc650dSSadaf Ebrahimi 	do {
595*22dc650dSSadaf Ebrahimi 		buf_ptr = (sljit_u16*)buf->memory;
596*22dc650dSSadaf Ebrahimi 		buf_end = buf_ptr + (buf->used_size >> 1);
597*22dc650dSSadaf Ebrahimi 		do {
598*22dc650dSSadaf Ebrahimi 			*code_ptr = *buf_ptr++;
599*22dc650dSSadaf Ebrahimi 			if (next_min_addr == half_count) {
600*22dc650dSSadaf Ebrahimi 				SLJIT_ASSERT(!label || label->size >= half_count);
601*22dc650dSSadaf Ebrahimi 				SLJIT_ASSERT(!jump || jump->addr >= half_count);
602*22dc650dSSadaf Ebrahimi 				SLJIT_ASSERT(!const_ || const_->addr >= half_count);
603*22dc650dSSadaf Ebrahimi 
604*22dc650dSSadaf Ebrahimi 				/* These structures are ordered by their address. */
605*22dc650dSSadaf Ebrahimi 				if (next_min_addr == next_label_size) {
606*22dc650dSSadaf Ebrahimi 					label->u.addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
607*22dc650dSSadaf Ebrahimi 					label->size = (sljit_uw)(code_ptr - code);
608*22dc650dSSadaf Ebrahimi 					label = label->next;
609*22dc650dSSadaf Ebrahimi 					next_label_size = SLJIT_GET_NEXT_SIZE(label);
610*22dc650dSSadaf Ebrahimi 				}
611*22dc650dSSadaf Ebrahimi 
612*22dc650dSSadaf Ebrahimi 				if (next_min_addr == next_jump_addr) {
613*22dc650dSSadaf Ebrahimi 					if (!(jump->flags & JUMP_MOV_ADDR)) {
614*22dc650dSSadaf Ebrahimi 						half_count = half_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);
615*22dc650dSSadaf Ebrahimi 						jump->addr = (sljit_uw)code_ptr;
616*22dc650dSSadaf Ebrahimi 						code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
617*22dc650dSSadaf Ebrahimi 						SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr <
618*22dc650dSSadaf Ebrahimi 							((jump->flags >> JUMP_SIZE_SHIFT) + ((jump->flags & 0xf0) <= PATCH_TYPE2)) * sizeof(sljit_u16));
619*22dc650dSSadaf Ebrahimi 					} else {
620*22dc650dSSadaf Ebrahimi 						half_count += jump->flags >> JUMP_SIZE_SHIFT;
621*22dc650dSSadaf Ebrahimi 						addr = (sljit_sw)code_ptr;
622*22dc650dSSadaf Ebrahimi 						code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
623*22dc650dSSadaf Ebrahimi 						jump->addr = (sljit_uw)addr;
624*22dc650dSSadaf Ebrahimi 					}
625*22dc650dSSadaf Ebrahimi 
626*22dc650dSSadaf Ebrahimi 					jump = jump->next;
627*22dc650dSSadaf Ebrahimi 					next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
628*22dc650dSSadaf Ebrahimi 				} else if (next_min_addr == next_const_addr) {
629*22dc650dSSadaf Ebrahimi 					const_->addr = (sljit_uw)code_ptr;
630*22dc650dSSadaf Ebrahimi 					const_ = const_->next;
631*22dc650dSSadaf Ebrahimi 					next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
632*22dc650dSSadaf Ebrahimi 				}
633*22dc650dSSadaf Ebrahimi 
634*22dc650dSSadaf Ebrahimi 				SLJIT_GET_NEXT_MIN();
635*22dc650dSSadaf Ebrahimi 			}
636*22dc650dSSadaf Ebrahimi 			code_ptr++;
637*22dc650dSSadaf Ebrahimi 			half_count++;
638*22dc650dSSadaf Ebrahimi 		} while (buf_ptr < buf_end);
639*22dc650dSSadaf Ebrahimi 
640*22dc650dSSadaf Ebrahimi 		buf = buf->next;
641*22dc650dSSadaf Ebrahimi 	} while (buf);
642*22dc650dSSadaf Ebrahimi 
643*22dc650dSSadaf Ebrahimi 	if (label && label->size == half_count) {
644*22dc650dSSadaf Ebrahimi 		label->u.addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
645*22dc650dSSadaf Ebrahimi 		label->size = (sljit_uw)(code_ptr - code);
646*22dc650dSSadaf Ebrahimi 		label = label->next;
647*22dc650dSSadaf Ebrahimi 	}
648*22dc650dSSadaf Ebrahimi 
649*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(!label);
650*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(!jump);
651*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(!const_);
652*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
653*22dc650dSSadaf Ebrahimi 
654*22dc650dSSadaf Ebrahimi 	jump = compiler->jumps;
655*22dc650dSSadaf Ebrahimi 	while (jump) {
656*22dc650dSSadaf Ebrahimi 		generate_jump_or_mov_addr(jump, executable_offset);
657*22dc650dSSadaf Ebrahimi 		jump = jump->next;
658*22dc650dSSadaf Ebrahimi 	}
659*22dc650dSSadaf Ebrahimi 
660*22dc650dSSadaf Ebrahimi 	compiler->error = SLJIT_ERR_COMPILED;
661*22dc650dSSadaf Ebrahimi 	compiler->executable_offset = executable_offset;
662*22dc650dSSadaf Ebrahimi 	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_u16);
663*22dc650dSSadaf Ebrahimi 
664*22dc650dSSadaf Ebrahimi 	code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
665*22dc650dSSadaf Ebrahimi 	code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
666*22dc650dSSadaf Ebrahimi 
667*22dc650dSSadaf Ebrahimi 	SLJIT_CACHE_FLUSH(code, code_ptr);
668*22dc650dSSadaf Ebrahimi 	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
669*22dc650dSSadaf Ebrahimi 
670*22dc650dSSadaf Ebrahimi 	/* Set thumb mode flag. */
671*22dc650dSSadaf Ebrahimi 	return (void*)((sljit_uw)code | 0x1);
672*22dc650dSSadaf Ebrahimi }
673*22dc650dSSadaf Ebrahimi 
sljit_has_cpu_feature(sljit_s32 feature_type)674*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
675*22dc650dSSadaf Ebrahimi {
676*22dc650dSSadaf Ebrahimi 	switch (feature_type) {
677*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_FPU:
678*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_F64_AS_F32_PAIR:
679*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_SIMD:
680*22dc650dSSadaf Ebrahimi #ifdef SLJIT_IS_FPU_AVAILABLE
681*22dc650dSSadaf Ebrahimi 		return (SLJIT_IS_FPU_AVAILABLE) != 0;
682*22dc650dSSadaf Ebrahimi #else
683*22dc650dSSadaf Ebrahimi 		/* Available by default. */
684*22dc650dSSadaf Ebrahimi 		return 1;
685*22dc650dSSadaf Ebrahimi #endif
686*22dc650dSSadaf Ebrahimi 
687*22dc650dSSadaf Ebrahimi 	case SLJIT_SIMD_REGS_ARE_PAIRS:
688*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_CLZ:
689*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_CTZ:
690*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_REV:
691*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_ROT:
692*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_CMOV:
693*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_PREFETCH:
694*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_COPY_F32:
695*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_COPY_F64:
696*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_ATOMIC:
697*22dc650dSSadaf Ebrahimi 		return 1;
698*22dc650dSSadaf Ebrahimi 
699*22dc650dSSadaf Ebrahimi 	default:
700*22dc650dSSadaf Ebrahimi 		return 0;
701*22dc650dSSadaf Ebrahimi 	}
702*22dc650dSSadaf Ebrahimi }
703*22dc650dSSadaf Ebrahimi 
/* --------------------------------------------------------------------- */
/*  Core code generator functions.                                       */
/* --------------------------------------------------------------------- */
707*22dc650dSSadaf Ebrahimi 
708*22dc650dSSadaf Ebrahimi #define INVALID_IMM	0x80000000
get_imm(sljit_uw imm)709*22dc650dSSadaf Ebrahimi static sljit_uw get_imm(sljit_uw imm)
710*22dc650dSSadaf Ebrahimi {
711*22dc650dSSadaf Ebrahimi 	/* Thumb immediate form. */
712*22dc650dSSadaf Ebrahimi 	sljit_s32 counter;
713*22dc650dSSadaf Ebrahimi 
714*22dc650dSSadaf Ebrahimi 	if (imm <= 0xff)
715*22dc650dSSadaf Ebrahimi 		return imm;
716*22dc650dSSadaf Ebrahimi 
717*22dc650dSSadaf Ebrahimi 	if ((imm & 0xffff) == (imm >> 16)) {
718*22dc650dSSadaf Ebrahimi 		/* Some special cases. */
719*22dc650dSSadaf Ebrahimi 		if (!(imm & 0xff00))
720*22dc650dSSadaf Ebrahimi 			return (1 << 12) | (imm & 0xff);
721*22dc650dSSadaf Ebrahimi 		if (!(imm & 0xff))
722*22dc650dSSadaf Ebrahimi 			return (2 << 12) | ((imm >> 8) & 0xff);
723*22dc650dSSadaf Ebrahimi 		if ((imm & 0xff00) == ((imm & 0xff) << 8))
724*22dc650dSSadaf Ebrahimi 			return (3 << 12) | (imm & 0xff);
725*22dc650dSSadaf Ebrahimi 	}
726*22dc650dSSadaf Ebrahimi 
727*22dc650dSSadaf Ebrahimi 	/* Assembly optimization: count leading zeroes? */
728*22dc650dSSadaf Ebrahimi 	counter = 8;
729*22dc650dSSadaf Ebrahimi 	if (!(imm & 0xffff0000)) {
730*22dc650dSSadaf Ebrahimi 		counter += 16;
731*22dc650dSSadaf Ebrahimi 		imm <<= 16;
732*22dc650dSSadaf Ebrahimi 	}
733*22dc650dSSadaf Ebrahimi 	if (!(imm & 0xff000000)) {
734*22dc650dSSadaf Ebrahimi 		counter += 8;
735*22dc650dSSadaf Ebrahimi 		imm <<= 8;
736*22dc650dSSadaf Ebrahimi 	}
737*22dc650dSSadaf Ebrahimi 	if (!(imm & 0xf0000000)) {
738*22dc650dSSadaf Ebrahimi 		counter += 4;
739*22dc650dSSadaf Ebrahimi 		imm <<= 4;
740*22dc650dSSadaf Ebrahimi 	}
741*22dc650dSSadaf Ebrahimi 	if (!(imm & 0xc0000000)) {
742*22dc650dSSadaf Ebrahimi 		counter += 2;
743*22dc650dSSadaf Ebrahimi 		imm <<= 2;
744*22dc650dSSadaf Ebrahimi 	}
745*22dc650dSSadaf Ebrahimi 	if (!(imm & 0x80000000)) {
746*22dc650dSSadaf Ebrahimi 		counter += 1;
747*22dc650dSSadaf Ebrahimi 		imm <<= 1;
748*22dc650dSSadaf Ebrahimi 	}
749*22dc650dSSadaf Ebrahimi 	/* Since imm >= 128, this must be true. */
750*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(counter <= 31);
751*22dc650dSSadaf Ebrahimi 
752*22dc650dSSadaf Ebrahimi 	if (imm & 0x00ffffff)
753*22dc650dSSadaf Ebrahimi 		return INVALID_IMM; /* Cannot be encoded. */
754*22dc650dSSadaf Ebrahimi 
755*22dc650dSSadaf Ebrahimi 	return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1);
756*22dc650dSSadaf Ebrahimi }
757*22dc650dSSadaf Ebrahimi 
/*
 * Emits the instruction(s) that load a constant into a register.
 *
 * For values of 0x10000 and above a single MOV / MVN with an encodable
 * immediate is tried first (see get_imm()). Otherwise the low 16 bits are
 * loaded with MOVW, followed by MOVT for the high 16 bits when they are
 * not zero.
 *
 * compiler - compiler instance receiving the instructions.
 * dst      - destination register.
 * imm      - constant to load.
 *
 * Returns SLJIT_SUCCESS, or the value produced by push_inst32() / FAIL_IF
 * when an instruction cannot be emitted.
 */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
	sljit_uw tmp;

	/* MOVS cannot be used since it would destroy the flags. */

	if (imm >= 0x10000) {
		tmp = get_imm(imm);
		if (tmp != INVALID_IMM)
			return push_inst32(compiler, MOV_WI | RD4(dst) | tmp);
		/* Try the bitwise inverse: MVN stores the complement of its operand. */
		tmp = get_imm(~imm);
		if (tmp != INVALID_IMM)
			return push_inst32(compiler, MVN_WI | RD4(dst) | tmp);
	}

	/* Set the low 16 bits, and set the high 16 bits to 0. */
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
		| COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));

	/* Set the high 16 bits if needed. */
	if (imm >= 0x10000)
		return push_inst32(compiler, MOVT | RD4(dst)
			| COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
	return SLJIT_SUCCESS;
}
783*22dc650dSSadaf Ebrahimi 
/* Flag bits of the 'flags' argument of emit_op_imm(). The low 16 bits of
   that argument hold the SLJIT opcode, so all flags are above bit 15. */

/* arg1 is an immediate value instead of a register. */
#define ARG1_IMM	0x0010000
/* arg2 is an immediate value instead of a register. */
#define ARG2_IMM	0x0020000
/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS	0x0100000
/* The result is not needed: compare / test forms (CMP, CMN, TST) may be emitted. */
#define UNUSED_RETURN	0x0200000
/* Checked by the 16 bit byte reversal: when set, the result is also zero / sign extended. */
#define REGISTER_OP	0x0400000
790*22dc650dSSadaf Ebrahimi 
emit_op_imm(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 dst,sljit_uw arg1,sljit_uw arg2)791*22dc650dSSadaf Ebrahimi static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2)
792*22dc650dSSadaf Ebrahimi {
793*22dc650dSSadaf Ebrahimi 	/* dst must be register
794*22dc650dSSadaf Ebrahimi 	   arg1 must be register, imm
795*22dc650dSSadaf Ebrahimi 	   arg2 must be register, imm */
796*22dc650dSSadaf Ebrahimi 	sljit_s32 reg;
797*22dc650dSSadaf Ebrahimi 	sljit_uw imm, imm2;
798*22dc650dSSadaf Ebrahimi 
799*22dc650dSSadaf Ebrahimi 	if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
800*22dc650dSSadaf Ebrahimi 		/* Both are immediates, no temporaries are used. */
801*22dc650dSSadaf Ebrahimi 		flags &= ~ARG1_IMM;
802*22dc650dSSadaf Ebrahimi 		FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
803*22dc650dSSadaf Ebrahimi 		arg1 = TMP_REG1;
804*22dc650dSSadaf Ebrahimi 	}
805*22dc650dSSadaf Ebrahimi 
806*22dc650dSSadaf Ebrahimi 	if (flags & (ARG1_IMM | ARG2_IMM)) {
807*22dc650dSSadaf Ebrahimi 		reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2);
808*22dc650dSSadaf Ebrahimi 		imm = (flags & ARG2_IMM) ? arg2 : arg1;
809*22dc650dSSadaf Ebrahimi 
810*22dc650dSSadaf Ebrahimi 		switch (flags & 0xffff) {
811*22dc650dSSadaf Ebrahimi 		case SLJIT_CLZ:
812*22dc650dSSadaf Ebrahimi 		case SLJIT_CTZ:
813*22dc650dSSadaf Ebrahimi 		case SLJIT_REV:
814*22dc650dSSadaf Ebrahimi 		case SLJIT_REV_U16:
815*22dc650dSSadaf Ebrahimi 		case SLJIT_REV_S16:
816*22dc650dSSadaf Ebrahimi 		case SLJIT_REV_U32:
817*22dc650dSSadaf Ebrahimi 		case SLJIT_REV_S32:
818*22dc650dSSadaf Ebrahimi 		case SLJIT_MUL:
819*22dc650dSSadaf Ebrahimi 		case SLJIT_MULADD:
820*22dc650dSSadaf Ebrahimi 			/* No form with immediate operand. */
821*22dc650dSSadaf Ebrahimi 			break;
822*22dc650dSSadaf Ebrahimi 		case SLJIT_MOV:
823*22dc650dSSadaf Ebrahimi 			SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG2);
824*22dc650dSSadaf Ebrahimi 			return load_immediate(compiler, dst, imm);
825*22dc650dSSadaf Ebrahimi 		case SLJIT_ADD:
826*22dc650dSSadaf Ebrahimi 			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
827*22dc650dSSadaf Ebrahimi 			imm2 = NEGATE(imm);
828*22dc650dSSadaf Ebrahimi 			if (IS_2_LO_REGS(reg, dst)) {
829*22dc650dSSadaf Ebrahimi 				if (imm <= 0x7)
830*22dc650dSSadaf Ebrahimi 					return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
831*22dc650dSSadaf Ebrahimi 				if (imm2 <= 0x7)
832*22dc650dSSadaf Ebrahimi 					return push_inst16(compiler, SUBSI3 | IMM3(imm2) | RD3(dst) | RN3(reg));
833*22dc650dSSadaf Ebrahimi 				if (reg == dst) {
834*22dc650dSSadaf Ebrahimi 					if (imm <= 0xff)
835*22dc650dSSadaf Ebrahimi 						return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst));
836*22dc650dSSadaf Ebrahimi 					if (imm2 <= 0xff)
837*22dc650dSSadaf Ebrahimi 						return push_inst16(compiler, SUBSI8 | IMM8(imm2) | RDN3(dst));
838*22dc650dSSadaf Ebrahimi 				}
839*22dc650dSSadaf Ebrahimi 			}
840*22dc650dSSadaf Ebrahimi 			if (!(flags & SET_FLAGS)) {
841*22dc650dSSadaf Ebrahimi 				if (imm <= 0xfff)
842*22dc650dSSadaf Ebrahimi 					return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm));
843*22dc650dSSadaf Ebrahimi 				if (imm2 <= 0xfff)
844*22dc650dSSadaf Ebrahimi 					return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm2));
845*22dc650dSSadaf Ebrahimi 			}
846*22dc650dSSadaf Ebrahimi 			imm2 = get_imm(imm);
847*22dc650dSSadaf Ebrahimi 			if (imm2 != INVALID_IMM)
848*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
849*22dc650dSSadaf Ebrahimi 			imm = get_imm(NEGATE(imm));
850*22dc650dSSadaf Ebrahimi 			if (imm != INVALID_IMM)
851*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
852*22dc650dSSadaf Ebrahimi 			break;
853*22dc650dSSadaf Ebrahimi 		case SLJIT_ADDC:
854*22dc650dSSadaf Ebrahimi 			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
855*22dc650dSSadaf Ebrahimi 			imm2 = get_imm(imm);
856*22dc650dSSadaf Ebrahimi 			if (imm2 != INVALID_IMM)
857*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
858*22dc650dSSadaf Ebrahimi 			if (flags & ARG2_IMM) {
859*22dc650dSSadaf Ebrahimi 				imm = get_imm(~imm);
860*22dc650dSSadaf Ebrahimi 				if (imm != INVALID_IMM)
861*22dc650dSSadaf Ebrahimi 					return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
862*22dc650dSSadaf Ebrahimi 			}
863*22dc650dSSadaf Ebrahimi 			break;
864*22dc650dSSadaf Ebrahimi 		case SLJIT_SUB:
865*22dc650dSSadaf Ebrahimi 			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
866*22dc650dSSadaf Ebrahimi 			if (flags & ARG1_IMM) {
867*22dc650dSSadaf Ebrahimi 				if (imm == 0 && IS_2_LO_REGS(reg, dst))
868*22dc650dSSadaf Ebrahimi 					return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
869*22dc650dSSadaf Ebrahimi 				imm = get_imm(imm);
870*22dc650dSSadaf Ebrahimi 				if (imm != INVALID_IMM)
871*22dc650dSSadaf Ebrahimi 					return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
872*22dc650dSSadaf Ebrahimi 				break;
873*22dc650dSSadaf Ebrahimi 			}
874*22dc650dSSadaf Ebrahimi 			if (flags & UNUSED_RETURN) {
875*22dc650dSSadaf Ebrahimi 				if (imm <= 0xff && reg_map[reg] <= 7)
876*22dc650dSSadaf Ebrahimi 					return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
877*22dc650dSSadaf Ebrahimi 				imm2 = get_imm(imm);
878*22dc650dSSadaf Ebrahimi 				if (imm2 != INVALID_IMM)
879*22dc650dSSadaf Ebrahimi 					return push_inst32(compiler, CMPI_W | RN4(reg) | imm2);
880*22dc650dSSadaf Ebrahimi 				imm = get_imm(NEGATE(imm));
881*22dc650dSSadaf Ebrahimi 				if (imm != INVALID_IMM)
882*22dc650dSSadaf Ebrahimi 					return push_inst32(compiler, CMNI_W | RN4(reg) | imm);
883*22dc650dSSadaf Ebrahimi 				break;
884*22dc650dSSadaf Ebrahimi 			}
885*22dc650dSSadaf Ebrahimi 			imm2 = NEGATE(imm);
886*22dc650dSSadaf Ebrahimi 			if (IS_2_LO_REGS(reg, dst)) {
887*22dc650dSSadaf Ebrahimi 				if (imm <= 0x7)
888*22dc650dSSadaf Ebrahimi 					return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
889*22dc650dSSadaf Ebrahimi 				if (imm2 <= 0x7)
890*22dc650dSSadaf Ebrahimi 					return push_inst16(compiler, ADDSI3 | IMM3(imm2) | RD3(dst) | RN3(reg));
891*22dc650dSSadaf Ebrahimi 				if (reg == dst) {
892*22dc650dSSadaf Ebrahimi 					if (imm <= 0xff)
893*22dc650dSSadaf Ebrahimi 						return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst));
894*22dc650dSSadaf Ebrahimi 					if (imm2 <= 0xff)
895*22dc650dSSadaf Ebrahimi 						return push_inst16(compiler, ADDSI8 | IMM8(imm2) | RDN3(dst));
896*22dc650dSSadaf Ebrahimi 				}
897*22dc650dSSadaf Ebrahimi 			}
898*22dc650dSSadaf Ebrahimi 			if (!(flags & SET_FLAGS)) {
899*22dc650dSSadaf Ebrahimi 				if (imm <= 0xfff)
900*22dc650dSSadaf Ebrahimi 					return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm));
901*22dc650dSSadaf Ebrahimi 				if (imm2 <= 0xfff)
902*22dc650dSSadaf Ebrahimi 					return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm2));
903*22dc650dSSadaf Ebrahimi 			}
904*22dc650dSSadaf Ebrahimi 			imm2 = get_imm(imm);
905*22dc650dSSadaf Ebrahimi 			if (imm2 != INVALID_IMM)
906*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
907*22dc650dSSadaf Ebrahimi 			imm = get_imm(NEGATE(imm));
908*22dc650dSSadaf Ebrahimi 			if (imm != INVALID_IMM)
909*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
910*22dc650dSSadaf Ebrahimi 			break;
911*22dc650dSSadaf Ebrahimi 		case SLJIT_SUBC:
912*22dc650dSSadaf Ebrahimi 			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
913*22dc650dSSadaf Ebrahimi 			if (flags & ARG1_IMM)
914*22dc650dSSadaf Ebrahimi 				break;
915*22dc650dSSadaf Ebrahimi 			imm2 = get_imm(imm);
916*22dc650dSSadaf Ebrahimi 			if (imm2 != INVALID_IMM)
917*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
918*22dc650dSSadaf Ebrahimi 			imm = get_imm(~imm);
919*22dc650dSSadaf Ebrahimi 			if (imm != INVALID_IMM)
920*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
921*22dc650dSSadaf Ebrahimi 			break;
922*22dc650dSSadaf Ebrahimi 		case SLJIT_AND:
923*22dc650dSSadaf Ebrahimi 			imm2 = get_imm(imm);
924*22dc650dSSadaf Ebrahimi 			if (imm2 != INVALID_IMM)
925*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TSTI : ANDI) | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
926*22dc650dSSadaf Ebrahimi 			imm = get_imm(~imm);
927*22dc650dSSadaf Ebrahimi 			if (imm != INVALID_IMM)
928*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
929*22dc650dSSadaf Ebrahimi 			break;
930*22dc650dSSadaf Ebrahimi 		case SLJIT_OR:
931*22dc650dSSadaf Ebrahimi 			imm2 = get_imm(imm);
932*22dc650dSSadaf Ebrahimi 			if (imm2 != INVALID_IMM)
933*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
934*22dc650dSSadaf Ebrahimi 			imm = get_imm(~imm);
935*22dc650dSSadaf Ebrahimi 			if (imm != INVALID_IMM)
936*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
937*22dc650dSSadaf Ebrahimi 			break;
938*22dc650dSSadaf Ebrahimi 		case SLJIT_XOR:
939*22dc650dSSadaf Ebrahimi 			if (imm == (sljit_uw)-1) {
940*22dc650dSSadaf Ebrahimi 				if (IS_2_LO_REGS(dst, reg))
941*22dc650dSSadaf Ebrahimi 					return push_inst16(compiler, MVNS | RD3(dst) | RN3(reg));
942*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(reg));
943*22dc650dSSadaf Ebrahimi 			}
944*22dc650dSSadaf Ebrahimi 			imm = get_imm(imm);
945*22dc650dSSadaf Ebrahimi 			if (imm != INVALID_IMM)
946*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
947*22dc650dSSadaf Ebrahimi 			break;
948*22dc650dSSadaf Ebrahimi 		case SLJIT_SHL:
949*22dc650dSSadaf Ebrahimi 		case SLJIT_MSHL:
950*22dc650dSSadaf Ebrahimi 		case SLJIT_LSHR:
951*22dc650dSSadaf Ebrahimi 		case SLJIT_MLSHR:
952*22dc650dSSadaf Ebrahimi 		case SLJIT_ASHR:
953*22dc650dSSadaf Ebrahimi 		case SLJIT_MASHR:
954*22dc650dSSadaf Ebrahimi 		case SLJIT_ROTL:
955*22dc650dSSadaf Ebrahimi 		case SLJIT_ROTR:
956*22dc650dSSadaf Ebrahimi 			if (flags & ARG1_IMM)
957*22dc650dSSadaf Ebrahimi 				break;
958*22dc650dSSadaf Ebrahimi 			imm &= 0x1f;
959*22dc650dSSadaf Ebrahimi 
960*22dc650dSSadaf Ebrahimi 			if (imm == 0) {
961*22dc650dSSadaf Ebrahimi 				if (!(flags & SET_FLAGS))
962*22dc650dSSadaf Ebrahimi 					return push_inst16(compiler, MOV | SET_REGS44(dst, reg));
963*22dc650dSSadaf Ebrahimi 				if (IS_2_LO_REGS(dst, reg))
964*22dc650dSSadaf Ebrahimi 					return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg));
965*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg));
966*22dc650dSSadaf Ebrahimi 			}
967*22dc650dSSadaf Ebrahimi 
968*22dc650dSSadaf Ebrahimi 			switch (flags & 0xffff) {
969*22dc650dSSadaf Ebrahimi 			case SLJIT_SHL:
970*22dc650dSSadaf Ebrahimi 			case SLJIT_MSHL:
971*22dc650dSSadaf Ebrahimi 				if (IS_2_LO_REGS(dst, reg))
972*22dc650dSSadaf Ebrahimi 					return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6));
973*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
974*22dc650dSSadaf Ebrahimi 			case SLJIT_LSHR:
975*22dc650dSSadaf Ebrahimi 			case SLJIT_MLSHR:
976*22dc650dSSadaf Ebrahimi 				if (IS_2_LO_REGS(dst, reg))
977*22dc650dSSadaf Ebrahimi 					return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6));
978*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
979*22dc650dSSadaf Ebrahimi 			case SLJIT_ASHR:
980*22dc650dSSadaf Ebrahimi 			case SLJIT_MASHR:
981*22dc650dSSadaf Ebrahimi 				if (IS_2_LO_REGS(dst, reg))
982*22dc650dSSadaf Ebrahimi 					return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6));
983*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
984*22dc650dSSadaf Ebrahimi 			case SLJIT_ROTL:
985*22dc650dSSadaf Ebrahimi 				imm = (imm ^ 0x1f) + 1;
986*22dc650dSSadaf Ebrahimi 				/* fallthrough */
987*22dc650dSSadaf Ebrahimi 			default: /* SLJIT_ROTR */
988*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, ROR_WI | RD4(dst) | RM4(reg) | IMM5(imm));
989*22dc650dSSadaf Ebrahimi 			}
990*22dc650dSSadaf Ebrahimi 		default:
991*22dc650dSSadaf Ebrahimi 			SLJIT_UNREACHABLE();
992*22dc650dSSadaf Ebrahimi 			break;
993*22dc650dSSadaf Ebrahimi 		}
994*22dc650dSSadaf Ebrahimi 
995*22dc650dSSadaf Ebrahimi 		if (flags & ARG2_IMM) {
996*22dc650dSSadaf Ebrahimi 			imm = arg2;
997*22dc650dSSadaf Ebrahimi 			arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
998*22dc650dSSadaf Ebrahimi 			FAIL_IF(load_immediate(compiler, (sljit_s32)arg2, imm));
999*22dc650dSSadaf Ebrahimi 		} else {
1000*22dc650dSSadaf Ebrahimi 			imm = arg1;
1001*22dc650dSSadaf Ebrahimi 			arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
1002*22dc650dSSadaf Ebrahimi 			FAIL_IF(load_immediate(compiler, (sljit_s32)arg1, imm));
1003*22dc650dSSadaf Ebrahimi 		}
1004*22dc650dSSadaf Ebrahimi 
1005*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(arg1 != arg2);
1006*22dc650dSSadaf Ebrahimi 	}
1007*22dc650dSSadaf Ebrahimi 
1008*22dc650dSSadaf Ebrahimi 	/* Both arguments are registers. */
1009*22dc650dSSadaf Ebrahimi 	switch (flags & 0xffff) {
1010*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV:
1011*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U32:
1012*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_S32:
1013*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV32:
1014*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_P:
1015*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
1016*22dc650dSSadaf Ebrahimi 		if (dst == (sljit_s32)arg2)
1017*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
1018*22dc650dSSadaf Ebrahimi 		return push_inst16(compiler, MOV | SET_REGS44(dst, arg2));
1019*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U8:
1020*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
1021*22dc650dSSadaf Ebrahimi 		if (IS_2_LO_REGS(dst, arg2))
1022*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2));
1023*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2));
1024*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_S8:
1025*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
1026*22dc650dSSadaf Ebrahimi 		if (IS_2_LO_REGS(dst, arg2))
1027*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2));
1028*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2));
1029*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U16:
1030*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
1031*22dc650dSSadaf Ebrahimi 		if (IS_2_LO_REGS(dst, arg2))
1032*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2));
1033*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2));
1034*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_S16:
1035*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
1036*22dc650dSSadaf Ebrahimi 		if (IS_2_LO_REGS(dst, arg2))
1037*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
1038*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2));
1039*22dc650dSSadaf Ebrahimi 	case SLJIT_CLZ:
1040*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(arg1 == TMP_REG2);
1041*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2));
1042*22dc650dSSadaf Ebrahimi 	case SLJIT_CTZ:
1043*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(arg1 == TMP_REG2);
1044*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, RBIT | RN4(arg2) | RD4(dst) | RM4(arg2)));
1045*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, CLZ | RN4(dst) | RD4(dst) | RM4(dst));
1046*22dc650dSSadaf Ebrahimi 	case SLJIT_REV:
1047*22dc650dSSadaf Ebrahimi 	case SLJIT_REV_U32:
1048*22dc650dSSadaf Ebrahimi 	case SLJIT_REV_S32:
1049*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(arg1 == TMP_REG2);
1050*22dc650dSSadaf Ebrahimi 		if (IS_2_LO_REGS(dst, arg2))
1051*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, REV | RD3(dst) | RN3(arg2));
1052*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, REV_W | RN4(arg2) | RD4(dst) | RM4(arg2));
1053*22dc650dSSadaf Ebrahimi 	case SLJIT_REV_U16:
1054*22dc650dSSadaf Ebrahimi 	case SLJIT_REV_S16:
1055*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(arg1 == TMP_REG2);
1056*22dc650dSSadaf Ebrahimi 
1057*22dc650dSSadaf Ebrahimi 		if (IS_2_LO_REGS(dst, arg2))
1058*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst16(compiler, REV16 | RD3(dst) | RN3(arg2)));
1059*22dc650dSSadaf Ebrahimi 		else
1060*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, REV16_W | RN4(arg2) | RD4(dst) | RM4(arg2)));
1061*22dc650dSSadaf Ebrahimi 
1062*22dc650dSSadaf Ebrahimi 		if (!(flags & REGISTER_OP))
1063*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
1064*22dc650dSSadaf Ebrahimi 
1065*22dc650dSSadaf Ebrahimi 		flags &= 0xffff;
1066*22dc650dSSadaf Ebrahimi 		if (reg_map[dst] <= 7)
1067*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, (flags == SLJIT_REV_U16 ? UXTH : SXTH) | RD3(dst) | RN3(dst));
1068*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, (flags == SLJIT_REV_U16 ? UXTH_W : SXTH_W) | RD4(dst) | RM4(dst));
1069*22dc650dSSadaf Ebrahimi 	case SLJIT_ADD:
1070*22dc650dSSadaf Ebrahimi 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
1071*22dc650dSSadaf Ebrahimi 		if (IS_3_LO_REGS(dst, arg1, arg2))
1072*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2));
1073*22dc650dSSadaf Ebrahimi 		if (dst == (sljit_s32)arg1 && !(flags & SET_FLAGS))
1074*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, ADD | SET_REGS44(dst, arg2));
1075*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1076*22dc650dSSadaf Ebrahimi 	case SLJIT_ADDC:
1077*22dc650dSSadaf Ebrahimi 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
1078*22dc650dSSadaf Ebrahimi 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1079*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
1080*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1081*22dc650dSSadaf Ebrahimi 	case SLJIT_SUB:
1082*22dc650dSSadaf Ebrahimi 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
1083*22dc650dSSadaf Ebrahimi 		if (flags & UNUSED_RETURN) {
1084*22dc650dSSadaf Ebrahimi 			if (IS_2_LO_REGS(arg1, arg2))
1085*22dc650dSSadaf Ebrahimi 				return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2));
1086*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, CMP_X | SET_REGS44(arg1, arg2));
1087*22dc650dSSadaf Ebrahimi 		}
1088*22dc650dSSadaf Ebrahimi 		if (IS_3_LO_REGS(dst, arg1, arg2))
1089*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
1090*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1091*22dc650dSSadaf Ebrahimi 	case SLJIT_SUBC:
1092*22dc650dSSadaf Ebrahimi 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
1093*22dc650dSSadaf Ebrahimi 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1094*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2));
1095*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1096*22dc650dSSadaf Ebrahimi 	case SLJIT_MUL:
1097*22dc650dSSadaf Ebrahimi 		compiler->status_flags_state = 0;
1098*22dc650dSSadaf Ebrahimi 		if (!(flags & SET_FLAGS))
1099*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2));
1100*22dc650dSSadaf Ebrahimi 		reg = (dst == TMP_REG2) ? TMP_REG1 : TMP_REG2;
1101*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(reg) | RN4(arg1) | RM4(arg2)));
1102*22dc650dSSadaf Ebrahimi 		/* cmp TMP_REG2, dst asr #31. */
1103*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, CMP_W | RN4(reg) | 0x70e0 | RM4(dst));
1104*22dc650dSSadaf Ebrahimi 	case SLJIT_AND:
1105*22dc650dSSadaf Ebrahimi 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1106*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
1107*22dc650dSSadaf Ebrahimi 		if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
1108*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
1109*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TST_W : AND_W) | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1110*22dc650dSSadaf Ebrahimi 	case SLJIT_OR:
1111*22dc650dSSadaf Ebrahimi 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1112*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2));
1113*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1114*22dc650dSSadaf Ebrahimi 	case SLJIT_XOR:
1115*22dc650dSSadaf Ebrahimi 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1116*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2));
1117*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1118*22dc650dSSadaf Ebrahimi 	case SLJIT_MSHL:
1119*22dc650dSSadaf Ebrahimi 		reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
1120*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f));
1121*22dc650dSSadaf Ebrahimi 		arg2 = (sljit_uw)reg;
1122*22dc650dSSadaf Ebrahimi 		/* fallthrough */
1123*22dc650dSSadaf Ebrahimi 	case SLJIT_SHL:
1124*22dc650dSSadaf Ebrahimi 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1125*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2));
1126*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1127*22dc650dSSadaf Ebrahimi 	case SLJIT_MLSHR:
1128*22dc650dSSadaf Ebrahimi 		reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
1129*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f));
1130*22dc650dSSadaf Ebrahimi 		arg2 = (sljit_uw)reg;
1131*22dc650dSSadaf Ebrahimi 		/* fallthrough */
1132*22dc650dSSadaf Ebrahimi 	case SLJIT_LSHR:
1133*22dc650dSSadaf Ebrahimi 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1134*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2));
1135*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1136*22dc650dSSadaf Ebrahimi 	case SLJIT_MASHR:
1137*22dc650dSSadaf Ebrahimi 		reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
1138*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f));
1139*22dc650dSSadaf Ebrahimi 		arg2 = (sljit_uw)reg;
1140*22dc650dSSadaf Ebrahimi 		/* fallthrough */
1141*22dc650dSSadaf Ebrahimi 	case SLJIT_ASHR:
1142*22dc650dSSadaf Ebrahimi 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1143*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2));
1144*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1145*22dc650dSSadaf Ebrahimi 	case SLJIT_ROTL:
1146*22dc650dSSadaf Ebrahimi 		reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
1147*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, RSB_WI | RD4(reg) | RN4(arg2) | 0));
1148*22dc650dSSadaf Ebrahimi 		arg2 = (sljit_uw)reg;
1149*22dc650dSSadaf Ebrahimi 		/* fallthrough */
1150*22dc650dSSadaf Ebrahimi 	case SLJIT_ROTR:
1151*22dc650dSSadaf Ebrahimi 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1152*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, RORS | RD3(dst) | RN3(arg2));
1153*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, ROR_W | RD4(dst) | RN4(arg1) | RM4(arg2));
1154*22dc650dSSadaf Ebrahimi 	case SLJIT_MULADD:
1155*22dc650dSSadaf Ebrahimi 		compiler->status_flags_state = 0;
1156*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, MLA | RD4(dst) | RN4(arg1) | RM4(arg2) | RT4(dst));
1157*22dc650dSSadaf Ebrahimi 	}
1158*22dc650dSSadaf Ebrahimi 
1159*22dc650dSSadaf Ebrahimi 	SLJIT_UNREACHABLE();
1160*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
1161*22dc650dSSadaf Ebrahimi }
1162*22dc650dSSadaf Ebrahimi 
/* Memory access flags. Their bitwise OR is the index into the
   sljit_mem16, sljit_mem16_imm5 and sljit_mem32 opcode tables. */

/* Bit 0: store when set, load when clear. */
#define STORE		0x1
/* Bit 1: signed access when set. */
#define SIGNED		0x2

/* Bits 2-3: access size. PRELOAD selects the extra (13th) sljit_mem32 entry. */
#define WORD_SIZE	0x0
#define BYTE_SIZE	0x4
#define HALF_SIZE	0x8
#define PRELOAD		0xc

/* Non-zero when neither the byte nor the half size bit is present in flags. */
#define IS_WORD_SIZE(flags) \
	(((flags) & (BYTE_SIZE | HALF_SIZE)) == 0)

/* Non-zero when argw has no bits set outside of (imm << shift). */
#define ALIGN_CHECK(argw, imm, shift) \
	(((argw) & ~((imm) << (shift))) == 0)
1173*22dc650dSSadaf Ebrahimi 
1174*22dc650dSSadaf Ebrahimi /*
1175*22dc650dSSadaf Ebrahimi   1st letter:
1176*22dc650dSSadaf Ebrahimi   w = word
1177*22dc650dSSadaf Ebrahimi   b = byte
1178*22dc650dSSadaf Ebrahimi   h = half
1179*22dc650dSSadaf Ebrahimi 
1180*22dc650dSSadaf Ebrahimi   2nd letter:
1181*22dc650dSSadaf Ebrahimi   s = signed
1182*22dc650dSSadaf Ebrahimi   u = unsigned
1183*22dc650dSSadaf Ebrahimi 
1184*22dc650dSSadaf Ebrahimi   3rd letter:
1185*22dc650dSSadaf Ebrahimi   l = load
1186*22dc650dSSadaf Ebrahimi   s = store
1187*22dc650dSSadaf Ebrahimi */
1188*22dc650dSSadaf Ebrahimi 
1189*22dc650dSSadaf Ebrahimi static const sljit_ins sljit_mem16[12] = {
1190*22dc650dSSadaf Ebrahimi /* w u l */ 0x5800 /* ldr */,
1191*22dc650dSSadaf Ebrahimi /* w u s */ 0x5000 /* str */,
1192*22dc650dSSadaf Ebrahimi /* w s l */ 0x5800 /* ldr */,
1193*22dc650dSSadaf Ebrahimi /* w s s */ 0x5000 /* str */,
1194*22dc650dSSadaf Ebrahimi 
1195*22dc650dSSadaf Ebrahimi /* b u l */ 0x5c00 /* ldrb */,
1196*22dc650dSSadaf Ebrahimi /* b u s */ 0x5400 /* strb */,
1197*22dc650dSSadaf Ebrahimi /* b s l */ 0x5600 /* ldrsb */,
1198*22dc650dSSadaf Ebrahimi /* b s s */ 0x5400 /* strb */,
1199*22dc650dSSadaf Ebrahimi 
1200*22dc650dSSadaf Ebrahimi /* h u l */ 0x5a00 /* ldrh */,
1201*22dc650dSSadaf Ebrahimi /* h u s */ 0x5200 /* strh */,
1202*22dc650dSSadaf Ebrahimi /* h s l */ 0x5e00 /* ldrsh */,
1203*22dc650dSSadaf Ebrahimi /* h s s */ 0x5200 /* strh */,
1204*22dc650dSSadaf Ebrahimi };
1205*22dc650dSSadaf Ebrahimi 
1206*22dc650dSSadaf Ebrahimi static const sljit_ins sljit_mem16_imm5[12] = {
1207*22dc650dSSadaf Ebrahimi /* w u l */ 0x6800 /* ldr imm5 */,
1208*22dc650dSSadaf Ebrahimi /* w u s */ 0x6000 /* str imm5 */,
1209*22dc650dSSadaf Ebrahimi /* w s l */ 0x6800 /* ldr imm5 */,
1210*22dc650dSSadaf Ebrahimi /* w s s */ 0x6000 /* str imm5 */,
1211*22dc650dSSadaf Ebrahimi 
1212*22dc650dSSadaf Ebrahimi /* b u l */ 0x7800 /* ldrb imm5 */,
1213*22dc650dSSadaf Ebrahimi /* b u s */ 0x7000 /* strb imm5 */,
1214*22dc650dSSadaf Ebrahimi /* b s l */ 0x0000 /* not allowed */,
1215*22dc650dSSadaf Ebrahimi /* b s s */ 0x7000 /* strb imm5 */,
1216*22dc650dSSadaf Ebrahimi 
1217*22dc650dSSadaf Ebrahimi /* h u l */ 0x8800 /* ldrh imm5 */,
1218*22dc650dSSadaf Ebrahimi /* h u s */ 0x8000 /* strh imm5 */,
1219*22dc650dSSadaf Ebrahimi /* h s l */ 0x0000 /* not allowed */,
1220*22dc650dSSadaf Ebrahimi /* h s s */ 0x8000 /* strh imm5 */,
1221*22dc650dSSadaf Ebrahimi };
1222*22dc650dSSadaf Ebrahimi 
1223*22dc650dSSadaf Ebrahimi #define MEM_IMM8	0xc00
1224*22dc650dSSadaf Ebrahimi #define MEM_IMM12	0x800000
1225*22dc650dSSadaf Ebrahimi static const sljit_ins sljit_mem32[13] = {
1226*22dc650dSSadaf Ebrahimi /* w u l */ 0xf8500000 /* ldr.w */,
1227*22dc650dSSadaf Ebrahimi /* w u s */ 0xf8400000 /* str.w */,
1228*22dc650dSSadaf Ebrahimi /* w s l */ 0xf8500000 /* ldr.w */,
1229*22dc650dSSadaf Ebrahimi /* w s s */ 0xf8400000 /* str.w */,
1230*22dc650dSSadaf Ebrahimi 
1231*22dc650dSSadaf Ebrahimi /* b u l */ 0xf8100000 /* ldrb.w */,
1232*22dc650dSSadaf Ebrahimi /* b u s */ 0xf8000000 /* strb.w */,
1233*22dc650dSSadaf Ebrahimi /* b s l */ 0xf9100000 /* ldrsb.w */,
1234*22dc650dSSadaf Ebrahimi /* b s s */ 0xf8000000 /* strb.w */,
1235*22dc650dSSadaf Ebrahimi 
1236*22dc650dSSadaf Ebrahimi /* h u l */ 0xf8300000 /* ldrh.w */,
1237*22dc650dSSadaf Ebrahimi /* h u s */ 0xf8200000 /* strsh.w */,
1238*22dc650dSSadaf Ebrahimi /* h s l */ 0xf9300000 /* ldrsh.w */,
1239*22dc650dSSadaf Ebrahimi /* h s s */ 0xf8200000 /* strsh.w */,
1240*22dc650dSSadaf Ebrahimi 
1241*22dc650dSSadaf Ebrahimi /* p u l */ 0xf8100000 /* pld */,
1242*22dc650dSSadaf Ebrahimi };
1243*22dc650dSSadaf Ebrahimi 
1244*22dc650dSSadaf Ebrahimi /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
emit_set_delta(struct sljit_compiler * compiler,sljit_s32 dst,sljit_s32 reg,sljit_sw value)1245*22dc650dSSadaf Ebrahimi static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value)
1246*22dc650dSSadaf Ebrahimi {
1247*22dc650dSSadaf Ebrahimi 	sljit_uw imm;
1248*22dc650dSSadaf Ebrahimi 
1249*22dc650dSSadaf Ebrahimi 	if (value >= 0) {
1250*22dc650dSSadaf Ebrahimi 		if (value <= 0xfff)
1251*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value));
1252*22dc650dSSadaf Ebrahimi 		imm = get_imm((sljit_uw)value);
1253*22dc650dSSadaf Ebrahimi 		if (imm != INVALID_IMM)
1254*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | imm);
1255*22dc650dSSadaf Ebrahimi 	}
1256*22dc650dSSadaf Ebrahimi 	else {
1257*22dc650dSSadaf Ebrahimi 		value = -value;
1258*22dc650dSSadaf Ebrahimi 		if (value <= 0xfff)
1259*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value));
1260*22dc650dSSadaf Ebrahimi 		imm = get_imm((sljit_uw)value);
1261*22dc650dSSadaf Ebrahimi 		if (imm != INVALID_IMM)
1262*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | imm);
1263*22dc650dSSadaf Ebrahimi 	}
1264*22dc650dSSadaf Ebrahimi 	return SLJIT_ERR_UNSUPPORTED;
1265*22dc650dSSadaf Ebrahimi }
1266*22dc650dSSadaf Ebrahimi 
emit_op_mem(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw,sljit_s32 tmp_reg)1267*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
1268*22dc650dSSadaf Ebrahimi 	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
1269*22dc650dSSadaf Ebrahimi {
1270*22dc650dSSadaf Ebrahimi 	sljit_s32 other_r;
1271*22dc650dSSadaf Ebrahimi 	sljit_uw imm, tmp;
1272*22dc650dSSadaf Ebrahimi 
1273*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(arg & SLJIT_MEM);
1274*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -0xff && argw <= 0xfff));
1275*22dc650dSSadaf Ebrahimi 
1276*22dc650dSSadaf Ebrahimi 	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
1277*22dc650dSSadaf Ebrahimi 		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff);
1278*22dc650dSSadaf Ebrahimi 		if (imm != INVALID_IMM) {
1279*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(tmp_reg) | imm));
1280*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg) | (argw & 0xfff));
1281*22dc650dSSadaf Ebrahimi 		}
1282*22dc650dSSadaf Ebrahimi 
1283*22dc650dSSadaf Ebrahimi 		FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
1284*22dc650dSSadaf Ebrahimi 		if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags])
1285*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg));
1286*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg));
1287*22dc650dSSadaf Ebrahimi 	}
1288*22dc650dSSadaf Ebrahimi 
1289*22dc650dSSadaf Ebrahimi 	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1290*22dc650dSSadaf Ebrahimi 		argw &= 0x3;
1291*22dc650dSSadaf Ebrahimi 		other_r = OFFS_REG(arg);
1292*22dc650dSSadaf Ebrahimi 		arg &= REG_MASK;
1293*22dc650dSSadaf Ebrahimi 
1294*22dc650dSSadaf Ebrahimi 		if (!argw && IS_3_LO_REGS(reg, arg, other_r))
1295*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r));
1296*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | ((sljit_ins)argw << 4));
1297*22dc650dSSadaf Ebrahimi 	}
1298*22dc650dSSadaf Ebrahimi 
1299*22dc650dSSadaf Ebrahimi 	arg &= REG_MASK;
1300*22dc650dSSadaf Ebrahimi 
1301*22dc650dSSadaf Ebrahimi 	if (argw > 0xfff) {
1302*22dc650dSSadaf Ebrahimi 		imm = get_imm((sljit_uw)(argw & ~0xfff));
1303*22dc650dSSadaf Ebrahimi 		if (imm != INVALID_IMM) {
1304*22dc650dSSadaf Ebrahimi 			push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | imm);
1305*22dc650dSSadaf Ebrahimi 			arg = tmp_reg;
1306*22dc650dSSadaf Ebrahimi 			argw = argw & 0xfff;
1307*22dc650dSSadaf Ebrahimi 		}
1308*22dc650dSSadaf Ebrahimi 	}
1309*22dc650dSSadaf Ebrahimi 	else if (argw < -0xff) {
1310*22dc650dSSadaf Ebrahimi 		tmp = (sljit_uw)((-argw + 0xfff) & ~0xfff);
1311*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(tmp >= (sljit_uw)-argw);
1312*22dc650dSSadaf Ebrahimi 		imm = get_imm(tmp);
1313*22dc650dSSadaf Ebrahimi 
1314*22dc650dSSadaf Ebrahimi 		if (imm != INVALID_IMM) {
1315*22dc650dSSadaf Ebrahimi 			push_inst32(compiler, SUB_WI | RD4(tmp_reg) | RN4(arg) | imm);
1316*22dc650dSSadaf Ebrahimi 			arg = tmp_reg;
1317*22dc650dSSadaf Ebrahimi 			argw += (sljit_sw)tmp;
1318*22dc650dSSadaf Ebrahimi 
1319*22dc650dSSadaf Ebrahimi 			SLJIT_ASSERT(argw >= 0 && argw <= 0xfff);
1320*22dc650dSSadaf Ebrahimi 		}
1321*22dc650dSSadaf Ebrahimi 	}
1322*22dc650dSSadaf Ebrahimi 
1323*22dc650dSSadaf Ebrahimi 	/* 16 bit instruction forms. */
1324*22dc650dSSadaf Ebrahimi 	if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
1325*22dc650dSSadaf Ebrahimi 		tmp = 3;
1326*22dc650dSSadaf Ebrahimi 		if (IS_WORD_SIZE(flags)) {
1327*22dc650dSSadaf Ebrahimi 			if (ALIGN_CHECK(argw, 0x1f, 2))
1328*22dc650dSSadaf Ebrahimi 				tmp = 2;
1329*22dc650dSSadaf Ebrahimi 		}
1330*22dc650dSSadaf Ebrahimi 		else if (flags & BYTE_SIZE)
1331*22dc650dSSadaf Ebrahimi 		{
1332*22dc650dSSadaf Ebrahimi 			if (ALIGN_CHECK(argw, 0x1f, 0))
1333*22dc650dSSadaf Ebrahimi 				tmp = 0;
1334*22dc650dSSadaf Ebrahimi 		}
1335*22dc650dSSadaf Ebrahimi 		else {
1336*22dc650dSSadaf Ebrahimi 			SLJIT_ASSERT(flags & HALF_SIZE);
1337*22dc650dSSadaf Ebrahimi 			if (ALIGN_CHECK(argw, 0x1f, 1))
1338*22dc650dSSadaf Ebrahimi 				tmp = 1;
1339*22dc650dSSadaf Ebrahimi 		}
1340*22dc650dSSadaf Ebrahimi 
1341*22dc650dSSadaf Ebrahimi 		if (tmp < 3)
1342*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | ((sljit_ins)argw << (6 - tmp)));
1343*22dc650dSSadaf Ebrahimi 	}
1344*22dc650dSSadaf Ebrahimi 	else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && ALIGN_CHECK(argw, 0xff, 2) && reg_map[reg] <= 7) {
1345*22dc650dSSadaf Ebrahimi 		/* SP based immediate. */
1346*22dc650dSSadaf Ebrahimi 		return push_inst16(compiler, STR_SP | (sljit_ins)((flags & STORE) ? 0 : 0x800) | RDN3(reg) | ((sljit_ins)argw >> 2));
1347*22dc650dSSadaf Ebrahimi 	}
1348*22dc650dSSadaf Ebrahimi 
1349*22dc650dSSadaf Ebrahimi 	if (argw >= 0 && argw <= 0xfff)
1350*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | (sljit_ins)argw);
1351*22dc650dSSadaf Ebrahimi 	else if (argw < 0 && argw >= -0xff)
1352*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | (sljit_ins)-argw);
1353*22dc650dSSadaf Ebrahimi 
1354*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(arg != tmp_reg);
1355*22dc650dSSadaf Ebrahimi 
1356*22dc650dSSadaf Ebrahimi 	FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
1357*22dc650dSSadaf Ebrahimi 	if (IS_3_LO_REGS(reg, arg, tmp_reg))
1358*22dc650dSSadaf Ebrahimi 		return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg));
1359*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg));
1360*22dc650dSSadaf Ebrahimi }
1361*22dc650dSSadaf Ebrahimi 
1362*22dc650dSSadaf Ebrahimi #undef ALIGN_CHECK
1363*22dc650dSSadaf Ebrahimi #undef IS_WORD_SIZE
1364*22dc650dSSadaf Ebrahimi 
1365*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
1366*22dc650dSSadaf Ebrahimi /*  Entry, exit                                                          */
1367*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
1368*22dc650dSSadaf Ebrahimi 
sljit_emit_enter(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1369*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
1370*22dc650dSSadaf Ebrahimi 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1371*22dc650dSSadaf Ebrahimi 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1372*22dc650dSSadaf Ebrahimi {
1373*22dc650dSSadaf Ebrahimi 	sljit_s32 size, i, tmp, word_arg_count;
1374*22dc650dSSadaf Ebrahimi 	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
1375*22dc650dSSadaf Ebrahimi 	sljit_uw offset;
1376*22dc650dSSadaf Ebrahimi 	sljit_uw imm = 0;
1377*22dc650dSSadaf Ebrahimi #ifdef __SOFTFP__
1378*22dc650dSSadaf Ebrahimi 	sljit_u32 float_arg_count;
1379*22dc650dSSadaf Ebrahimi #else
1380*22dc650dSSadaf Ebrahimi 	sljit_u32 old_offset, f32_offset;
1381*22dc650dSSadaf Ebrahimi 	sljit_u32 remap[3];
1382*22dc650dSSadaf Ebrahimi 	sljit_u32 *remap_ptr = remap;
1383*22dc650dSSadaf Ebrahimi #endif
1384*22dc650dSSadaf Ebrahimi 
1385*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
1386*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1387*22dc650dSSadaf Ebrahimi 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1388*22dc650dSSadaf Ebrahimi 
1389*22dc650dSSadaf Ebrahimi 	tmp = SLJIT_S0 - saveds;
1390*22dc650dSSadaf Ebrahimi 	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
1391*22dc650dSSadaf Ebrahimi 		imm |= (sljit_uw)1 << reg_map[i];
1392*22dc650dSSadaf Ebrahimi 
1393*22dc650dSSadaf Ebrahimi 	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
1394*22dc650dSSadaf Ebrahimi 		imm |= (sljit_uw)1 << reg_map[i];
1395*22dc650dSSadaf Ebrahimi 
1396*22dc650dSSadaf Ebrahimi 	/* At least two registers must be set for PUSH_W and one for PUSH instruction. */
1397*22dc650dSSadaf Ebrahimi 	FAIL_IF((imm & 0xff00)
1398*22dc650dSSadaf Ebrahimi 		? push_inst32(compiler, PUSH_W | (1 << 14) | imm)
1399*22dc650dSSadaf Ebrahimi 		: push_inst16(compiler, PUSH | (1 << 8) | imm));
1400*22dc650dSSadaf Ebrahimi 
1401*22dc650dSSadaf Ebrahimi 	/* Stack must be aligned to 8 bytes: (LR, R4) */
1402*22dc650dSSadaf Ebrahimi 	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
1403*22dc650dSSadaf Ebrahimi 
1404*22dc650dSSadaf Ebrahimi 	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
1405*22dc650dSSadaf Ebrahimi 		if ((size & SSIZE_OF(sw)) != 0) {
1406*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst16(compiler, SUB_SP_I | (sizeof(sljit_sw) >> 2)));
1407*22dc650dSSadaf Ebrahimi 			size += SSIZE_OF(sw);
1408*22dc650dSSadaf Ebrahimi 		}
1409*22dc650dSSadaf Ebrahimi 
1410*22dc650dSSadaf Ebrahimi 		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
1411*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
1412*22dc650dSSadaf Ebrahimi 		} else {
1413*22dc650dSSadaf Ebrahimi 			if (fsaveds > 0)
1414*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
1415*22dc650dSSadaf Ebrahimi 			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
1416*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, VPUSH | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
1417*22dc650dSSadaf Ebrahimi 		}
1418*22dc650dSSadaf Ebrahimi 	}
1419*22dc650dSSadaf Ebrahimi 
1420*22dc650dSSadaf Ebrahimi 	local_size = ((size + local_size + 0x7) & ~0x7) - size;
1421*22dc650dSSadaf Ebrahimi 	compiler->local_size = local_size;
1422*22dc650dSSadaf Ebrahimi 
1423*22dc650dSSadaf Ebrahimi 	if (options & SLJIT_ENTER_REG_ARG)
1424*22dc650dSSadaf Ebrahimi 		arg_types = 0;
1425*22dc650dSSadaf Ebrahimi 
1426*22dc650dSSadaf Ebrahimi 	arg_types >>= SLJIT_ARG_SHIFT;
1427*22dc650dSSadaf Ebrahimi 	word_arg_count = 0;
1428*22dc650dSSadaf Ebrahimi 	saved_arg_count = 0;
1429*22dc650dSSadaf Ebrahimi #ifdef __SOFTFP__
1430*22dc650dSSadaf Ebrahimi 	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
1431*22dc650dSSadaf Ebrahimi 
1432*22dc650dSSadaf Ebrahimi 	offset = 0;
1433*22dc650dSSadaf Ebrahimi 	float_arg_count = 0;
1434*22dc650dSSadaf Ebrahimi 
1435*22dc650dSSadaf Ebrahimi 	while (arg_types) {
1436*22dc650dSSadaf Ebrahimi 		switch (arg_types & SLJIT_ARG_MASK) {
1437*22dc650dSSadaf Ebrahimi 		case SLJIT_ARG_TYPE_F64:
1438*22dc650dSSadaf Ebrahimi 			if (offset & 0x7)
1439*22dc650dSSadaf Ebrahimi 				offset += sizeof(sljit_sw);
1440*22dc650dSSadaf Ebrahimi 
1441*22dc650dSSadaf Ebrahimi 			if (offset < 4 * sizeof(sljit_sw))
1442*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
1443*22dc650dSSadaf Ebrahimi 			else
1444*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800100 | RN4(SLJIT_SP)
1445*22dc650dSSadaf Ebrahimi 					| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
1446*22dc650dSSadaf Ebrahimi 			float_arg_count++;
1447*22dc650dSSadaf Ebrahimi 			offset += sizeof(sljit_f64) - sizeof(sljit_sw);
1448*22dc650dSSadaf Ebrahimi 			break;
1449*22dc650dSSadaf Ebrahimi 		case SLJIT_ARG_TYPE_F32:
1450*22dc650dSSadaf Ebrahimi 			if (offset < 4 * sizeof(sljit_sw))
1451*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
1452*22dc650dSSadaf Ebrahimi 			else
1453*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800000 | RN4(SLJIT_SP)
1454*22dc650dSSadaf Ebrahimi 					| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
1455*22dc650dSSadaf Ebrahimi 			float_arg_count++;
1456*22dc650dSSadaf Ebrahimi 			break;
1457*22dc650dSSadaf Ebrahimi 		default:
1458*22dc650dSSadaf Ebrahimi 			word_arg_count++;
1459*22dc650dSSadaf Ebrahimi 
1460*22dc650dSSadaf Ebrahimi 			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
1461*22dc650dSSadaf Ebrahimi 				tmp = SLJIT_S0 - saved_arg_count;
1462*22dc650dSSadaf Ebrahimi 				saved_arg_count++;
1463*22dc650dSSadaf Ebrahimi 			} else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
1464*22dc650dSSadaf Ebrahimi 				tmp = word_arg_count;
1465*22dc650dSSadaf Ebrahimi 			else
1466*22dc650dSSadaf Ebrahimi 				break;
1467*22dc650dSSadaf Ebrahimi 
1468*22dc650dSSadaf Ebrahimi 			if (offset < 4 * sizeof(sljit_sw))
1469*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst16(compiler, MOV | ((sljit_ins)reg_map[tmp] & 0x7) | (((sljit_ins)reg_map[tmp] & 0x8) << 4) | (offset << 1)));
1470*22dc650dSSadaf Ebrahimi 			else if (reg_map[tmp] <= 7)
1471*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(tmp)
1472*22dc650dSSadaf Ebrahimi 					| ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
1473*22dc650dSSadaf Ebrahimi 			else
1474*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, LDR | RT4(tmp) | RN4(SLJIT_SP)
1475*22dc650dSSadaf Ebrahimi 					| ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))));
1476*22dc650dSSadaf Ebrahimi 			break;
1477*22dc650dSSadaf Ebrahimi 		}
1478*22dc650dSSadaf Ebrahimi 
1479*22dc650dSSadaf Ebrahimi 		offset += sizeof(sljit_sw);
1480*22dc650dSSadaf Ebrahimi 		arg_types >>= SLJIT_ARG_SHIFT;
1481*22dc650dSSadaf Ebrahimi 	}
1482*22dc650dSSadaf Ebrahimi 
1483*22dc650dSSadaf Ebrahimi 	compiler->args_size = offset;
1484*22dc650dSSadaf Ebrahimi #else
1485*22dc650dSSadaf Ebrahimi 	offset = SLJIT_FR0;
1486*22dc650dSSadaf Ebrahimi 	old_offset = SLJIT_FR0;
1487*22dc650dSSadaf Ebrahimi 	f32_offset = 0;
1488*22dc650dSSadaf Ebrahimi 
1489*22dc650dSSadaf Ebrahimi 	while (arg_types) {
1490*22dc650dSSadaf Ebrahimi 		switch (arg_types & SLJIT_ARG_MASK) {
1491*22dc650dSSadaf Ebrahimi 		case SLJIT_ARG_TYPE_F64:
1492*22dc650dSSadaf Ebrahimi 			if (offset != old_offset)
1493*22dc650dSSadaf Ebrahimi 				*remap_ptr++ = VMOV_F32 | SLJIT_32 | VD4(offset) | VM4(old_offset);
1494*22dc650dSSadaf Ebrahimi 			old_offset++;
1495*22dc650dSSadaf Ebrahimi 			offset++;
1496*22dc650dSSadaf Ebrahimi 			break;
1497*22dc650dSSadaf Ebrahimi 		case SLJIT_ARG_TYPE_F32:
1498*22dc650dSSadaf Ebrahimi 			if (f32_offset != 0) {
1499*22dc650dSSadaf Ebrahimi 				*remap_ptr++ = VMOV_F32 | 0x20 | VD4(offset) | VM4(f32_offset);
1500*22dc650dSSadaf Ebrahimi 				f32_offset = 0;
1501*22dc650dSSadaf Ebrahimi 			} else {
1502*22dc650dSSadaf Ebrahimi 				if (offset != old_offset)
1503*22dc650dSSadaf Ebrahimi 					*remap_ptr++ = VMOV_F32 | VD4(offset) | VM4(old_offset);
1504*22dc650dSSadaf Ebrahimi 				f32_offset = old_offset;
1505*22dc650dSSadaf Ebrahimi 				old_offset++;
1506*22dc650dSSadaf Ebrahimi 			}
1507*22dc650dSSadaf Ebrahimi 			offset++;
1508*22dc650dSSadaf Ebrahimi 			break;
1509*22dc650dSSadaf Ebrahimi 		default:
1510*22dc650dSSadaf Ebrahimi 			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
1511*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - saved_arg_count, SLJIT_R0 + word_arg_count)));
1512*22dc650dSSadaf Ebrahimi 				saved_arg_count++;
1513*22dc650dSSadaf Ebrahimi 			}
1514*22dc650dSSadaf Ebrahimi 
1515*22dc650dSSadaf Ebrahimi 			word_arg_count++;
1516*22dc650dSSadaf Ebrahimi 			break;
1517*22dc650dSSadaf Ebrahimi 		}
1518*22dc650dSSadaf Ebrahimi 		arg_types >>= SLJIT_ARG_SHIFT;
1519*22dc650dSSadaf Ebrahimi 	}
1520*22dc650dSSadaf Ebrahimi 
1521*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap));
1522*22dc650dSSadaf Ebrahimi 
1523*22dc650dSSadaf Ebrahimi 	while (remap_ptr > remap)
1524*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, *(--remap_ptr)));
1525*22dc650dSSadaf Ebrahimi #endif
1526*22dc650dSSadaf Ebrahimi 
1527*22dc650dSSadaf Ebrahimi #ifdef _WIN32
1528*22dc650dSSadaf Ebrahimi 	if (local_size >= 4096) {
1529*22dc650dSSadaf Ebrahimi 		imm = get_imm(4096);
1530*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(imm != INVALID_IMM);
1531*22dc650dSSadaf Ebrahimi 
1532*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
1533*22dc650dSSadaf Ebrahimi 
1534*22dc650dSSadaf Ebrahimi 		if (local_size < 4 * 4096) {
1535*22dc650dSSadaf Ebrahimi 			if (local_size > 2 * 4096) {
1536*22dc650dSSadaf Ebrahimi 				if (local_size > 3 * 4096) {
1537*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
1538*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
1539*22dc650dSSadaf Ebrahimi 				}
1540*22dc650dSSadaf Ebrahimi 
1541*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
1542*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
1543*22dc650dSSadaf Ebrahimi 			}
1544*22dc650dSSadaf Ebrahimi 		} else {
1545*22dc650dSSadaf Ebrahimi 			FAIL_IF(load_immediate(compiler, TMP_REG2, ((sljit_uw)local_size >> 12) - 1));
1546*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
1547*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
1548*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, SUB_WI | SET_FLAGS | RD4(TMP_REG2) | RN4(TMP_REG2) | 1));
1549*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-8 & 0xff)));
1550*22dc650dSSadaf Ebrahimi 		}
1551*22dc650dSSadaf Ebrahimi 
1552*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
1553*22dc650dSSadaf Ebrahimi 		local_size &= 0xfff;
1554*22dc650dSSadaf Ebrahimi 	}
1555*22dc650dSSadaf Ebrahimi 
1556*22dc650dSSadaf Ebrahimi 	if (local_size >= 256) {
1557*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(local_size < 4096);
1558*22dc650dSSadaf Ebrahimi 
1559*22dc650dSSadaf Ebrahimi 		if (local_size <= (127 << 2))
1560*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2)));
1561*22dc650dSSadaf Ebrahimi 		else
1562*22dc650dSSadaf Ebrahimi 			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size));
1563*22dc650dSSadaf Ebrahimi 
1564*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
1565*22dc650dSSadaf Ebrahimi 	} else if (local_size > 0)
1566*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | (sljit_uw)local_size));
1567*22dc650dSSadaf Ebrahimi #else /* !_WIN32 */
1568*22dc650dSSadaf Ebrahimi 	if (local_size > 0) {
1569*22dc650dSSadaf Ebrahimi 		if (local_size <= (127 << 2))
1570*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2)));
1571*22dc650dSSadaf Ebrahimi 		else
1572*22dc650dSSadaf Ebrahimi 			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size));
1573*22dc650dSSadaf Ebrahimi 	}
1574*22dc650dSSadaf Ebrahimi #endif /* _WIN32 */
1575*22dc650dSSadaf Ebrahimi 
1576*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
1577*22dc650dSSadaf Ebrahimi }
1578*22dc650dSSadaf Ebrahimi 
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1579*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1580*22dc650dSSadaf Ebrahimi 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1581*22dc650dSSadaf Ebrahimi 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1582*22dc650dSSadaf Ebrahimi {
1583*22dc650dSSadaf Ebrahimi 	sljit_s32 size;
1584*22dc650dSSadaf Ebrahimi 
1585*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
1586*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1587*22dc650dSSadaf Ebrahimi 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1588*22dc650dSSadaf Ebrahimi 
1589*22dc650dSSadaf Ebrahimi 	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
1590*22dc650dSSadaf Ebrahimi 
1591*22dc650dSSadaf Ebrahimi 	/* Doubles are saved, so alignment is unaffected. */
1592*22dc650dSSadaf Ebrahimi 	if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG))
1593*22dc650dSSadaf Ebrahimi 		size += SSIZE_OF(sw);
1594*22dc650dSSadaf Ebrahimi 
1595*22dc650dSSadaf Ebrahimi 	compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size;
1596*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
1597*22dc650dSSadaf Ebrahimi }
1598*22dc650dSSadaf Ebrahimi 
emit_add_sp(struct sljit_compiler * compiler,sljit_uw imm)1599*22dc650dSSadaf Ebrahimi static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm)
1600*22dc650dSSadaf Ebrahimi {
1601*22dc650dSSadaf Ebrahimi 	sljit_uw imm2;
1602*22dc650dSSadaf Ebrahimi 
1603*22dc650dSSadaf Ebrahimi 	/* The TMP_REG1 register must keep its value. */
1604*22dc650dSSadaf Ebrahimi 	if (imm <= (127u << 2))
1605*22dc650dSSadaf Ebrahimi 		return push_inst16(compiler, ADD_SP_I | (imm >> 2));
1606*22dc650dSSadaf Ebrahimi 
1607*22dc650dSSadaf Ebrahimi 	if (imm <= 0xfff)
1608*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, ADDWI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | IMM12(imm));
1609*22dc650dSSadaf Ebrahimi 
1610*22dc650dSSadaf Ebrahimi 	imm2 = get_imm(imm);
1611*22dc650dSSadaf Ebrahimi 
1612*22dc650dSSadaf Ebrahimi 	if (imm2 != INVALID_IMM)
1613*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, ADD_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm2);
1614*22dc650dSSadaf Ebrahimi 
1615*22dc650dSSadaf Ebrahimi 	FAIL_IF(load_immediate(compiler, TMP_REG2, imm));
1616*22dc650dSSadaf Ebrahimi 	return push_inst16(compiler, ADD_SP | RN3(TMP_REG2));
1617*22dc650dSSadaf Ebrahimi }
1618*22dc650dSSadaf Ebrahimi 
/*
 * Emits the instructions that release the current stack frame: the local
 * area is dropped, saved float registers are popped (VPOP) and the saved
 * general purpose registers are reloaded.
 *
 * The frame_size argument selects what happens to the stored return address:
 *   frame_size < 0  : it is loaded into TMP_REG2 (asserted to be r14).
 *   frame_size == 0 : it is loaded into TMP_PC together with the registers.
 *   frame_size > 0  : it is not loaded at all (lr_dst = 0); the value must be
 *                     1 or a multiple of 8 and is rounded down to a multiple
 *                     of 8 before it is used for the final SP adjustment.
 *
 * Returns SLJIT_SUCCESS, or the error produced by one of the emit helpers.
 */
emit_stack_frame_release(struct sljit_compiler * compiler,sljit_s32 frame_size)1619*22dc650dSSadaf Ebrahimi static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size)
1620*22dc650dSSadaf Ebrahimi {
1621*22dc650dSSadaf Ebrahimi 	sljit_s32 local_size, fscratches, fsaveds, i, tmp;
1622*22dc650dSSadaf Ebrahimi 	sljit_s32 restored_reg = 0;
1623*22dc650dSSadaf Ebrahimi 	sljit_s32 lr_dst = TMP_PC;
1624*22dc650dSSadaf Ebrahimi 	sljit_uw reg_list = 0;
1625*22dc650dSSadaf Ebrahimi 
1626*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128);
1627*22dc650dSSadaf Ebrahimi 
1628*22dc650dSSadaf Ebrahimi 	local_size = compiler->local_size;
1629*22dc650dSSadaf Ebrahimi 	fscratches = compiler->fscratches;
1630*22dc650dSSadaf Ebrahimi 	fsaveds = compiler->fsaveds;
1631*22dc650dSSadaf Ebrahimi 
	/* Saved float registers sit between the local area and the saved
	   general registers, so the local area has to be released first. */
1632*22dc650dSSadaf Ebrahimi 	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
1633*22dc650dSSadaf Ebrahimi 		if (local_size > 0)
1634*22dc650dSSadaf Ebrahimi 			FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));
1635*22dc650dSSadaf Ebrahimi 
1636*22dc650dSSadaf Ebrahimi 		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
1637*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
1638*22dc650dSSadaf Ebrahimi 		} else {
1639*22dc650dSSadaf Ebrahimi 			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
1640*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, VPOP | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
1641*22dc650dSSadaf Ebrahimi 			if (fsaveds > 0)
1642*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
1643*22dc650dSSadaf Ebrahimi 		}
1644*22dc650dSSadaf Ebrahimi 
		/* From here on local_size only holds the remainder (mod 8) of the
		   saved general register area size. */
1645*22dc650dSSadaf Ebrahimi 		local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
1646*22dc650dSSadaf Ebrahimi 	}
1647*22dc650dSSadaf Ebrahimi 
	/* Decide where the stored return address goes (see the header comment). */
1648*22dc650dSSadaf Ebrahimi 	if (frame_size < 0) {
1649*22dc650dSSadaf Ebrahimi 		lr_dst = TMP_REG2;
1650*22dc650dSSadaf Ebrahimi 		frame_size = 0;
1651*22dc650dSSadaf Ebrahimi 	} else if (frame_size > 0) {
1652*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0);
1653*22dc650dSSadaf Ebrahimi 		lr_dst = 0;
1654*22dc650dSSadaf Ebrahimi 		frame_size &= ~0x7;
1655*22dc650dSSadaf Ebrahimi 	}
1656*22dc650dSSadaf Ebrahimi 
	/* Collect the saved registers that have to be reloaded; the kept
	   saved registers (SLJIT_KEPT_SAVEDS_COUNT) are left out. */
1657*22dc650dSSadaf Ebrahimi 	tmp = SLJIT_S0 - compiler->saveds;
1658*22dc650dSSadaf Ebrahimi 	i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
1659*22dc650dSSadaf Ebrahimi 	if (tmp < i) {
1660*22dc650dSSadaf Ebrahimi 		restored_reg = i;
1661*22dc650dSSadaf Ebrahimi 		do {
1662*22dc650dSSadaf Ebrahimi 			reg_list |= (sljit_uw)1 << reg_map[i];
1663*22dc650dSSadaf Ebrahimi 		} while (--i > tmp);
1664*22dc650dSSadaf Ebrahimi 	}
1665*22dc650dSSadaf Ebrahimi 
	/* Scratch registers at or above SLJIT_FIRST_SAVED_REG are reloaded as well. */
1666*22dc650dSSadaf Ebrahimi 	i = compiler->scratches;
1667*22dc650dSSadaf Ebrahimi 	if (i >= SLJIT_FIRST_SAVED_REG) {
1668*22dc650dSSadaf Ebrahimi 		restored_reg = i;
1669*22dc650dSSadaf Ebrahimi 		do {
1670*22dc650dSSadaf Ebrahimi 			reg_list |= (sljit_uw)1 << reg_map[i];
1671*22dc650dSSadaf Ebrahimi 		} while (--i >= SLJIT_FIRST_SAVED_REG);
1672*22dc650dSSadaf Ebrahimi 	}
1673*22dc650dSSadaf Ebrahimi 
	/* Only the return address needs loading: treat TMP_REG2 as the single
	   restored register so the one-register path below handles it. */
1674*22dc650dSSadaf Ebrahimi 	if (lr_dst == TMP_REG2 && reg_list == 0) {
1675*22dc650dSSadaf Ebrahimi 		reg_list |= (sljit_uw)1 << reg_map[TMP_REG2];
1676*22dc650dSSadaf Ebrahimi 		restored_reg = TMP_REG2;
1677*22dc650dSSadaf Ebrahimi 		lr_dst = 0;
1678*22dc650dSSadaf Ebrahimi 	}
1679*22dc650dSSadaf Ebrahimi 
	/* At most one register to reload and no separate return address
	   destination: use a plain load plus an SP adjustment instead of POP.
	   Inside this branch tmp is a small state marker:
	     0 - nothing to load,
	     1 - the register was already loaded before SP is adjusted,
	     2 - the register is loaded from the frame after SP is adjusted,
	     3 - the register is loaded by the final LDRI using 0x304 / 0x308. */
1680*22dc650dSSadaf Ebrahimi 	if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) {
1681*22dc650dSSadaf Ebrahimi 		/* The local_size does not include the saved registers. */
1682*22dc650dSSadaf Ebrahimi 		tmp = 0;
1683*22dc650dSSadaf Ebrahimi 		if (reg_list != 0) {
1684*22dc650dSSadaf Ebrahimi 			tmp = 2;
1685*22dc650dSSadaf Ebrahimi 			if (local_size <= 0xfff) {
1686*22dc650dSSadaf Ebrahimi 				if (local_size == 0) {
1687*22dc650dSSadaf Ebrahimi 					SLJIT_ASSERT(restored_reg != TMP_REG2);
1688*22dc650dSSadaf Ebrahimi 					if (frame_size == 0)
1689*22dc650dSSadaf Ebrahimi 						return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x308);
1690*22dc650dSSadaf Ebrahimi 					if (frame_size > 2 * SSIZE_OF(sw))
1691*22dc650dSSadaf Ebrahimi 						return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x100 | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw))));
1692*22dc650dSSadaf Ebrahimi 				}
1693*22dc650dSSadaf Ebrahimi 
				/* Load the register from its slot above the local area,
				   using the 16 bit form when register and offset allow it. */
1694*22dc650dSSadaf Ebrahimi 				if (reg_map[restored_reg] <= 7 && local_size <= 0x3fc)
1695*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(local_size >> 2)));
1696*22dc650dSSadaf Ebrahimi 				else
1697*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)local_size));
1698*22dc650dSSadaf Ebrahimi 				tmp = 1;
1699*22dc650dSSadaf Ebrahimi 			} else if (frame_size == 0) {
1700*22dc650dSSadaf Ebrahimi 				frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw);
1701*22dc650dSSadaf Ebrahimi 				tmp = 3;
1702*22dc650dSSadaf Ebrahimi 			}
1703*22dc650dSSadaf Ebrahimi 
1704*22dc650dSSadaf Ebrahimi 			/* Place for the saved register. */
1705*22dc650dSSadaf Ebrahimi 			if (restored_reg != TMP_REG2)
1706*22dc650dSSadaf Ebrahimi 				local_size += SSIZE_OF(sw);
1707*22dc650dSSadaf Ebrahimi 		}
1708*22dc650dSSadaf Ebrahimi 
1709*22dc650dSSadaf Ebrahimi 		/* Place for the lr register. */
1710*22dc650dSSadaf Ebrahimi 		local_size += SSIZE_OF(sw);
1711*22dc650dSSadaf Ebrahimi 
		/* Move SP so that exactly frame_size bytes remain allocated. */
1712*22dc650dSSadaf Ebrahimi 		if (frame_size > local_size)
1713*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_ins)(frame_size - local_size) >> 2)));
1714*22dc650dSSadaf Ebrahimi 		else if (frame_size < local_size)
1715*22dc650dSSadaf Ebrahimi 			FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size)));
1716*22dc650dSSadaf Ebrahimi 
1717*22dc650dSSadaf Ebrahimi 		if (tmp <= 1)
1718*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
1719*22dc650dSSadaf Ebrahimi 
1720*22dc650dSSadaf Ebrahimi 		if (tmp == 2) {
			/* Offset of the register slot relative to the adjusted SP. */
1721*22dc650dSSadaf Ebrahimi 			frame_size -= SSIZE_OF(sw);
1722*22dc650dSSadaf Ebrahimi 			if (restored_reg != TMP_REG2)
1723*22dc650dSSadaf Ebrahimi 				frame_size -= SSIZE_OF(sw);
1724*22dc650dSSadaf Ebrahimi 
1725*22dc650dSSadaf Ebrahimi 			if (reg_map[restored_reg] <= 7)
1726*22dc650dSSadaf Ebrahimi 				return push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(frame_size >> 2));
1727*22dc650dSSadaf Ebrahimi 
1728*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)frame_size);
1729*22dc650dSSadaf Ebrahimi 		}
1730*22dc650dSSadaf Ebrahimi 
1731*22dc650dSSadaf Ebrahimi 		tmp = (restored_reg == TMP_REG2) ? 0x304 : 0x308;
1732*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)tmp);
1733*22dc650dSSadaf Ebrahimi 	}
1734*22dc650dSSadaf Ebrahimi 
	/* General path: drop the remaining local area, then pop the registers. */
1735*22dc650dSSadaf Ebrahimi 	if (local_size > 0)
1736*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));
1737*22dc650dSSadaf Ebrahimi 
	/* The 16 bit POP is used when no register in bits 8-15 of the list is
	   needed and the return address does not go to TMP_REG2. */
1738*22dc650dSSadaf Ebrahimi 	if (!(reg_list & 0xff00) && lr_dst != TMP_REG2) {
1739*22dc650dSSadaf Ebrahimi 		if (lr_dst == TMP_PC)
1740*22dc650dSSadaf Ebrahimi 			reg_list |= 1u << 8;
1741*22dc650dSSadaf Ebrahimi 
1742*22dc650dSSadaf Ebrahimi 		/* At least one register must be set for POP instruction. */
1743*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(reg_list != 0);
1744*22dc650dSSadaf Ebrahimi 
1745*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, POP | reg_list));
1746*22dc650dSSadaf Ebrahimi 	} else {
1747*22dc650dSSadaf Ebrahimi 		if (lr_dst != 0)
1748*22dc650dSSadaf Ebrahimi 			reg_list |= (sljit_uw)1 << reg_map[lr_dst];
1749*22dc650dSSadaf Ebrahimi 
1750*22dc650dSSadaf Ebrahimi 		/* At least two registers must be set for POP_W instruction. */
1751*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT((reg_list & (reg_list - 1)) != 0);
1752*22dc650dSSadaf Ebrahimi 
1753*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, POP_W | reg_list));
1754*22dc650dSSadaf Ebrahimi 	}
1755*22dc650dSSadaf Ebrahimi 
	/* Re-allocate the requested frame; one word is subtracted from it
	   because the return address slot was not popped on this path. */
1756*22dc650dSSadaf Ebrahimi 	if (frame_size > 0)
1757*22dc650dSSadaf Ebrahimi 		return push_inst16(compiler, SUB_SP_I | (((sljit_ins)frame_size - sizeof(sljit_sw)) >> 2));
1758*22dc650dSSadaf Ebrahimi 
1759*22dc650dSSadaf Ebrahimi 	if (lr_dst != 0)
1760*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1761*22dc650dSSadaf Ebrahimi 
	/* The return address was not loaded: skip its slot (one word). */
1762*22dc650dSSadaf Ebrahimi 	return push_inst16(compiler, ADD_SP_I | 1);
1763*22dc650dSSadaf Ebrahimi }
1764*22dc650dSSadaf Ebrahimi 
sljit_emit_return_void(struct sljit_compiler * compiler)1765*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1766*22dc650dSSadaf Ebrahimi {
1767*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
1768*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_return_void(compiler));
1769*22dc650dSSadaf Ebrahimi 
1770*22dc650dSSadaf Ebrahimi 	return emit_stack_frame_release(compiler, 0);
1771*22dc650dSSadaf Ebrahimi }
1772*22dc650dSSadaf Ebrahimi 
sljit_emit_return_to(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)1773*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1774*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
1775*22dc650dSSadaf Ebrahimi {
1776*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
1777*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1778*22dc650dSSadaf Ebrahimi 
1779*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
1780*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
1781*22dc650dSSadaf Ebrahimi 		src = TMP_REG1;
1782*22dc650dSSadaf Ebrahimi 		srcw = 0;
1783*22dc650dSSadaf Ebrahimi 	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1784*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src)));
1785*22dc650dSSadaf Ebrahimi 		src = TMP_REG1;
1786*22dc650dSSadaf Ebrahimi 		srcw = 0;
1787*22dc650dSSadaf Ebrahimi 	}
1788*22dc650dSSadaf Ebrahimi 
1789*22dc650dSSadaf Ebrahimi 	FAIL_IF(emit_stack_frame_release(compiler, 1));
1790*22dc650dSSadaf Ebrahimi 
1791*22dc650dSSadaf Ebrahimi 	SLJIT_SKIP_CHECKS(compiler);
1792*22dc650dSSadaf Ebrahimi 	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1793*22dc650dSSadaf Ebrahimi }
1794*22dc650dSSadaf Ebrahimi 
1795*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
1796*22dc650dSSadaf Ebrahimi /*  Operators                                                            */
1797*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
1798*22dc650dSSadaf Ebrahimi 
/*
 * Prototypes of the runtime's software division helpers. They are only
 * declared when neither __ARM_FEATURE_IDIV nor __ARM_ARCH_EXT_IDIV__ is
 * defined, i.e. when sljit_emit_op0() has to call a helper instead of
 * emitting UDIV / SDIV. Note the different argument order of the two
 * families: the _WIN32 helpers take the denominator first, the __aeabi_
 * helpers take the numerator first.
 */
1799*22dc650dSSadaf Ebrahimi #if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
1800*22dc650dSSadaf Ebrahimi 
1801*22dc650dSSadaf Ebrahimi #ifdef __cplusplus
1802*22dc650dSSadaf Ebrahimi extern "C" {
1803*22dc650dSSadaf Ebrahimi #endif
1804*22dc650dSSadaf Ebrahimi 
1805*22dc650dSSadaf Ebrahimi #ifdef _WIN32
1806*22dc650dSSadaf Ebrahimi extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
1807*22dc650dSSadaf Ebrahimi extern long long __rt_sdiv(int denominator, int numerator);
1808*22dc650dSSadaf Ebrahimi #elif defined(__GNUC__)
1809*22dc650dSSadaf Ebrahimi extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator);
1810*22dc650dSSadaf Ebrahimi extern int __aeabi_idivmod(int numerator, int denominator);
1811*22dc650dSSadaf Ebrahimi #else
1812*22dc650dSSadaf Ebrahimi #error "Software divmod functions are needed"
1813*22dc650dSSadaf Ebrahimi #endif
1814*22dc650dSSadaf Ebrahimi 
1815*22dc650dSSadaf Ebrahimi #ifdef __cplusplus
1816*22dc650dSSadaf Ebrahimi }
1817*22dc650dSSadaf Ebrahimi #endif
1818*22dc650dSSadaf Ebrahimi 
1819*22dc650dSSadaf Ebrahimi #endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
1820*22dc650dSSadaf Ebrahimi 
/*
 * Emits an operation that has no explicit operands. The operands of the
 * multiply / divide forms are the fixed registers SLJIT_R0 and SLJIT_R1.
 *
 * Two builds of the divide cases exist: with __ARM_FEATURE_IDIV or
 * __ARM_ARCH_EXT_IDIV__ defined the UDIV / SDIV instructions are emitted
 * directly, otherwise a software divide helper is called.
 *
 * Returns SLJIT_SUCCESS, or the error produced by one of the emit helpers.
 * Opcodes not listed in the switch emit nothing and return SLJIT_SUCCESS.
 */
sljit_emit_op0(struct sljit_compiler * compiler,sljit_s32 op)1821*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1822*22dc650dSSadaf Ebrahimi {
1823*22dc650dSSadaf Ebrahimi #if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
1824*22dc650dSSadaf Ebrahimi 	sljit_uw saved_reg_list[3];
1825*22dc650dSSadaf Ebrahimi 	sljit_uw saved_reg_count;
1826*22dc650dSSadaf Ebrahimi #endif
1827*22dc650dSSadaf Ebrahimi 
1828*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
1829*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op0(compiler, op));
1830*22dc650dSSadaf Ebrahimi 
1831*22dc650dSSadaf Ebrahimi 	op = GET_OPCODE(op);
1832*22dc650dSSadaf Ebrahimi 	switch (op) {
1833*22dc650dSSadaf Ebrahimi 	case SLJIT_BREAKPOINT:
1834*22dc650dSSadaf Ebrahimi 		return push_inst16(compiler, BKPT);
1835*22dc650dSSadaf Ebrahimi 	case SLJIT_NOP:
1836*22dc650dSSadaf Ebrahimi 		return push_inst16(compiler, NOP);
1837*22dc650dSSadaf Ebrahimi 	case SLJIT_LMUL_UW:
1838*22dc650dSSadaf Ebrahimi 	case SLJIT_LMUL_SW:
		/* Long multiply of R0 by R1; the result is written to the R0 / R1 pair. */
1839*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
1840*22dc650dSSadaf Ebrahimi 			| RD4(SLJIT_R1) | RT4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
1841*22dc650dSSadaf Ebrahimi #if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
1842*22dc650dSSadaf Ebrahimi 	case SLJIT_DIVMOD_UW:
1843*22dc650dSSadaf Ebrahimi 	case SLJIT_DIVMOD_SW:
		/* TMP_REG1 = R0; R0 = R0 / R1; R1 = R0 * R1; R1 = TMP_REG1 - R1,
		   which leaves the quotient in R0 and the remainder in R1. */
1844*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
1845*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
1846*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
1847*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
1848*22dc650dSSadaf Ebrahimi 	case SLJIT_DIV_UW:
1849*22dc650dSSadaf Ebrahimi 	case SLJIT_DIV_SW:
1850*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
1851*22dc650dSSadaf Ebrahimi #else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
1852*22dc650dSSadaf Ebrahimi 	case SLJIT_DIVMOD_UW:
1853*22dc650dSSadaf Ebrahimi 	case SLJIT_DIVMOD_SW:
1854*22dc650dSSadaf Ebrahimi 	case SLJIT_DIV_UW:
1855*22dc650dSSadaf Ebrahimi 	case SLJIT_DIV_SW:
1856*22dc650dSSadaf Ebrahimi 		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
1857*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);
1858*22dc650dSSadaf Ebrahimi 
		/* Build the list of machine registers (r3, r2, r1) to preserve
		   around the helper call. r3 and r2 are kept only when that many
		   scratch registers are in use; r1 is kept for the DIV forms.
		   NOTE(review): presumably because the helper leaves the remainder
		   in r1 and DIV must not change R1 - confirm against the ABI. */
1859*22dc650dSSadaf Ebrahimi 		saved_reg_count = 0;
1860*22dc650dSSadaf Ebrahimi 		if (compiler->scratches >= 4)
1861*22dc650dSSadaf Ebrahimi 			saved_reg_list[saved_reg_count++] = 3;
1862*22dc650dSSadaf Ebrahimi 		if (compiler->scratches >= 3)
1863*22dc650dSSadaf Ebrahimi 			saved_reg_list[saved_reg_count++] = 2;
1864*22dc650dSSadaf Ebrahimi 		if (op >= SLJIT_DIV_UW)
1865*22dc650dSSadaf Ebrahimi 			saved_reg_list[saved_reg_count++] = 1;
1866*22dc650dSSadaf Ebrahimi 
		/* The first store also allocates the save area (8 or 16 bytes);
		   the remaining registers go to [sp, #4] and [sp, #8]. */
1867*22dc650dSSadaf Ebrahimi 		if (saved_reg_count > 0) {
1868*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8)
1869*22dc650dSSadaf Ebrahimi 						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
1870*22dc650dSSadaf Ebrahimi 			if (saved_reg_count >= 2) {
1871*22dc650dSSadaf Ebrahimi 				SLJIT_ASSERT(saved_reg_list[1] < 8);
1872*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */));
1873*22dc650dSSadaf Ebrahimi 			}
1874*22dc650dSSadaf Ebrahimi 			if (saved_reg_count >= 3) {
1875*22dc650dSSadaf Ebrahimi 				SLJIT_ASSERT(saved_reg_list[2] < 8);
1876*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */));
1877*22dc650dSSadaf Ebrahimi 			}
1878*22dc650dSSadaf Ebrahimi 		}
1879*22dc650dSSadaf Ebrahimi 
1880*22dc650dSSadaf Ebrahimi #ifdef _WIN32
		/* The _WIN32 helpers take the denominator first, so R0 and R1
		   are swapped through TMP_REG1 before the call. */
1881*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
1882*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
1883*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
1884*22dc650dSSadaf Ebrahimi 		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
1885*22dc650dSSadaf Ebrahimi 			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__rt_udiv) : SLJIT_FUNC_ADDR(__rt_sdiv))));
1886*22dc650dSSadaf Ebrahimi #elif defined(__GNUC__)
		/* (op | 0x2) maps DIVMOD_xx onto DIV_xx (see the compile assert
		   above), so one comparison selects the unsigned helper. */
1887*22dc650dSSadaf Ebrahimi 		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
1888*22dc650dSSadaf Ebrahimi 			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod))));
1889*22dc650dSSadaf Ebrahimi #else
1890*22dc650dSSadaf Ebrahimi #error "Software divmod functions are needed"
1891*22dc650dSSadaf Ebrahimi #endif
1892*22dc650dSSadaf Ebrahimi 
		/* Reload the preserved registers in reverse order; the last load
		   also releases the save area. */
1893*22dc650dSSadaf Ebrahimi 		if (saved_reg_count > 0) {
1894*22dc650dSSadaf Ebrahimi 			if (saved_reg_count >= 3) {
1895*22dc650dSSadaf Ebrahimi 				SLJIT_ASSERT(saved_reg_list[2] < 8);
1896*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */));
1897*22dc650dSSadaf Ebrahimi 			}
1898*22dc650dSSadaf Ebrahimi 			if (saved_reg_count >= 2) {
1899*22dc650dSSadaf Ebrahimi 				SLJIT_ASSERT(saved_reg_list[1] < 8);
1900*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */));
1901*22dc650dSSadaf Ebrahimi 			}
1902*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 16 : 8)
1903*22dc650dSSadaf Ebrahimi 						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
1904*22dc650dSSadaf Ebrahimi 		}
1905*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1906*22dc650dSSadaf Ebrahimi #endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
1907*22dc650dSSadaf Ebrahimi 	case SLJIT_ENDBR:
1908*22dc650dSSadaf Ebrahimi 	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		/* Nothing is emitted for these opcodes in this backend. */
1909*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1910*22dc650dSSadaf Ebrahimi 	}
1911*22dc650dSSadaf Ebrahimi 
1912*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
1913*22dc650dSSadaf Ebrahimi }
1914*22dc650dSSadaf Ebrahimi 
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1915*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1916*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
1917*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
1918*22dc650dSSadaf Ebrahimi {
1919*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r, flags;
1920*22dc650dSSadaf Ebrahimi 
1921*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
1922*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1923*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
1924*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src, srcw);
1925*22dc650dSSadaf Ebrahimi 
1926*22dc650dSSadaf Ebrahimi 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
1927*22dc650dSSadaf Ebrahimi 
1928*22dc650dSSadaf Ebrahimi 	op = GET_OPCODE(op);
1929*22dc650dSSadaf Ebrahimi 	if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
1930*22dc650dSSadaf Ebrahimi 		switch (op) {
1931*22dc650dSSadaf Ebrahimi 		case SLJIT_MOV:
1932*22dc650dSSadaf Ebrahimi 		case SLJIT_MOV_U32:
1933*22dc650dSSadaf Ebrahimi 		case SLJIT_MOV_S32:
1934*22dc650dSSadaf Ebrahimi 		case SLJIT_MOV32:
1935*22dc650dSSadaf Ebrahimi 		case SLJIT_MOV_P:
1936*22dc650dSSadaf Ebrahimi 			flags = WORD_SIZE;
1937*22dc650dSSadaf Ebrahimi 			break;
1938*22dc650dSSadaf Ebrahimi 		case SLJIT_MOV_U8:
1939*22dc650dSSadaf Ebrahimi 			flags = BYTE_SIZE;
1940*22dc650dSSadaf Ebrahimi 			if (src == SLJIT_IMM)
1941*22dc650dSSadaf Ebrahimi 				srcw = (sljit_u8)srcw;
1942*22dc650dSSadaf Ebrahimi 			break;
1943*22dc650dSSadaf Ebrahimi 		case SLJIT_MOV_S8:
1944*22dc650dSSadaf Ebrahimi 			flags = BYTE_SIZE | SIGNED;
1945*22dc650dSSadaf Ebrahimi 			if (src == SLJIT_IMM)
1946*22dc650dSSadaf Ebrahimi 				srcw = (sljit_s8)srcw;
1947*22dc650dSSadaf Ebrahimi 			break;
1948*22dc650dSSadaf Ebrahimi 		case SLJIT_MOV_U16:
1949*22dc650dSSadaf Ebrahimi 			flags = HALF_SIZE;
1950*22dc650dSSadaf Ebrahimi 			if (src == SLJIT_IMM)
1951*22dc650dSSadaf Ebrahimi 				srcw = (sljit_u16)srcw;
1952*22dc650dSSadaf Ebrahimi 			break;
1953*22dc650dSSadaf Ebrahimi 		case SLJIT_MOV_S16:
1954*22dc650dSSadaf Ebrahimi 			flags = HALF_SIZE | SIGNED;
1955*22dc650dSSadaf Ebrahimi 			if (src == SLJIT_IMM)
1956*22dc650dSSadaf Ebrahimi 				srcw = (sljit_s16)srcw;
1957*22dc650dSSadaf Ebrahimi 			break;
1958*22dc650dSSadaf Ebrahimi 		default:
1959*22dc650dSSadaf Ebrahimi 			SLJIT_UNREACHABLE();
1960*22dc650dSSadaf Ebrahimi 			flags = 0;
1961*22dc650dSSadaf Ebrahimi 			break;
1962*22dc650dSSadaf Ebrahimi 		}
1963*22dc650dSSadaf Ebrahimi 
1964*22dc650dSSadaf Ebrahimi 		if (src == SLJIT_IMM)
1965*22dc650dSSadaf Ebrahimi 			FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw));
1966*22dc650dSSadaf Ebrahimi 		else if (src & SLJIT_MEM)
1967*22dc650dSSadaf Ebrahimi 			FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1));
1968*22dc650dSSadaf Ebrahimi 		else if (FAST_IS_REG(dst))
1969*22dc650dSSadaf Ebrahimi 			return emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src);
1970*22dc650dSSadaf Ebrahimi 		else
1971*22dc650dSSadaf Ebrahimi 			dst_r = src;
1972*22dc650dSSadaf Ebrahimi 
1973*22dc650dSSadaf Ebrahimi 		if (!(dst & SLJIT_MEM))
1974*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
1975*22dc650dSSadaf Ebrahimi 
1976*22dc650dSSadaf Ebrahimi 		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG1);
1977*22dc650dSSadaf Ebrahimi 	}
1978*22dc650dSSadaf Ebrahimi 
1979*22dc650dSSadaf Ebrahimi 	SLJIT_COMPILE_ASSERT(WORD_SIZE == 0, word_size_must_be_0);
1980*22dc650dSSadaf Ebrahimi 	flags = WORD_SIZE;
1981*22dc650dSSadaf Ebrahimi 
1982*22dc650dSSadaf Ebrahimi 	if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
1983*22dc650dSSadaf Ebrahimi 		if (!(dst & SLJIT_MEM) && (!(src & SLJIT_MEM) || op == SLJIT_REV_S16))
1984*22dc650dSSadaf Ebrahimi 			op |= REGISTER_OP;
1985*22dc650dSSadaf Ebrahimi 		flags |= HALF_SIZE;
1986*22dc650dSSadaf Ebrahimi 	}
1987*22dc650dSSadaf Ebrahimi 
1988*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
1989*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, flags, TMP_REG1, src, srcw, TMP_REG1));
1990*22dc650dSSadaf Ebrahimi 		src = TMP_REG1;
1991*22dc650dSSadaf Ebrahimi 	}
1992*22dc650dSSadaf Ebrahimi 
1993*22dc650dSSadaf Ebrahimi 	emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src);
1994*22dc650dSSadaf Ebrahimi 
1995*22dc650dSSadaf Ebrahimi 	if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
1996*22dc650dSSadaf Ebrahimi 		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG1);
1997*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
1998*22dc650dSSadaf Ebrahimi }
1999*22dc650dSSadaf Ebrahimi 
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2000*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2001*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
2002*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
2003*22dc650dSSadaf Ebrahimi 	sljit_s32 src2, sljit_sw src2w)
2004*22dc650dSSadaf Ebrahimi {
2005*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_reg, src2_tmp_reg, flags;
2006*22dc650dSSadaf Ebrahimi 
2007*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2008*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2009*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
2010*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src1, src1w);
2011*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src2, src2w);
2012*22dc650dSSadaf Ebrahimi 
2013*22dc650dSSadaf Ebrahimi 	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
2014*22dc650dSSadaf Ebrahimi 	flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
2015*22dc650dSSadaf Ebrahimi 
2016*22dc650dSSadaf Ebrahimi 	if (dst == TMP_REG1)
2017*22dc650dSSadaf Ebrahimi 		flags |= UNUSED_RETURN;
2018*22dc650dSSadaf Ebrahimi 
2019*22dc650dSSadaf Ebrahimi 	if (src2 == SLJIT_IMM)
2020*22dc650dSSadaf Ebrahimi 		flags |= ARG2_IMM;
2021*22dc650dSSadaf Ebrahimi 	else if (src2 & SLJIT_MEM) {
2022*22dc650dSSadaf Ebrahimi 		src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
2023*22dc650dSSadaf Ebrahimi 		emit_op_mem(compiler, WORD_SIZE, src2_tmp_reg, src2, src2w, TMP_REG1);
2024*22dc650dSSadaf Ebrahimi 		src2w = src2_tmp_reg;
2025*22dc650dSSadaf Ebrahimi 	} else
2026*22dc650dSSadaf Ebrahimi 		src2w = src2;
2027*22dc650dSSadaf Ebrahimi 
2028*22dc650dSSadaf Ebrahimi 	if (src1 == SLJIT_IMM)
2029*22dc650dSSadaf Ebrahimi 		flags |= ARG1_IMM;
2030*22dc650dSSadaf Ebrahimi 	else if (src1 & SLJIT_MEM) {
2031*22dc650dSSadaf Ebrahimi 		emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1);
2032*22dc650dSSadaf Ebrahimi 		src1w = TMP_REG1;
2033*22dc650dSSadaf Ebrahimi 	} else
2034*22dc650dSSadaf Ebrahimi 		src1w = src1;
2035*22dc650dSSadaf Ebrahimi 
2036*22dc650dSSadaf Ebrahimi 	emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, (sljit_uw)src1w, (sljit_uw)src2w);
2037*22dc650dSSadaf Ebrahimi 
2038*22dc650dSSadaf Ebrahimi 	if (!(dst & SLJIT_MEM))
2039*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
2040*22dc650dSSadaf Ebrahimi 	return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1);
2041*22dc650dSSadaf Ebrahimi }
2042*22dc650dSSadaf Ebrahimi 
sljit_emit_op2u(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2043*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2044*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
2045*22dc650dSSadaf Ebrahimi 	sljit_s32 src2, sljit_sw src2w)
2046*22dc650dSSadaf Ebrahimi {
2047*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2048*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2049*22dc650dSSadaf Ebrahimi 
2050*22dc650dSSadaf Ebrahimi 	SLJIT_SKIP_CHECKS(compiler);
2051*22dc650dSSadaf Ebrahimi 	return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
2052*22dc650dSSadaf Ebrahimi }
2053*22dc650dSSadaf Ebrahimi 
sljit_emit_op2r(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2054*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2055*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_reg,
2056*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
2057*22dc650dSSadaf Ebrahimi 	sljit_s32 src2, sljit_sw src2w)
2058*22dc650dSSadaf Ebrahimi {
2059*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2060*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2061*22dc650dSSadaf Ebrahimi 
2062*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op)) {
2063*22dc650dSSadaf Ebrahimi 	case SLJIT_MULADD:
2064*22dc650dSSadaf Ebrahimi 		SLJIT_SKIP_CHECKS(compiler);
2065*22dc650dSSadaf Ebrahimi 		return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w);
2066*22dc650dSSadaf Ebrahimi 	}
2067*22dc650dSSadaf Ebrahimi 
2068*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2069*22dc650dSSadaf Ebrahimi }
2070*22dc650dSSadaf Ebrahimi 
sljit_emit_shift_into(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 src1_reg,sljit_s32 src2_reg,sljit_s32 src3,sljit_sw src3w)2071*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2072*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_reg,
2073*22dc650dSSadaf Ebrahimi 	sljit_s32 src1_reg,
2074*22dc650dSSadaf Ebrahimi 	sljit_s32 src2_reg,
2075*22dc650dSSadaf Ebrahimi 	sljit_s32 src3, sljit_sw src3w)
2076*22dc650dSSadaf Ebrahimi {
2077*22dc650dSSadaf Ebrahimi 	sljit_s32 is_left;
2078*22dc650dSSadaf Ebrahimi 
2079*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2080*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
2081*22dc650dSSadaf Ebrahimi 
2082*22dc650dSSadaf Ebrahimi 	op = GET_OPCODE(op);
2083*22dc650dSSadaf Ebrahimi 	is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);
2084*22dc650dSSadaf Ebrahimi 
2085*22dc650dSSadaf Ebrahimi 	if (src1_reg == src2_reg) {
2086*22dc650dSSadaf Ebrahimi 		SLJIT_SKIP_CHECKS(compiler);
2087*22dc650dSSadaf Ebrahimi 		return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w);
2088*22dc650dSSadaf Ebrahimi 	}
2089*22dc650dSSadaf Ebrahimi 
2090*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src3, src3w);
2091*22dc650dSSadaf Ebrahimi 
2092*22dc650dSSadaf Ebrahimi 	if (src3 == SLJIT_IMM) {
2093*22dc650dSSadaf Ebrahimi 		src3w &= 0x1f;
2094*22dc650dSSadaf Ebrahimi 
2095*22dc650dSSadaf Ebrahimi 		if (src3w == 0)
2096*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
2097*22dc650dSSadaf Ebrahimi 
2098*22dc650dSSadaf Ebrahimi 		if (IS_2_LO_REGS(dst_reg, src1_reg))
2099*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst16(compiler, (is_left ? LSLSI : LSRSI) | RD3(dst_reg) | RN3(src1_reg) | ((sljit_ins)src3w << 6)));
2100*22dc650dSSadaf Ebrahimi 		else
2101*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, (is_left ? LSL_WI : LSR_WI) | RD4(dst_reg) | RM4(src1_reg) | IMM5(src3w)));
2102*22dc650dSSadaf Ebrahimi 
2103*22dc650dSSadaf Ebrahimi 		src3w = (src3w ^ 0x1f) + 1;
2104*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(src2_reg) | (is_left ? 0x10 : 0x0) | IMM5(src3w));
2105*22dc650dSSadaf Ebrahimi 	}
2106*22dc650dSSadaf Ebrahimi 
2107*22dc650dSSadaf Ebrahimi 	if (src3 & SLJIT_MEM) {
2108*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2));
2109*22dc650dSSadaf Ebrahimi 		src3 = TMP_REG2;
2110*22dc650dSSadaf Ebrahimi 	}
2111*22dc650dSSadaf Ebrahimi 
2112*22dc650dSSadaf Ebrahimi 	if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) {
2113*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(src3) | 0x1f));
2114*22dc650dSSadaf Ebrahimi 		src3 = TMP_REG2;
2115*22dc650dSSadaf Ebrahimi 	}
2116*22dc650dSSadaf Ebrahimi 
2117*22dc650dSSadaf Ebrahimi 	if (dst_reg == src1_reg && IS_2_LO_REGS(dst_reg, src3))
2118*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, (is_left ? LSLS : LSRS) | RD3(dst_reg) | RN3(src3)));
2119*22dc650dSSadaf Ebrahimi 	else
2120*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, (is_left ? LSL_W : LSR_W) | RD4(dst_reg) | RN4(src1_reg) | RM4(src3)));
2121*22dc650dSSadaf Ebrahimi 
2122*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst32(compiler, (is_left ? LSR_WI : LSL_WI) | RD4(TMP_REG1) | RM4(src2_reg) | (1 << 6)));
2123*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst32(compiler, EORI | RD4(TMP_REG2) | RN4(src3) | 0x1f));
2124*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst32(compiler, (is_left ? LSR_W : LSL_W) | RD4(TMP_REG1) | RN4(TMP_REG1) | RM4(TMP_REG2)));
2125*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(TMP_REG1));
2126*22dc650dSSadaf Ebrahimi }
2127*22dc650dSSadaf Ebrahimi 
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)2128*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2129*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
2130*22dc650dSSadaf Ebrahimi {
2131*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2132*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2133*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src, srcw);
2134*22dc650dSSadaf Ebrahimi 
2135*22dc650dSSadaf Ebrahimi 	switch (op) {
2136*22dc650dSSadaf Ebrahimi 	case SLJIT_FAST_RETURN:
2137*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
2138*22dc650dSSadaf Ebrahimi 
2139*22dc650dSSadaf Ebrahimi 		if (FAST_IS_REG(src))
2140*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src)));
2141*22dc650dSSadaf Ebrahimi 		else
2142*22dc650dSSadaf Ebrahimi 			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));
2143*22dc650dSSadaf Ebrahimi 
2144*22dc650dSSadaf Ebrahimi 		return push_inst16(compiler, BX | RN3(TMP_REG2));
2145*22dc650dSSadaf Ebrahimi 	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2146*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
2147*22dc650dSSadaf Ebrahimi 	case SLJIT_PREFETCH_L1:
2148*22dc650dSSadaf Ebrahimi 	case SLJIT_PREFETCH_L2:
2149*22dc650dSSadaf Ebrahimi 	case SLJIT_PREFETCH_L3:
2150*22dc650dSSadaf Ebrahimi 	case SLJIT_PREFETCH_ONCE:
2151*22dc650dSSadaf Ebrahimi 		return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1);
2152*22dc650dSSadaf Ebrahimi 	}
2153*22dc650dSSadaf Ebrahimi 
2154*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2155*22dc650dSSadaf Ebrahimi }
2156*22dc650dSSadaf Ebrahimi 
sljit_emit_op_dst(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw)2157*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
2158*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw)
2159*22dc650dSSadaf Ebrahimi {
2160*22dc650dSSadaf Ebrahimi 	sljit_s32 size, dst_r;
2161*22dc650dSSadaf Ebrahimi 
2162*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2163*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
2164*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
2165*22dc650dSSadaf Ebrahimi 
2166*22dc650dSSadaf Ebrahimi 	switch (op) {
2167*22dc650dSSadaf Ebrahimi 	case SLJIT_FAST_ENTER:
2168*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
2169*22dc650dSSadaf Ebrahimi 
2170*22dc650dSSadaf Ebrahimi 		if (FAST_IS_REG(dst))
2171*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2));
2172*22dc650dSSadaf Ebrahimi 		break;
2173*22dc650dSSadaf Ebrahimi 	case SLJIT_GET_RETURN_ADDRESS:
2174*22dc650dSSadaf Ebrahimi 		size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);
2175*22dc650dSSadaf Ebrahimi 
2176*22dc650dSSadaf Ebrahimi 		if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
2177*22dc650dSSadaf Ebrahimi 			/* The size of pc is not added above. */
2178*22dc650dSSadaf Ebrahimi 			if ((size & SSIZE_OF(sw)) == 0)
2179*22dc650dSSadaf Ebrahimi 				size += SSIZE_OF(sw);
2180*22dc650dSSadaf Ebrahimi 
2181*22dc650dSSadaf Ebrahimi 			size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64);
2182*22dc650dSSadaf Ebrahimi 		}
2183*22dc650dSSadaf Ebrahimi 
2184*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0);
2185*22dc650dSSadaf Ebrahimi 
2186*22dc650dSSadaf Ebrahimi 		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
2187*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1));
2188*22dc650dSSadaf Ebrahimi 		break;
2189*22dc650dSSadaf Ebrahimi 	}
2190*22dc650dSSadaf Ebrahimi 
2191*22dc650dSSadaf Ebrahimi 	if (dst & SLJIT_MEM)
2192*22dc650dSSadaf Ebrahimi 		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1);
2193*22dc650dSSadaf Ebrahimi 
2194*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2195*22dc650dSSadaf Ebrahimi }
2196*22dc650dSSadaf Ebrahimi 
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)2197*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2198*22dc650dSSadaf Ebrahimi {
2199*22dc650dSSadaf Ebrahimi 	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2200*22dc650dSSadaf Ebrahimi 
2201*22dc650dSSadaf Ebrahimi 	if (type == SLJIT_GP_REGISTER)
2202*22dc650dSSadaf Ebrahimi 		return reg_map[reg];
2203*22dc650dSSadaf Ebrahimi 
2204*22dc650dSSadaf Ebrahimi 	if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64)
2205*22dc650dSSadaf Ebrahimi 		return freg_map[reg];
2206*22dc650dSSadaf Ebrahimi 
2207*22dc650dSSadaf Ebrahimi 	if (type != SLJIT_SIMD_REG_128)
2208*22dc650dSSadaf Ebrahimi 		return freg_map[reg] & ~0x1;
2209*22dc650dSSadaf Ebrahimi 
2210*22dc650dSSadaf Ebrahimi 	return -1;
2211*22dc650dSSadaf Ebrahimi }
2212*22dc650dSSadaf Ebrahimi 
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_u32 size)2213*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2214*22dc650dSSadaf Ebrahimi 	void *instruction, sljit_u32 size)
2215*22dc650dSSadaf Ebrahimi {
2216*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2217*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2218*22dc650dSSadaf Ebrahimi 
2219*22dc650dSSadaf Ebrahimi 	if (size == 2)
2220*22dc650dSSadaf Ebrahimi 		return push_inst16(compiler, *(sljit_u16*)instruction);
2221*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, *(sljit_ins*)instruction);
2222*22dc650dSSadaf Ebrahimi }
2223*22dc650dSSadaf Ebrahimi 
2224*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
2225*22dc650dSSadaf Ebrahimi /*  Floating point operators                                             */
2226*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
2227*22dc650dSSadaf Ebrahimi 
2228*22dc650dSSadaf Ebrahimi #define FPU_LOAD (1 << 20)
2229*22dc650dSSadaf Ebrahimi 
emit_fop_mem(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw)2230*22dc650dSSadaf Ebrahimi static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
2231*22dc650dSSadaf Ebrahimi {
2232*22dc650dSSadaf Ebrahimi 	sljit_uw imm;
2233*22dc650dSSadaf Ebrahimi 	sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));
2234*22dc650dSSadaf Ebrahimi 
2235*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(arg & SLJIT_MEM);
2236*22dc650dSSadaf Ebrahimi 
2237*22dc650dSSadaf Ebrahimi 	/* Fast loads and stores. */
2238*22dc650dSSadaf Ebrahimi 	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
2239*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 6)));
2240*22dc650dSSadaf Ebrahimi 		arg = SLJIT_MEM | TMP_REG1;
2241*22dc650dSSadaf Ebrahimi 		argw = 0;
2242*22dc650dSSadaf Ebrahimi 	}
2243*22dc650dSSadaf Ebrahimi 
2244*22dc650dSSadaf Ebrahimi 	if ((arg & REG_MASK) && (argw & 0x3) == 0) {
2245*22dc650dSSadaf Ebrahimi 		if (!(argw & ~0x3fc))
2246*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)argw >> 2));
2247*22dc650dSSadaf Ebrahimi 		if (!(-argw & ~0x3fc))
2248*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, inst | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)-argw >> 2));
2249*22dc650dSSadaf Ebrahimi 	}
2250*22dc650dSSadaf Ebrahimi 
2251*22dc650dSSadaf Ebrahimi 	if (arg & REG_MASK) {
2252*22dc650dSSadaf Ebrahimi 		if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
2253*22dc650dSSadaf Ebrahimi 			FAIL_IF(compiler->error);
2254*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg));
2255*22dc650dSSadaf Ebrahimi 		}
2256*22dc650dSSadaf Ebrahimi 
2257*22dc650dSSadaf Ebrahimi 		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc);
2258*22dc650dSSadaf Ebrahimi 		if (imm != INVALID_IMM) {
2259*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
2260*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
2261*22dc650dSSadaf Ebrahimi 		}
2262*22dc650dSSadaf Ebrahimi 
2263*22dc650dSSadaf Ebrahimi 		imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc);
2264*22dc650dSSadaf Ebrahimi 		if (imm != INVALID_IMM) {
2265*22dc650dSSadaf Ebrahimi 			argw = -argw;
2266*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
2267*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, inst | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
2268*22dc650dSSadaf Ebrahimi 		}
2269*22dc650dSSadaf Ebrahimi 	}
2270*22dc650dSSadaf Ebrahimi 
2271*22dc650dSSadaf Ebrahimi 	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));
2272*22dc650dSSadaf Ebrahimi 	if (arg & REG_MASK)
2273*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK))));
2274*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg));
2275*22dc650dSSadaf Ebrahimi }
2276*22dc650dSSadaf Ebrahimi 
sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2277*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2278*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
2279*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
2280*22dc650dSSadaf Ebrahimi {
2281*22dc650dSSadaf Ebrahimi 	op ^= SLJIT_32;
2282*22dc650dSSadaf Ebrahimi 
2283*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
2284*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw));
2285*22dc650dSSadaf Ebrahimi 		src = TMP_FREG1;
2286*22dc650dSSadaf Ebrahimi 	}
2287*22dc650dSSadaf Ebrahimi 
2288*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | VD4(TMP_FREG1) | VM4(src)));
2289*22dc650dSSadaf Ebrahimi 
2290*22dc650dSSadaf Ebrahimi 	if (FAST_IS_REG(dst))
2291*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | VN4(TMP_FREG1));
2292*22dc650dSSadaf Ebrahimi 
2293*22dc650dSSadaf Ebrahimi 	/* Store the integer value from a VFP register. */
2294*22dc650dSSadaf Ebrahimi 	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
2295*22dc650dSSadaf Ebrahimi }
2296*22dc650dSSadaf Ebrahimi 
/* Shared helper for the integer to floating point conversions.

   ins is the conversion instruction (its SLJIT_32 bit is also used as the
   precision flag of the final store). The integer source is first brought
   into TMP_FREG1: moved from a register, loaded from memory, or materialized
   through TMP_REG1 when it is an immediate. The converted value is written to
   dst when dst is a register, otherwise to TMP_FREG1 and then stored to
   dst / dstw. */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 result_freg = TMP_FREG1;

	if (FAST_IS_REG(dst))
		result_freg = dst;

	if (FAST_IS_REG(src)) {
		FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | VN4(TMP_FREG1)));
	} else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	} else {
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | VN4(TMP_FREG1)));
	}

	FAIL_IF(push_inst32(compiler, ins | VD4(result_freg) | VM4(TMP_FREG1)));

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw);
}
2320*22dc650dSSadaf Ebrahimi 
sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2321*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2322*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
2323*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
2324*22dc650dSSadaf Ebrahimi {
2325*22dc650dSSadaf Ebrahimi 	return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_S32 | (~op & SLJIT_32), dst, dstw, src, srcw);
2326*22dc650dSSadaf Ebrahimi }
2327*22dc650dSSadaf Ebrahimi 
sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2328*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
2329*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
2330*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
2331*22dc650dSSadaf Ebrahimi {
2332*22dc650dSSadaf Ebrahimi 	return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_U32 | (~op & SLJIT_32), dst, dstw, src, srcw);
2333*22dc650dSSadaf Ebrahimi }
2334*22dc650dSSadaf Ebrahimi 
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2335*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2336*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
2337*22dc650dSSadaf Ebrahimi 	sljit_s32 src2, sljit_sw src2w)
2338*22dc650dSSadaf Ebrahimi {
2339*22dc650dSSadaf Ebrahimi 	op ^= SLJIT_32;
2340*22dc650dSSadaf Ebrahimi 
2341*22dc650dSSadaf Ebrahimi 	if (src1 & SLJIT_MEM) {
2342*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
2343*22dc650dSSadaf Ebrahimi 		src1 = TMP_FREG1;
2344*22dc650dSSadaf Ebrahimi 	}
2345*22dc650dSSadaf Ebrahimi 
2346*22dc650dSSadaf Ebrahimi 	if (src2 & SLJIT_MEM) {
2347*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
2348*22dc650dSSadaf Ebrahimi 		src2 = TMP_FREG2;
2349*22dc650dSSadaf Ebrahimi 	}
2350*22dc650dSSadaf Ebrahimi 
2351*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | VD4(src1) | VM4(src2)));
2352*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst32(compiler, VMRS));
2353*22dc650dSSadaf Ebrahimi 
2354*22dc650dSSadaf Ebrahimi 	if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
2355*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
2356*22dc650dSSadaf Ebrahimi 
2357*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst16(compiler, IT | (0x6 << 4) | 0x8));
2358*22dc650dSSadaf Ebrahimi 	return push_inst16(compiler, CMP /* Rm, Rn = r0 */);
2359*22dc650dSSadaf Ebrahimi }
2360*22dc650dSSadaf Ebrahimi 
sljit_emit_fop1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2361*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2362*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
2363*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
2364*22dc650dSSadaf Ebrahimi {
2365*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r;
2366*22dc650dSSadaf Ebrahimi 
2367*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2368*22dc650dSSadaf Ebrahimi 
2369*22dc650dSSadaf Ebrahimi 	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error);
2370*22dc650dSSadaf Ebrahimi 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2371*22dc650dSSadaf Ebrahimi 
2372*22dc650dSSadaf Ebrahimi 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2373*22dc650dSSadaf Ebrahimi 
2374*22dc650dSSadaf Ebrahimi 	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
2375*22dc650dSSadaf Ebrahimi 		op ^= SLJIT_32;
2376*22dc650dSSadaf Ebrahimi 
2377*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
2378*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw));
2379*22dc650dSSadaf Ebrahimi 		src = dst_r;
2380*22dc650dSSadaf Ebrahimi 	}
2381*22dc650dSSadaf Ebrahimi 
2382*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op)) {
2383*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_F64:
2384*22dc650dSSadaf Ebrahimi 		if (src != dst_r) {
2385*22dc650dSSadaf Ebrahimi 			if (!(dst & SLJIT_MEM))
2386*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
2387*22dc650dSSadaf Ebrahimi 			else
2388*22dc650dSSadaf Ebrahimi 				dst_r = src;
2389*22dc650dSSadaf Ebrahimi 		}
2390*22dc650dSSadaf Ebrahimi 		break;
2391*22dc650dSSadaf Ebrahimi 	case SLJIT_NEG_F64:
2392*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
2393*22dc650dSSadaf Ebrahimi 		break;
2394*22dc650dSSadaf Ebrahimi 	case SLJIT_ABS_F64:
2395*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
2396*22dc650dSSadaf Ebrahimi 		break;
2397*22dc650dSSadaf Ebrahimi 	case SLJIT_CONV_F64_FROM_F32:
2398*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
2399*22dc650dSSadaf Ebrahimi 		op ^= SLJIT_32;
2400*22dc650dSSadaf Ebrahimi 		break;
2401*22dc650dSSadaf Ebrahimi 	}
2402*22dc650dSSadaf Ebrahimi 
2403*22dc650dSSadaf Ebrahimi 	if (dst & SLJIT_MEM)
2404*22dc650dSSadaf Ebrahimi 		return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw);
2405*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2406*22dc650dSSadaf Ebrahimi }
2407*22dc650dSSadaf Ebrahimi 
sljit_emit_fop2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2408*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2409*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
2410*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
2411*22dc650dSSadaf Ebrahimi 	sljit_s32 src2, sljit_sw src2w)
2412*22dc650dSSadaf Ebrahimi {
2413*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r;
2414*22dc650dSSadaf Ebrahimi 
2415*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2416*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2417*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
2418*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src1, src1w);
2419*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src2, src2w);
2420*22dc650dSSadaf Ebrahimi 
2421*22dc650dSSadaf Ebrahimi 	op ^= SLJIT_32;
2422*22dc650dSSadaf Ebrahimi 
2423*22dc650dSSadaf Ebrahimi 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2424*22dc650dSSadaf Ebrahimi 	if (src1 & SLJIT_MEM) {
2425*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
2426*22dc650dSSadaf Ebrahimi 		src1 = TMP_FREG1;
2427*22dc650dSSadaf Ebrahimi 	}
2428*22dc650dSSadaf Ebrahimi 	if (src2 & SLJIT_MEM) {
2429*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
2430*22dc650dSSadaf Ebrahimi 		src2 = TMP_FREG2;
2431*22dc650dSSadaf Ebrahimi 	}
2432*22dc650dSSadaf Ebrahimi 
2433*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op)) {
2434*22dc650dSSadaf Ebrahimi 	case SLJIT_ADD_F64:
2435*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
2436*22dc650dSSadaf Ebrahimi 		break;
2437*22dc650dSSadaf Ebrahimi 	case SLJIT_SUB_F64:
2438*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
2439*22dc650dSSadaf Ebrahimi 		break;
2440*22dc650dSSadaf Ebrahimi 	case SLJIT_MUL_F64:
2441*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
2442*22dc650dSSadaf Ebrahimi 		break;
2443*22dc650dSSadaf Ebrahimi 	case SLJIT_DIV_F64:
2444*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
2445*22dc650dSSadaf Ebrahimi 		break;
2446*22dc650dSSadaf Ebrahimi 	case SLJIT_COPYSIGN_F64:
2447*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(src2) | RT4(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0)));
2448*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src1)));
2449*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, CMPI_W | RN4(TMP_REG1) | 0));
2450*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, IT | (0xb << 4) | 0x8));
2451*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(dst_r));
2452*22dc650dSSadaf Ebrahimi 	}
2453*22dc650dSSadaf Ebrahimi 
2454*22dc650dSSadaf Ebrahimi 	if (!(dst & SLJIT_MEM))
2455*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
2456*22dc650dSSadaf Ebrahimi 	return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw);
2457*22dc650dSSadaf Ebrahimi }
2458*22dc650dSSadaf Ebrahimi 
sljit_emit_fset32(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f32 value)2459*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
2460*22dc650dSSadaf Ebrahimi 	sljit_s32 freg, sljit_f32 value)
2461*22dc650dSSadaf Ebrahimi {
2462*22dc650dSSadaf Ebrahimi #if defined(__ARM_NEON) && __ARM_NEON
2463*22dc650dSSadaf Ebrahimi 	sljit_u32 exp;
2464*22dc650dSSadaf Ebrahimi 	sljit_ins ins;
2465*22dc650dSSadaf Ebrahimi #endif /* NEON */
2466*22dc650dSSadaf Ebrahimi 	union {
2467*22dc650dSSadaf Ebrahimi 		sljit_u32 imm;
2468*22dc650dSSadaf Ebrahimi 		sljit_f32 value;
2469*22dc650dSSadaf Ebrahimi 	} u;
2470*22dc650dSSadaf Ebrahimi 
2471*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2472*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_fset32(compiler, freg, value));
2473*22dc650dSSadaf Ebrahimi 
2474*22dc650dSSadaf Ebrahimi 	u.value = value;
2475*22dc650dSSadaf Ebrahimi 
2476*22dc650dSSadaf Ebrahimi #if defined(__ARM_NEON) && __ARM_NEON
2477*22dc650dSSadaf Ebrahimi 	if ((u.imm << (32 - 19)) == 0) {
2478*22dc650dSSadaf Ebrahimi 		exp = (u.imm >> (23 + 2)) & 0x3f;
2479*22dc650dSSadaf Ebrahimi 
2480*22dc650dSSadaf Ebrahimi 		if (exp == 0x20 || exp == 0x1f) {
2481*22dc650dSSadaf Ebrahimi 			ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f);
2482*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
2483*22dc650dSSadaf Ebrahimi 		}
2484*22dc650dSSadaf Ebrahimi 	}
2485*22dc650dSSadaf Ebrahimi #endif /* NEON */
2486*22dc650dSSadaf Ebrahimi 
2487*22dc650dSSadaf Ebrahimi 	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
2488*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG1));
2489*22dc650dSSadaf Ebrahimi }
2490*22dc650dSSadaf Ebrahimi 
sljit_emit_fset64(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f64 value)2491*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
2492*22dc650dSSadaf Ebrahimi 	sljit_s32 freg, sljit_f64 value)
2493*22dc650dSSadaf Ebrahimi {
2494*22dc650dSSadaf Ebrahimi #if defined(__ARM_NEON) && __ARM_NEON
2495*22dc650dSSadaf Ebrahimi 	sljit_u32 exp;
2496*22dc650dSSadaf Ebrahimi 	sljit_ins ins;
2497*22dc650dSSadaf Ebrahimi #endif /* NEON */
2498*22dc650dSSadaf Ebrahimi 	union {
2499*22dc650dSSadaf Ebrahimi 		sljit_u32 imm[2];
2500*22dc650dSSadaf Ebrahimi 		sljit_f64 value;
2501*22dc650dSSadaf Ebrahimi 	} u;
2502*22dc650dSSadaf Ebrahimi 
2503*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2504*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_fset64(compiler, freg, value));
2505*22dc650dSSadaf Ebrahimi 
2506*22dc650dSSadaf Ebrahimi 	u.value = value;
2507*22dc650dSSadaf Ebrahimi 
2508*22dc650dSSadaf Ebrahimi #if defined(__ARM_NEON) && __ARM_NEON
2509*22dc650dSSadaf Ebrahimi 	if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
2510*22dc650dSSadaf Ebrahimi 		exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;
2511*22dc650dSSadaf Ebrahimi 
2512*22dc650dSSadaf Ebrahimi 		if (exp == 0x100 || exp == 0xff) {
2513*22dc650dSSadaf Ebrahimi 			ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f);
2514*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
2515*22dc650dSSadaf Ebrahimi 		}
2516*22dc650dSSadaf Ebrahimi 	}
2517*22dc650dSSadaf Ebrahimi #endif /* NEON */
2518*22dc650dSSadaf Ebrahimi 
2519*22dc650dSSadaf Ebrahimi 	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
2520*22dc650dSSadaf Ebrahimi 	if (u.imm[0] == u.imm[1])
2521*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, VMOV2 | RN4(TMP_REG1) | RT4(TMP_REG1) | VM4(freg));
2522*22dc650dSSadaf Ebrahimi 
2523*22dc650dSSadaf Ebrahimi 	FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
2524*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, VMOV2 | RN4(TMP_REG2) | RT4(TMP_REG1) | VM4(freg));
2525*22dc650dSSadaf Ebrahimi }
2526*22dc650dSSadaf Ebrahimi 
sljit_emit_fcopy(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 freg,sljit_s32 reg)2527*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
2528*22dc650dSSadaf Ebrahimi 	sljit_s32 freg, sljit_s32 reg)
2529*22dc650dSSadaf Ebrahimi {
2530*22dc650dSSadaf Ebrahimi 	sljit_s32 reg2;
2531*22dc650dSSadaf Ebrahimi 	sljit_ins inst;
2532*22dc650dSSadaf Ebrahimi 
2533*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2534*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
2535*22dc650dSSadaf Ebrahimi 
2536*22dc650dSSadaf Ebrahimi 	if (reg & REG_PAIR_MASK) {
2537*22dc650dSSadaf Ebrahimi 		reg2 = REG_PAIR_SECOND(reg);
2538*22dc650dSSadaf Ebrahimi 		reg = REG_PAIR_FIRST(reg);
2539*22dc650dSSadaf Ebrahimi 
2540*22dc650dSSadaf Ebrahimi 		inst = VMOV2 | RN4(reg) | RT4(reg2) | VM4(freg);
2541*22dc650dSSadaf Ebrahimi 	} else {
2542*22dc650dSSadaf Ebrahimi 		inst = VMOV | VN4(freg) | RT4(reg);
2543*22dc650dSSadaf Ebrahimi 
2544*22dc650dSSadaf Ebrahimi 		if (!(op & SLJIT_32))
2545*22dc650dSSadaf Ebrahimi 			inst |= 1 << 7;
2546*22dc650dSSadaf Ebrahimi 	}
2547*22dc650dSSadaf Ebrahimi 
2548*22dc650dSSadaf Ebrahimi 	if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64)
2549*22dc650dSSadaf Ebrahimi 		inst |= 1 << 20;
2550*22dc650dSSadaf Ebrahimi 
2551*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, inst);
2552*22dc650dSSadaf Ebrahimi }
2553*22dc650dSSadaf Ebrahimi 
2554*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
2555*22dc650dSSadaf Ebrahimi /*  Conditional instructions                                             */
2556*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
2557*22dc650dSSadaf Ebrahimi 
get_cc(struct sljit_compiler * compiler,sljit_s32 type)2558*22dc650dSSadaf Ebrahimi static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
2559*22dc650dSSadaf Ebrahimi {
2560*22dc650dSSadaf Ebrahimi 	switch (type) {
2561*22dc650dSSadaf Ebrahimi 	case SLJIT_EQUAL:
2562*22dc650dSSadaf Ebrahimi 	case SLJIT_ATOMIC_STORED:
2563*22dc650dSSadaf Ebrahimi 	case SLJIT_F_EQUAL:
2564*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_EQUAL:
2565*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_EQUAL:
2566*22dc650dSSadaf Ebrahimi 		return 0x0;
2567*22dc650dSSadaf Ebrahimi 
2568*22dc650dSSadaf Ebrahimi 	case SLJIT_NOT_EQUAL:
2569*22dc650dSSadaf Ebrahimi 	case SLJIT_ATOMIC_NOT_STORED:
2570*22dc650dSSadaf Ebrahimi 	case SLJIT_F_NOT_EQUAL:
2571*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_NOT_EQUAL:
2572*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_NOT_EQUAL:
2573*22dc650dSSadaf Ebrahimi 		return 0x1;
2574*22dc650dSSadaf Ebrahimi 
2575*22dc650dSSadaf Ebrahimi 	case SLJIT_CARRY:
2576*22dc650dSSadaf Ebrahimi 		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
2577*22dc650dSSadaf Ebrahimi 			return 0x2;
2578*22dc650dSSadaf Ebrahimi 		/* fallthrough */
2579*22dc650dSSadaf Ebrahimi 
2580*22dc650dSSadaf Ebrahimi 	case SLJIT_LESS:
2581*22dc650dSSadaf Ebrahimi 		return 0x3;
2582*22dc650dSSadaf Ebrahimi 
2583*22dc650dSSadaf Ebrahimi 	case SLJIT_NOT_CARRY:
2584*22dc650dSSadaf Ebrahimi 		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
2585*22dc650dSSadaf Ebrahimi 			return 0x3;
2586*22dc650dSSadaf Ebrahimi 		/* fallthrough */
2587*22dc650dSSadaf Ebrahimi 
2588*22dc650dSSadaf Ebrahimi 	case SLJIT_GREATER_EQUAL:
2589*22dc650dSSadaf Ebrahimi 		return 0x2;
2590*22dc650dSSadaf Ebrahimi 
2591*22dc650dSSadaf Ebrahimi 	case SLJIT_GREATER:
2592*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_GREATER:
2593*22dc650dSSadaf Ebrahimi 		return 0x8;
2594*22dc650dSSadaf Ebrahimi 
2595*22dc650dSSadaf Ebrahimi 	case SLJIT_LESS_EQUAL:
2596*22dc650dSSadaf Ebrahimi 	case SLJIT_F_LESS_EQUAL:
2597*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_LESS_EQUAL:
2598*22dc650dSSadaf Ebrahimi 		return 0x9;
2599*22dc650dSSadaf Ebrahimi 
2600*22dc650dSSadaf Ebrahimi 	case SLJIT_SIG_LESS:
2601*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_LESS:
2602*22dc650dSSadaf Ebrahimi 		return 0xb;
2603*22dc650dSSadaf Ebrahimi 
2604*22dc650dSSadaf Ebrahimi 	case SLJIT_SIG_GREATER_EQUAL:
2605*22dc650dSSadaf Ebrahimi 	case SLJIT_F_GREATER_EQUAL:
2606*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_GREATER_EQUAL:
2607*22dc650dSSadaf Ebrahimi 		return 0xa;
2608*22dc650dSSadaf Ebrahimi 
2609*22dc650dSSadaf Ebrahimi 	case SLJIT_SIG_GREATER:
2610*22dc650dSSadaf Ebrahimi 	case SLJIT_F_GREATER:
2611*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_GREATER:
2612*22dc650dSSadaf Ebrahimi 		return 0xc;
2613*22dc650dSSadaf Ebrahimi 
2614*22dc650dSSadaf Ebrahimi 	case SLJIT_SIG_LESS_EQUAL:
2615*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_LESS_EQUAL:
2616*22dc650dSSadaf Ebrahimi 		return 0xd;
2617*22dc650dSSadaf Ebrahimi 
2618*22dc650dSSadaf Ebrahimi 	case SLJIT_OVERFLOW:
2619*22dc650dSSadaf Ebrahimi 		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
2620*22dc650dSSadaf Ebrahimi 			return 0x1;
2621*22dc650dSSadaf Ebrahimi 		/* fallthrough */
2622*22dc650dSSadaf Ebrahimi 
2623*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED:
2624*22dc650dSSadaf Ebrahimi 		return 0x6;
2625*22dc650dSSadaf Ebrahimi 
2626*22dc650dSSadaf Ebrahimi 	case SLJIT_NOT_OVERFLOW:
2627*22dc650dSSadaf Ebrahimi 		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
2628*22dc650dSSadaf Ebrahimi 			return 0x0;
2629*22dc650dSSadaf Ebrahimi 		/* fallthrough */
2630*22dc650dSSadaf Ebrahimi 
2631*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED:
2632*22dc650dSSadaf Ebrahimi 		return 0x7;
2633*22dc650dSSadaf Ebrahimi 
2634*22dc650dSSadaf Ebrahimi 	case SLJIT_F_LESS:
2635*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_LESS:
2636*22dc650dSSadaf Ebrahimi 		return 0x4;
2637*22dc650dSSadaf Ebrahimi 
2638*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2639*22dc650dSSadaf Ebrahimi 		return 0x5;
2640*22dc650dSSadaf Ebrahimi 
2641*22dc650dSSadaf Ebrahimi 	default: /* SLJIT_JUMP */
2642*22dc650dSSadaf Ebrahimi 		SLJIT_UNREACHABLE();
2643*22dc650dSSadaf Ebrahimi 		return 0xe;
2644*22dc650dSSadaf Ebrahimi 	}
2645*22dc650dSSadaf Ebrahimi }
2646*22dc650dSSadaf Ebrahimi 
sljit_emit_label(struct sljit_compiler * compiler)2647*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2648*22dc650dSSadaf Ebrahimi {
2649*22dc650dSSadaf Ebrahimi 	struct sljit_label *label;
2650*22dc650dSSadaf Ebrahimi 
2651*22dc650dSSadaf Ebrahimi 	CHECK_ERROR_PTR();
2652*22dc650dSSadaf Ebrahimi 	CHECK_PTR(check_sljit_emit_label(compiler));
2653*22dc650dSSadaf Ebrahimi 
2654*22dc650dSSadaf Ebrahimi 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2655*22dc650dSSadaf Ebrahimi 		return compiler->last_label;
2656*22dc650dSSadaf Ebrahimi 
2657*22dc650dSSadaf Ebrahimi 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2658*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(!label);
2659*22dc650dSSadaf Ebrahimi 	set_label(label, compiler);
2660*22dc650dSSadaf Ebrahimi 	return label;
2661*22dc650dSSadaf Ebrahimi }
2662*22dc650dSSadaf Ebrahimi 
sljit_emit_jump(struct sljit_compiler * compiler,sljit_s32 type)2663*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2664*22dc650dSSadaf Ebrahimi {
2665*22dc650dSSadaf Ebrahimi 	struct sljit_jump *jump;
2666*22dc650dSSadaf Ebrahimi 	sljit_ins cc;
2667*22dc650dSSadaf Ebrahimi 
2668*22dc650dSSadaf Ebrahimi 	CHECK_ERROR_PTR();
2669*22dc650dSSadaf Ebrahimi 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
2670*22dc650dSSadaf Ebrahimi 
2671*22dc650dSSadaf Ebrahimi 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2672*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(!jump);
2673*22dc650dSSadaf Ebrahimi 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2674*22dc650dSSadaf Ebrahimi 	type &= 0xff;
2675*22dc650dSSadaf Ebrahimi 
2676*22dc650dSSadaf Ebrahimi 	if (type < SLJIT_JUMP) {
2677*22dc650dSSadaf Ebrahimi 		jump->flags |= IS_COND;
2678*22dc650dSSadaf Ebrahimi 		cc = get_cc(compiler, type);
2679*22dc650dSSadaf Ebrahimi 		jump->flags |= cc << 8;
2680*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
2681*22dc650dSSadaf Ebrahimi 	}
2682*22dc650dSSadaf Ebrahimi 
2683*22dc650dSSadaf Ebrahimi 	jump->addr = compiler->size;
2684*22dc650dSSadaf Ebrahimi 	if (type <= SLJIT_JUMP)
2685*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1)));
2686*22dc650dSSadaf Ebrahimi 	else {
2687*22dc650dSSadaf Ebrahimi 		jump->flags |= IS_BL;
2688*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1)));
2689*22dc650dSSadaf Ebrahimi 	}
2690*22dc650dSSadaf Ebrahimi 
2691*22dc650dSSadaf Ebrahimi 	/* Maximum number of instructions required for generating a constant. */
2692*22dc650dSSadaf Ebrahimi 	compiler->size += JUMP_MAX_SIZE - 1;
2693*22dc650dSSadaf Ebrahimi 	return jump;
2694*22dc650dSSadaf Ebrahimi }
2695*22dc650dSSadaf Ebrahimi 
2696*22dc650dSSadaf Ebrahimi #ifdef __SOFTFP__
2697*22dc650dSSadaf Ebrahimi 
/*
 * Soft-float (__SOFTFP__) argument marshalling emitted before a call.
 *
 * arg_types    packed argument descriptor; its lowest SLJIT_ARG_SHIFT bits
 *              are dropped before the arguments are processed.
 * src          optional (may be NULL) pointer to the operand holding the call
 *              target. When it names a register that an argument move is
 *              about to overwrite, the target is preserved and *src is
 *              updated to the register that holds it afterwards.
 * extra_space  in: only the SLJIT_CALL_RETURN bit is read (the callers in
 *              this file pass the call type); out: number of bytes reserved
 *              on the stack for arguments beyond the first four words
 *              (always a multiple of 8), or 0 when nothing was reserved.
 *
 * Returns SLJIT_SUCCESS; the FAIL_IF paths leave early when an emitter fails.
 */
softfloat_call_with_args(struct sljit_compiler * compiler,sljit_s32 arg_types,sljit_s32 * src,sljit_u32 * extra_space)2698*22dc650dSSadaf Ebrahimi static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space)
2699*22dc650dSSadaf Ebrahimi {
2700*22dc650dSSadaf Ebrahimi 	sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
2701*22dc650dSSadaf Ebrahimi 	sljit_u32 offset = 0;
2702*22dc650dSSadaf Ebrahimi 	sljit_u32 word_arg_offset = 0;
2703*22dc650dSSadaf Ebrahimi 	sljit_u32 float_arg_count = 0;
2704*22dc650dSSadaf Ebrahimi 	sljit_s32 types = 0;
	/* Byte offset (register number * word size) of the call target register;
	   the initial value lies outside the first four words, so it matches none
	   of the register offsets compared below. */
2705*22dc650dSSadaf Ebrahimi 	sljit_u32 src_offset = 4 * sizeof(sljit_sw);
	/* One recorded offset per argument - assumes at most four arguments; TODO confirm. */
2706*22dc650dSSadaf Ebrahimi 	sljit_u8 offsets[4];
2707*22dc650dSSadaf Ebrahimi 	sljit_u8 *offset_ptr = offsets;
2708*22dc650dSSadaf Ebrahimi 
2709*22dc650dSSadaf Ebrahimi 	if (src && FAST_IS_REG(*src))
2710*22dc650dSSadaf Ebrahimi 		src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw);
2711*22dc650dSSadaf Ebrahimi 
2712*22dc650dSSadaf Ebrahimi 	arg_types >>= SLJIT_ARG_SHIFT;
2713*22dc650dSSadaf Ebrahimi 
	/* Pass 1: assign a byte offset to every argument (stored in offsets[])
	   and build `types`, the same argument list in reversed order. */
2714*22dc650dSSadaf Ebrahimi 	while (arg_types) {
2715*22dc650dSSadaf Ebrahimi 		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
2716*22dc650dSSadaf Ebrahimi 
2717*22dc650dSSadaf Ebrahimi 		switch (arg_types & SLJIT_ARG_MASK) {
2718*22dc650dSSadaf Ebrahimi 		case SLJIT_ARG_TYPE_F64:
			/* Doubles start at an offset that is a multiple of 8. */
2719*22dc650dSSadaf Ebrahimi 			if (offset & 0x7)
2720*22dc650dSSadaf Ebrahimi 				offset += sizeof(sljit_sw);
2721*22dc650dSSadaf Ebrahimi 			*offset_ptr++ = (sljit_u8)offset;
2722*22dc650dSSadaf Ebrahimi 			offset += sizeof(sljit_f64);
2723*22dc650dSSadaf Ebrahimi 			float_arg_count++;
2724*22dc650dSSadaf Ebrahimi 			break;
2725*22dc650dSSadaf Ebrahimi 		case SLJIT_ARG_TYPE_F32:
2726*22dc650dSSadaf Ebrahimi 			*offset_ptr++ = (sljit_u8)offset;
2727*22dc650dSSadaf Ebrahimi 			offset += sizeof(sljit_f32);
2728*22dc650dSSadaf Ebrahimi 			float_arg_count++;
2729*22dc650dSSadaf Ebrahimi 			break;
2730*22dc650dSSadaf Ebrahimi 		default:
			/* word_arg_offset only counts word arguments, so it is the offset
			   the argument would have if no float arguments were present. */
2731*22dc650dSSadaf Ebrahimi 			*offset_ptr++ = (sljit_u8)offset;
2732*22dc650dSSadaf Ebrahimi 			offset += sizeof(sljit_sw);
2733*22dc650dSSadaf Ebrahimi 			word_arg_offset += sizeof(sljit_sw);
2734*22dc650dSSadaf Ebrahimi 			break;
2735*22dc650dSSadaf Ebrahimi 		}
2736*22dc650dSSadaf Ebrahimi 
2737*22dc650dSSadaf Ebrahimi 		arg_types >>= SLJIT_ARG_SHIFT;
2738*22dc650dSSadaf Ebrahimi 	}
2739*22dc650dSSadaf Ebrahimi 
	/* Arguments that extend past the first four words need stack space,
	   except for a tail call whose arguments fit into compiler->args_size. */
2740*22dc650dSSadaf Ebrahimi 	if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
2741*22dc650dSSadaf Ebrahimi 		/* Keep lr register on the stack. */
2742*22dc650dSSadaf Ebrahimi 		if (is_tail_call)
2743*22dc650dSSadaf Ebrahimi 			offset += sizeof(sljit_sw);
2744*22dc650dSSadaf Ebrahimi 
		/* Size of the part beyond the first four words, rounded up to a multiple of 8. */
2745*22dc650dSSadaf Ebrahimi 		offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7;
2746*22dc650dSSadaf Ebrahimi 
2747*22dc650dSSadaf Ebrahimi 		*extra_space = offset;
2748*22dc650dSSadaf Ebrahimi 
2749*22dc650dSSadaf Ebrahimi 		if (is_tail_call)
2750*22dc650dSSadaf Ebrahimi 			FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset));
2751*22dc650dSSadaf Ebrahimi 		else
2752*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst16(compiler, SUB_SP_I | (offset >> 2)));
2753*22dc650dSSadaf Ebrahimi 	} else {
2754*22dc650dSSadaf Ebrahimi 		if (is_tail_call)
2755*22dc650dSSadaf Ebrahimi 			FAIL_IF(emit_stack_frame_release(compiler, -1));
2756*22dc650dSSadaf Ebrahimi 		*extra_space = 0;
2757*22dc650dSSadaf Ebrahimi 	}
2758*22dc650dSSadaf Ebrahimi 
	/* The hand-built MOV encodings below use `4 | (1 << 7)` as destination,
	   which presumably encodes register 12 - hence this assertion. */
2759*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(reg_map[TMP_REG1] == 12);
2760*22dc650dSSadaf Ebrahimi 
2761*22dc650dSSadaf Ebrahimi 	/* Process arguments in reversed direction. */
2762*22dc650dSSadaf Ebrahimi 	while (types) {
2763*22dc650dSSadaf Ebrahimi 		switch (types & SLJIT_ARG_MASK) {
2764*22dc650dSSadaf Ebrahimi 		case SLJIT_ARG_TYPE_F64:
2765*22dc650dSSadaf Ebrahimi 			float_arg_count--;
2766*22dc650dSSadaf Ebrahimi 			offset = *(--offset_ptr);
2767*22dc650dSSadaf Ebrahimi 
2768*22dc650dSSadaf Ebrahimi 			SLJIT_ASSERT((offset & 0x7) == 0);
2769*22dc650dSSadaf Ebrahimi 
2770*22dc650dSSadaf Ebrahimi 			if (offset < 4 * sizeof(sljit_sw)) {
				/* The double lands in two consecutive words of the register
				   area; save the call target first if it lives in either. */
2771*22dc650dSSadaf Ebrahimi 				if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) {
2772*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
2773*22dc650dSSadaf Ebrahimi 					*src = TMP_REG1;
2774*22dc650dSSadaf Ebrahimi 				}
				/* Register fields are derived from the byte offsets (offset / 4
				   and offset / 4 + 1); float_arg_count selects the float register. */
2775*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
2776*22dc650dSSadaf Ebrahimi 			} else
				/* Beyond the first four words: stored relative to sp. */
2777*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP)
2778*22dc650dSSadaf Ebrahimi 						| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
2779*22dc650dSSadaf Ebrahimi 			break;
2780*22dc650dSSadaf Ebrahimi 		case SLJIT_ARG_TYPE_F32:
2781*22dc650dSSadaf Ebrahimi 			float_arg_count--;
2782*22dc650dSSadaf Ebrahimi 			offset = *(--offset_ptr);
2783*22dc650dSSadaf Ebrahimi 
2784*22dc650dSSadaf Ebrahimi 			if (offset < 4 * sizeof(sljit_sw)) {
				/* Same call target protection as for doubles, one word only. */
2785*22dc650dSSadaf Ebrahimi 				if (src_offset == offset) {
2786*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
2787*22dc650dSSadaf Ebrahimi 					*src = TMP_REG1;
2788*22dc650dSSadaf Ebrahimi 				}
2789*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10)));
2790*22dc650dSSadaf Ebrahimi 			} else
2791*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP)
2792*22dc650dSSadaf Ebrahimi 						| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
2793*22dc650dSSadaf Ebrahimi 			break;
2794*22dc650dSSadaf Ebrahimi 		default:
2795*22dc650dSSadaf Ebrahimi 			word_arg_offset -= sizeof(sljit_sw);
2796*22dc650dSSadaf Ebrahimi 			offset = *(--offset_ptr);
2797*22dc650dSSadaf Ebrahimi 
2798*22dc650dSSadaf Ebrahimi 			SLJIT_ASSERT(offset >= word_arg_offset);
2799*22dc650dSSadaf Ebrahimi 
			/* Nothing to do when the word argument is already at its final offset. */
2800*22dc650dSSadaf Ebrahimi 			if (offset != word_arg_offset) {
2801*22dc650dSSadaf Ebrahimi 				if (offset < 4 * sizeof(sljit_sw)) {
2802*22dc650dSSadaf Ebrahimi 					if (src_offset == offset) {
						/* The destination register currently holds the call target. */
2803*22dc650dSSadaf Ebrahimi 						FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
2804*22dc650dSSadaf Ebrahimi 						*src = TMP_REG1;
2805*22dc650dSSadaf Ebrahimi 					}
2806*22dc650dSSadaf Ebrahimi 					else if (src_offset == word_arg_offset) {
						/* The call target is the value being moved: track it in its
						   new register (presumably register index = offset / 4 + 1). */
2807*22dc650dSSadaf Ebrahimi 						*src = (sljit_s32)(1 + (offset >> 2));
2808*22dc650dSSadaf Ebrahimi 						src_offset = offset;
2809*22dc650dSSadaf Ebrahimi 					}
2810*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst16(compiler, MOV | (offset >> 2) | (word_arg_offset << 1)));
2811*22dc650dSSadaf Ebrahimi 				} else
2812*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
2813*22dc650dSSadaf Ebrahimi 			}
2814*22dc650dSSadaf Ebrahimi 			break;
2815*22dc650dSSadaf Ebrahimi 		}
2816*22dc650dSSadaf Ebrahimi 
2817*22dc650dSSadaf Ebrahimi 		types >>= SLJIT_ARG_SHIFT;
2818*22dc650dSSadaf Ebrahimi 	}
2819*22dc650dSSadaf Ebrahimi 
2820*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2821*22dc650dSSadaf Ebrahimi }
2822*22dc650dSSadaf Ebrahimi 
/*
 * Soft-float (__SOFTFP__) fix-up emitted after a call returns.
 *
 * The lowest field of `arg_types` is inspected: for SLJIT_ARG_TYPE_F64 one
 * VMOV2 instruction is emitted, for SLJIT_ARG_TYPE_F32 one VMOV instruction;
 * every other value emits nothing. The register fields of both instructions
 * are hard-coded constants.
 *
 * Returns SLJIT_SUCCESS, or leaves early through FAIL_IF.
 */
static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	switch (arg_types & SLJIT_ARG_MASK) {
	case SLJIT_ARG_TYPE_F64:
		FAIL_IF(push_inst32(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0));
		break;
	case SLJIT_ARG_TYPE_F32:
		FAIL_IF(push_inst32(compiler, VMOV | (0 << 16) | (0 << 12)));
		break;
	default:
		/* No floating point result: nothing to transfer. */
		break;
	}

	return SLJIT_SUCCESS;
}
2832*22dc650dSSadaf Ebrahimi 
2833*22dc650dSSadaf Ebrahimi #else
2834*22dc650dSSadaf Ebrahimi 
/*
 * Hard-float argument marshalling emitted before a call: walks the argument
 * types and emits VMOV_F32 register moves whenever an argument's current
 * float register differs from the one chosen for it.
 *
 *  - src_freg      register the current float argument is in now
 *                  (advances with every float argument).
 *  - dst_freg      next unused destination register.
 *  - f32_slot      destination recorded by a previous f32 argument; the next
 *                  f32 argument is moved there with the 0x400000 bit set
 *                  (presumably selecting the other single-precision half -
 *                  confirm against the VMOV encoding) instead of taking a
 *                  new register. 0 means no such slot is pending.
 *
 * Non-float arguments are skipped. Returns SLJIT_SUCCESS, or leaves early
 * through FAIL_IF.
 */
static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_u32 src_freg = SLJIT_FR0;
	sljit_u32 dst_freg = SLJIT_FR0;
	sljit_u32 f32_slot = 0;

	/* The first shift removes the return value; later ones step to the next argument. */
	for (arg_types >>= SLJIT_ARG_SHIFT; arg_types != 0; arg_types >>= SLJIT_ARG_SHIFT) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (src_freg != dst_freg) {
				FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(dst_freg) | VM4(src_freg)));
			}

			dst_freg++;
			src_freg++;
			break;

		case SLJIT_ARG_TYPE_F32:
			if (f32_slot == 0) {
				/* Takes a fresh destination register and remembers it. */
				if (src_freg != dst_freg) {
					FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(dst_freg) | VM4(src_freg)));
				}
				f32_slot = dst_freg;
				dst_freg++;
			} else {
				/* Fills the slot left by the previous f32 argument. */
				FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(f32_slot) | VM4(src_freg)));
				f32_slot = 0;
			}

			src_freg++;
			break;

		default:
			break;
		}
	}

	return SLJIT_SUCCESS;
}
2871*22dc650dSSadaf Ebrahimi 
2872*22dc650dSSadaf Ebrahimi #endif
2873*22dc650dSSadaf Ebrahimi 
sljit_emit_call(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types)2874*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
2875*22dc650dSSadaf Ebrahimi 	sljit_s32 arg_types)
2876*22dc650dSSadaf Ebrahimi {
2877*22dc650dSSadaf Ebrahimi #ifdef __SOFTFP__
2878*22dc650dSSadaf Ebrahimi 	struct sljit_jump *jump;
2879*22dc650dSSadaf Ebrahimi 	sljit_u32 extra_space = (sljit_u32)type;
2880*22dc650dSSadaf Ebrahimi #endif
2881*22dc650dSSadaf Ebrahimi 
2882*22dc650dSSadaf Ebrahimi 	CHECK_ERROR_PTR();
2883*22dc650dSSadaf Ebrahimi 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
2884*22dc650dSSadaf Ebrahimi 
2885*22dc650dSSadaf Ebrahimi #ifdef __SOFTFP__
2886*22dc650dSSadaf Ebrahimi 	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
2887*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space));
2888*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT((extra_space & 0x7) == 0);
2889*22dc650dSSadaf Ebrahimi 
2890*22dc650dSSadaf Ebrahimi 		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
2891*22dc650dSSadaf Ebrahimi 			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
2892*22dc650dSSadaf Ebrahimi 
2893*22dc650dSSadaf Ebrahimi 		SLJIT_SKIP_CHECKS(compiler);
2894*22dc650dSSadaf Ebrahimi 		jump = sljit_emit_jump(compiler, type);
2895*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(jump == NULL);
2896*22dc650dSSadaf Ebrahimi 
2897*22dc650dSSadaf Ebrahimi 		if (extra_space > 0) {
2898*22dc650dSSadaf Ebrahimi 			if (type & SLJIT_CALL_RETURN)
2899*22dc650dSSadaf Ebrahimi 				PTR_FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2)
2900*22dc650dSSadaf Ebrahimi 					| RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw))));
2901*22dc650dSSadaf Ebrahimi 
2902*22dc650dSSadaf Ebrahimi 			PTR_FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2)));
2903*22dc650dSSadaf Ebrahimi 
2904*22dc650dSSadaf Ebrahimi 			if (type & SLJIT_CALL_RETURN) {
2905*22dc650dSSadaf Ebrahimi 				PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG2)));
2906*22dc650dSSadaf Ebrahimi 				return jump;
2907*22dc650dSSadaf Ebrahimi 			}
2908*22dc650dSSadaf Ebrahimi 		}
2909*22dc650dSSadaf Ebrahimi 
2910*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
2911*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
2912*22dc650dSSadaf Ebrahimi 		return jump;
2913*22dc650dSSadaf Ebrahimi 	}
2914*22dc650dSSadaf Ebrahimi #endif /* __SOFTFP__ */
2915*22dc650dSSadaf Ebrahimi 
2916*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_CALL_RETURN) {
2917*22dc650dSSadaf Ebrahimi 		/* ldmia sp!, {..., lr} */
2918*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(emit_stack_frame_release(compiler, -1));
2919*22dc650dSSadaf Ebrahimi 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
2920*22dc650dSSadaf Ebrahimi 	}
2921*22dc650dSSadaf Ebrahimi 
2922*22dc650dSSadaf Ebrahimi #ifndef __SOFTFP__
2923*22dc650dSSadaf Ebrahimi 	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
2924*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
2925*22dc650dSSadaf Ebrahimi #endif /* !__SOFTFP__ */
2926*22dc650dSSadaf Ebrahimi 
2927*22dc650dSSadaf Ebrahimi 	SLJIT_SKIP_CHECKS(compiler);
2928*22dc650dSSadaf Ebrahimi 	return sljit_emit_jump(compiler, type);
2929*22dc650dSSadaf Ebrahimi }
2930*22dc650dSSadaf Ebrahimi 
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)2931*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2932*22dc650dSSadaf Ebrahimi {
2933*22dc650dSSadaf Ebrahimi 	struct sljit_jump *jump;
2934*22dc650dSSadaf Ebrahimi 
2935*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2936*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2937*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src, srcw);
2938*22dc650dSSadaf Ebrahimi 
2939*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
2940*22dc650dSSadaf Ebrahimi 
2941*22dc650dSSadaf Ebrahimi 	if (src != SLJIT_IMM) {
2942*22dc650dSSadaf Ebrahimi 		if (FAST_IS_REG(src)) {
2943*22dc650dSSadaf Ebrahimi 			SLJIT_ASSERT(reg_map[src] != 14);
2944*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));
2945*22dc650dSSadaf Ebrahimi 		}
2946*22dc650dSSadaf Ebrahimi 
2947*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw, TMP_REG1));
2948*22dc650dSSadaf Ebrahimi 		if (type >= SLJIT_FAST_CALL)
2949*22dc650dSSadaf Ebrahimi 			return push_inst16(compiler, BLX | RN3(TMP_REG1));
2950*22dc650dSSadaf Ebrahimi 	}
2951*22dc650dSSadaf Ebrahimi 
2952*22dc650dSSadaf Ebrahimi 	/* These jumps are converted to jump/call instructions when possible. */
2953*22dc650dSSadaf Ebrahimi 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2954*22dc650dSSadaf Ebrahimi 	FAIL_IF(!jump);
2955*22dc650dSSadaf Ebrahimi 	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
2956*22dc650dSSadaf Ebrahimi 	jump->u.target = (sljit_uw)srcw;
2957*22dc650dSSadaf Ebrahimi 
2958*22dc650dSSadaf Ebrahimi 	jump->addr = compiler->size;
2959*22dc650dSSadaf Ebrahimi 	/* Maximum number of instructions required for generating a constant. */
2960*22dc650dSSadaf Ebrahimi 	compiler->size += JUMP_MAX_SIZE - 1;
2961*22dc650dSSadaf Ebrahimi 	return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1));
2962*22dc650dSSadaf Ebrahimi }
2963*22dc650dSSadaf Ebrahimi 
sljit_emit_icall(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types,sljit_s32 src,sljit_sw srcw)2964*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
2965*22dc650dSSadaf Ebrahimi 	sljit_s32 arg_types,
2966*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
2967*22dc650dSSadaf Ebrahimi {
2968*22dc650dSSadaf Ebrahimi #ifdef __SOFTFP__
2969*22dc650dSSadaf Ebrahimi 	sljit_u32 extra_space = (sljit_u32)type;
2970*22dc650dSSadaf Ebrahimi #endif
2971*22dc650dSSadaf Ebrahimi 
2972*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2973*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
2974*22dc650dSSadaf Ebrahimi 
2975*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
2976*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
2977*22dc650dSSadaf Ebrahimi 		src = TMP_REG1;
2978*22dc650dSSadaf Ebrahimi 	}
2979*22dc650dSSadaf Ebrahimi 
2980*22dc650dSSadaf Ebrahimi 	if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
2981*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src)));
2982*22dc650dSSadaf Ebrahimi 		src = TMP_REG1;
2983*22dc650dSSadaf Ebrahimi 	}
2984*22dc650dSSadaf Ebrahimi 
2985*22dc650dSSadaf Ebrahimi #ifdef __SOFTFP__
2986*22dc650dSSadaf Ebrahimi 	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
2987*22dc650dSSadaf Ebrahimi 		FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space));
2988*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT((extra_space & 0x7) == 0);
2989*22dc650dSSadaf Ebrahimi 
2990*22dc650dSSadaf Ebrahimi 		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
2991*22dc650dSSadaf Ebrahimi 			type = SLJIT_JUMP;
2992*22dc650dSSadaf Ebrahimi 
2993*22dc650dSSadaf Ebrahimi 		SLJIT_SKIP_CHECKS(compiler);
2994*22dc650dSSadaf Ebrahimi 		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
2995*22dc650dSSadaf Ebrahimi 
2996*22dc650dSSadaf Ebrahimi 		if (extra_space > 0) {
2997*22dc650dSSadaf Ebrahimi 			if (type & SLJIT_CALL_RETURN)
2998*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2)
2999*22dc650dSSadaf Ebrahimi 					| RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw))));
3000*22dc650dSSadaf Ebrahimi 
3001*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2)));
3002*22dc650dSSadaf Ebrahimi 
3003*22dc650dSSadaf Ebrahimi 			if (type & SLJIT_CALL_RETURN)
3004*22dc650dSSadaf Ebrahimi 				return push_inst16(compiler, BX | RN3(TMP_REG2));
3005*22dc650dSSadaf Ebrahimi 		}
3006*22dc650dSSadaf Ebrahimi 
3007*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
3008*22dc650dSSadaf Ebrahimi 		return softfloat_post_call_with_args(compiler, arg_types);
3009*22dc650dSSadaf Ebrahimi 	}
3010*22dc650dSSadaf Ebrahimi #endif /* __SOFTFP__ */
3011*22dc650dSSadaf Ebrahimi 
3012*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_CALL_RETURN) {
3013*22dc650dSSadaf Ebrahimi 		/* ldmia sp!, {..., lr} */
3014*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_stack_frame_release(compiler, -1));
3015*22dc650dSSadaf Ebrahimi 		type = SLJIT_JUMP;
3016*22dc650dSSadaf Ebrahimi 	}
3017*22dc650dSSadaf Ebrahimi 
3018*22dc650dSSadaf Ebrahimi #ifndef __SOFTFP__
3019*22dc650dSSadaf Ebrahimi 	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
3020*22dc650dSSadaf Ebrahimi 		FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
3021*22dc650dSSadaf Ebrahimi #endif /* !__SOFTFP__ */
3022*22dc650dSSadaf Ebrahimi 
3023*22dc650dSSadaf Ebrahimi 	SLJIT_SKIP_CHECKS(compiler);
3024*22dc650dSSadaf Ebrahimi 	return sljit_emit_ijump(compiler, type, src, srcw);
3025*22dc650dSSadaf Ebrahimi }
3026*22dc650dSSadaf Ebrahimi 
3027*22dc650dSSadaf Ebrahimi #ifdef __SOFTFP__
3028*22dc650dSSadaf Ebrahimi 
emit_fmov_before_return(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)3029*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
3030*22dc650dSSadaf Ebrahimi {
3031*22dc650dSSadaf Ebrahimi 	if (compiler->options & SLJIT_ENTER_REG_ARG) {
3032*22dc650dSSadaf Ebrahimi 		if (src == SLJIT_FR0)
3033*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
3034*22dc650dSSadaf Ebrahimi 
3035*22dc650dSSadaf Ebrahimi 		SLJIT_SKIP_CHECKS(compiler);
3036*22dc650dSSadaf Ebrahimi 		return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
3037*22dc650dSSadaf Ebrahimi 	}
3038*22dc650dSSadaf Ebrahimi 
3039*22dc650dSSadaf Ebrahimi 	if (FAST_IS_REG(src)) {
3040*22dc650dSSadaf Ebrahimi 		if (op & SLJIT_32)
3041*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, VMOV | (1 << 20) | VN4(src) | RT4(SLJIT_R0));
3042*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, VMOV2 | (1 << 20) | VM4(src) | RT4(SLJIT_R0) | RN4(SLJIT_R1));
3043*22dc650dSSadaf Ebrahimi 	}
3044*22dc650dSSadaf Ebrahimi 
3045*22dc650dSSadaf Ebrahimi 	SLJIT_SKIP_CHECKS(compiler);
3046*22dc650dSSadaf Ebrahimi 
3047*22dc650dSSadaf Ebrahimi 	if (op & SLJIT_32)
3048*22dc650dSSadaf Ebrahimi 		return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw);
3049*22dc650dSSadaf Ebrahimi 	return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw);
3050*22dc650dSSadaf Ebrahimi }
3051*22dc650dSSadaf Ebrahimi 
3052*22dc650dSSadaf Ebrahimi #endif /* __SOFTFP__ */
3053*22dc650dSSadaf Ebrahimi 
sljit_emit_op_flags(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 type)3054*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
3055*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
3056*22dc650dSSadaf Ebrahimi 	sljit_s32 type)
3057*22dc650dSSadaf Ebrahimi {
3058*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
3059*22dc650dSSadaf Ebrahimi 	sljit_ins cc;
3060*22dc650dSSadaf Ebrahimi 
3061*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3062*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
3063*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
3064*22dc650dSSadaf Ebrahimi 
3065*22dc650dSSadaf Ebrahimi 	op = GET_OPCODE(op);
3066*22dc650dSSadaf Ebrahimi 	cc = get_cc(compiler, type);
3067*22dc650dSSadaf Ebrahimi 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
3068*22dc650dSSadaf Ebrahimi 
3069*22dc650dSSadaf Ebrahimi 	if (op < SLJIT_ADD) {
3070*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
3071*22dc650dSSadaf Ebrahimi 		if (reg_map[dst_r] > 7) {
3072*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1));
3073*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0));
3074*22dc650dSSadaf Ebrahimi 		} else {
3075*22dc650dSSadaf Ebrahimi 			/* The movsi (immediate) instruction does not set flags in IT block. */
3076*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
3077*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
3078*22dc650dSSadaf Ebrahimi 		}
3079*22dc650dSSadaf Ebrahimi 		if (!(dst & SLJIT_MEM))
3080*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
3081*22dc650dSSadaf Ebrahimi 		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2);
3082*22dc650dSSadaf Ebrahimi 	}
3083*22dc650dSSadaf Ebrahimi 
3084*22dc650dSSadaf Ebrahimi 	if (dst & SLJIT_MEM)
3085*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));
3086*22dc650dSSadaf Ebrahimi 
3087*22dc650dSSadaf Ebrahimi 	if (op == SLJIT_AND) {
3088*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
3089*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 1));
3090*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 0));
3091*22dc650dSSadaf Ebrahimi 	}
3092*22dc650dSSadaf Ebrahimi 	else {
3093*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
3094*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, ((op == SLJIT_OR) ? ORRI : EORI) | RN4(dst_r) | RD4(dst_r) | 1));
3095*22dc650dSSadaf Ebrahimi 	}
3096*22dc650dSSadaf Ebrahimi 
3097*22dc650dSSadaf Ebrahimi 	if (dst & SLJIT_MEM)
3098*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2));
3099*22dc650dSSadaf Ebrahimi 
3100*22dc650dSSadaf Ebrahimi 	if (!(flags & SLJIT_SET_Z))
3101*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3102*22dc650dSSadaf Ebrahimi 
3103*22dc650dSSadaf Ebrahimi 	/* The condition must always be set, even if the ORR/EORI is not executed above. */
3104*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
3105*22dc650dSSadaf Ebrahimi }
3106*22dc650dSSadaf Ebrahimi 
sljit_emit_select(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_reg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2_reg)3107*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
3108*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_reg,
3109*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
3110*22dc650dSSadaf Ebrahimi 	sljit_s32 src2_reg)
3111*22dc650dSSadaf Ebrahimi {
3112*22dc650dSSadaf Ebrahimi 	sljit_uw cc, tmp;
3113*22dc650dSSadaf Ebrahimi 
3114*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3115*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
3116*22dc650dSSadaf Ebrahimi 
3117*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src1, src1w);
3118*22dc650dSSadaf Ebrahimi 
3119*22dc650dSSadaf Ebrahimi 	if (src2_reg != dst_reg && src1 == dst_reg) {
3120*22dc650dSSadaf Ebrahimi 		src1 = src2_reg;
3121*22dc650dSSadaf Ebrahimi 		src1w = 0;
3122*22dc650dSSadaf Ebrahimi 		src2_reg = dst_reg;
3123*22dc650dSSadaf Ebrahimi 		type ^= 0x1;
3124*22dc650dSSadaf Ebrahimi 	}
3125*22dc650dSSadaf Ebrahimi 
3126*22dc650dSSadaf Ebrahimi 	if (src1 & SLJIT_MEM) {
3127*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG1));
3128*22dc650dSSadaf Ebrahimi 
3129*22dc650dSSadaf Ebrahimi 		if (src2_reg != dst_reg) {
3130*22dc650dSSadaf Ebrahimi 			src1 = src2_reg;
3131*22dc650dSSadaf Ebrahimi 			src1w = 0;
3132*22dc650dSSadaf Ebrahimi 			type ^= 0x1;
3133*22dc650dSSadaf Ebrahimi 		} else {
3134*22dc650dSSadaf Ebrahimi 			src1 = TMP_REG1;
3135*22dc650dSSadaf Ebrahimi 			src1w = 0;
3136*22dc650dSSadaf Ebrahimi 		}
3137*22dc650dSSadaf Ebrahimi 	} else if (dst_reg != src2_reg)
3138*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(dst_reg, src2_reg)));
3139*22dc650dSSadaf Ebrahimi 
3140*22dc650dSSadaf Ebrahimi 	cc = get_cc(compiler, type & ~SLJIT_32);
3141*22dc650dSSadaf Ebrahimi 
3142*22dc650dSSadaf Ebrahimi 	if (src1 != SLJIT_IMM) {
3143*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
3144*22dc650dSSadaf Ebrahimi 		return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src1));
3145*22dc650dSSadaf Ebrahimi 	}
3146*22dc650dSSadaf Ebrahimi 
3147*22dc650dSSadaf Ebrahimi 	tmp = (sljit_uw)src1w;
3148*22dc650dSSadaf Ebrahimi 
3149*22dc650dSSadaf Ebrahimi 	if (tmp < 0x10000) {
3150*22dc650dSSadaf Ebrahimi 		/* set low 16 bits, set hi 16 bits to 0. */
3151*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
3152*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, MOVW | RD4(dst_reg)
3153*22dc650dSSadaf Ebrahimi 			| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff));
3154*22dc650dSSadaf Ebrahimi 	}
3155*22dc650dSSadaf Ebrahimi 
3156*22dc650dSSadaf Ebrahimi 	tmp = get_imm((sljit_uw)src1w);
3157*22dc650dSSadaf Ebrahimi 	if (tmp != INVALID_IMM) {
3158*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
3159*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp);
3160*22dc650dSSadaf Ebrahimi 	}
3161*22dc650dSSadaf Ebrahimi 
3162*22dc650dSSadaf Ebrahimi 	tmp = get_imm(~(sljit_uw)src1w);
3163*22dc650dSSadaf Ebrahimi 	if (tmp != INVALID_IMM) {
3164*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
3165*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp);
3166*22dc650dSSadaf Ebrahimi 	}
3167*22dc650dSSadaf Ebrahimi 
3168*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4));
3169*22dc650dSSadaf Ebrahimi 
3170*22dc650dSSadaf Ebrahimi 	tmp = (sljit_uw)src1w;
3171*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg)
3172*22dc650dSSadaf Ebrahimi 		| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)));
3173*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, MOVT | RD4(dst_reg)
3174*22dc650dSSadaf Ebrahimi 		| COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16));
3175*22dc650dSSadaf Ebrahimi }
3176*22dc650dSSadaf Ebrahimi 
sljit_emit_fselect(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2_freg)3177*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3178*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_freg,
3179*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
3180*22dc650dSSadaf Ebrahimi 	sljit_s32 src2_freg)
3181*22dc650dSSadaf Ebrahimi {
3182*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3183*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3184*22dc650dSSadaf Ebrahimi 
3185*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src1, src1w);
3186*22dc650dSSadaf Ebrahimi 
3187*22dc650dSSadaf Ebrahimi 	type ^= SLJIT_32;
3188*22dc650dSSadaf Ebrahimi 
3189*22dc650dSSadaf Ebrahimi 	if (dst_freg != src2_freg) {
3190*22dc650dSSadaf Ebrahimi 		if (dst_freg == src1) {
3191*22dc650dSSadaf Ebrahimi 			src1 = src2_freg;
3192*22dc650dSSadaf Ebrahimi 			src1w = 0;
3193*22dc650dSSadaf Ebrahimi 			type ^= 0x1;
3194*22dc650dSSadaf Ebrahimi 		} else
3195*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src2_freg)));
3196*22dc650dSSadaf Ebrahimi 	}
3197*22dc650dSSadaf Ebrahimi 
3198*22dc650dSSadaf Ebrahimi 	if (src1 & SLJIT_MEM) {
3199*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG2, src1, src1w));
3200*22dc650dSSadaf Ebrahimi 		src1 = TMP_FREG2;
3201*22dc650dSSadaf Ebrahimi 	}
3202*22dc650dSSadaf Ebrahimi 
3203*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst16(compiler, IT | (get_cc(compiler, type & ~SLJIT_32) << 4) | 0x8));
3204*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src1));
3205*22dc650dSSadaf Ebrahimi }
3206*22dc650dSSadaf Ebrahimi 
sljit_emit_mem(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)3207*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
3208*22dc650dSSadaf Ebrahimi 	sljit_s32 reg,
3209*22dc650dSSadaf Ebrahimi 	sljit_s32 mem, sljit_sw memw)
3210*22dc650dSSadaf Ebrahimi {
3211*22dc650dSSadaf Ebrahimi 	sljit_s32 flags;
3212*22dc650dSSadaf Ebrahimi 	sljit_uw imm, tmp;
3213*22dc650dSSadaf Ebrahimi 
3214*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3215*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
3216*22dc650dSSadaf Ebrahimi 
3217*22dc650dSSadaf Ebrahimi 	if (!(reg & REG_PAIR_MASK))
3218*22dc650dSSadaf Ebrahimi 		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
3219*22dc650dSSadaf Ebrahimi 
3220*22dc650dSSadaf Ebrahimi 	if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)) {
3221*22dc650dSSadaf Ebrahimi 		if ((mem & REG_MASK) == 0) {
3222*22dc650dSSadaf Ebrahimi 			if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) {
3223*22dc650dSSadaf Ebrahimi 				imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff));
3224*22dc650dSSadaf Ebrahimi 
3225*22dc650dSSadaf Ebrahimi 				if (imm != INVALID_IMM)
3226*22dc650dSSadaf Ebrahimi 					memw = (memw & 0xfff) - 0x1000;
3227*22dc650dSSadaf Ebrahimi 			} else {
3228*22dc650dSSadaf Ebrahimi 				imm = get_imm((sljit_uw)(memw & ~0xfff));
3229*22dc650dSSadaf Ebrahimi 
3230*22dc650dSSadaf Ebrahimi 				if (imm != INVALID_IMM)
3231*22dc650dSSadaf Ebrahimi 					memw &= 0xfff;
3232*22dc650dSSadaf Ebrahimi 			}
3233*22dc650dSSadaf Ebrahimi 
3234*22dc650dSSadaf Ebrahimi 			if (imm == INVALID_IMM) {
3235*22dc650dSSadaf Ebrahimi 				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
3236*22dc650dSSadaf Ebrahimi 				memw = 0;
3237*22dc650dSSadaf Ebrahimi 			} else
3238*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm));
3239*22dc650dSSadaf Ebrahimi 
3240*22dc650dSSadaf Ebrahimi 			mem = SLJIT_MEM1(TMP_REG1);
3241*22dc650dSSadaf Ebrahimi 		} else if (mem & OFFS_REG_MASK) {
3242*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6)));
3243*22dc650dSSadaf Ebrahimi 			memw = 0;
3244*22dc650dSSadaf Ebrahimi 			mem = SLJIT_MEM1(TMP_REG1);
3245*22dc650dSSadaf Ebrahimi 		} else if (memw < -0xff) {
3246*22dc650dSSadaf Ebrahimi 			/* Zero value can be included in the first case. */
3247*22dc650dSSadaf Ebrahimi 			if ((-memw & 0xfff) <= SSIZE_OF(sw))
3248*22dc650dSSadaf Ebrahimi 				tmp = (sljit_uw)((-memw + 0x7ff) & ~0x7ff);
3249*22dc650dSSadaf Ebrahimi 			else
3250*22dc650dSSadaf Ebrahimi 				tmp = (sljit_uw)((-memw + 0xfff) & ~0xfff);
3251*22dc650dSSadaf Ebrahimi 
3252*22dc650dSSadaf Ebrahimi 			SLJIT_ASSERT(tmp >= (sljit_uw)-memw);
3253*22dc650dSSadaf Ebrahimi 			imm = get_imm(tmp);
3254*22dc650dSSadaf Ebrahimi 
3255*22dc650dSSadaf Ebrahimi 			if (imm != INVALID_IMM) {
3256*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
3257*22dc650dSSadaf Ebrahimi 				memw += (sljit_sw)tmp;
3258*22dc650dSSadaf Ebrahimi 				SLJIT_ASSERT(memw >= 0 && memw <= 0xfff - SSIZE_OF(sw));
3259*22dc650dSSadaf Ebrahimi 			} else {
3260*22dc650dSSadaf Ebrahimi 				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
3261*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
3262*22dc650dSSadaf Ebrahimi 				memw = 0;
3263*22dc650dSSadaf Ebrahimi 			}
3264*22dc650dSSadaf Ebrahimi 
3265*22dc650dSSadaf Ebrahimi 			mem = SLJIT_MEM1(TMP_REG1);
3266*22dc650dSSadaf Ebrahimi 		} else if (memw >= (0x1000 - SSIZE_OF(sw))) {
3267*22dc650dSSadaf Ebrahimi 			if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) {
3268*22dc650dSSadaf Ebrahimi 				imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff));
3269*22dc650dSSadaf Ebrahimi 
3270*22dc650dSSadaf Ebrahimi 				if (imm != INVALID_IMM)
3271*22dc650dSSadaf Ebrahimi 					memw = (memw & 0xfff) - 0x1000;
3272*22dc650dSSadaf Ebrahimi 			} else {
3273*22dc650dSSadaf Ebrahimi 				imm = get_imm((sljit_uw)(memw & ~0xfff));
3274*22dc650dSSadaf Ebrahimi 
3275*22dc650dSSadaf Ebrahimi 				if (imm != INVALID_IMM)
3276*22dc650dSSadaf Ebrahimi 					memw &= 0xfff;
3277*22dc650dSSadaf Ebrahimi 			}
3278*22dc650dSSadaf Ebrahimi 
3279*22dc650dSSadaf Ebrahimi 			if (imm != INVALID_IMM) {
3280*22dc650dSSadaf Ebrahimi 				SLJIT_ASSERT(memw >= -0xff && memw <= 0xfff);
3281*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
3282*22dc650dSSadaf Ebrahimi 			} else {
3283*22dc650dSSadaf Ebrahimi 				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
3284*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
3285*22dc650dSSadaf Ebrahimi 				memw = 0;
3286*22dc650dSSadaf Ebrahimi 			}
3287*22dc650dSSadaf Ebrahimi 
3288*22dc650dSSadaf Ebrahimi 			mem = SLJIT_MEM1(TMP_REG1);
3289*22dc650dSSadaf Ebrahimi 		}
3290*22dc650dSSadaf Ebrahimi 
3291*22dc650dSSadaf Ebrahimi 		flags = WORD_SIZE;
3292*22dc650dSSadaf Ebrahimi 
3293*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(memw <= 0xfff - SSIZE_OF(sw) && memw >= -0xff);
3294*22dc650dSSadaf Ebrahimi 
3295*22dc650dSSadaf Ebrahimi 		if (type & SLJIT_MEM_STORE) {
3296*22dc650dSSadaf Ebrahimi 			flags |= STORE;
3297*22dc650dSSadaf Ebrahimi 		} else if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
3298*22dc650dSSadaf Ebrahimi 			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2));
3299*22dc650dSSadaf Ebrahimi 			return emit_op_mem(compiler, WORD_SIZE, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2);
3300*22dc650dSSadaf Ebrahimi 		}
3301*22dc650dSSadaf Ebrahimi 
3302*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2));
3303*22dc650dSSadaf Ebrahimi 		return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2);
3304*22dc650dSSadaf Ebrahimi 	}
3305*22dc650dSSadaf Ebrahimi 
3306*22dc650dSSadaf Ebrahimi 	flags = 1 << 23;
3307*22dc650dSSadaf Ebrahimi 
3308*22dc650dSSadaf Ebrahimi 	if ((mem & REG_MASK) == 0) {
3309*22dc650dSSadaf Ebrahimi 		tmp = (sljit_uw)(memw & 0x7fc);
3310*22dc650dSSadaf Ebrahimi 		imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));
3311*22dc650dSSadaf Ebrahimi 
3312*22dc650dSSadaf Ebrahimi 		if (imm == INVALID_IMM) {
3313*22dc650dSSadaf Ebrahimi 			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
3314*22dc650dSSadaf Ebrahimi 			memw = 0;
3315*22dc650dSSadaf Ebrahimi 		} else {
3316*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm));
3317*22dc650dSSadaf Ebrahimi 			memw = (memw & 0x3fc) >> 2;
3318*22dc650dSSadaf Ebrahimi 
3319*22dc650dSSadaf Ebrahimi 			if (tmp > 0x400) {
3320*22dc650dSSadaf Ebrahimi 				memw = 0x100 - memw;
3321*22dc650dSSadaf Ebrahimi 				flags = 0;
3322*22dc650dSSadaf Ebrahimi 			}
3323*22dc650dSSadaf Ebrahimi 
3324*22dc650dSSadaf Ebrahimi 			SLJIT_ASSERT(memw >= 0 && memw <= 0xff);
3325*22dc650dSSadaf Ebrahimi 		}
3326*22dc650dSSadaf Ebrahimi 
3327*22dc650dSSadaf Ebrahimi 		mem = SLJIT_MEM1(TMP_REG1);
3328*22dc650dSSadaf Ebrahimi 	} else if (mem & OFFS_REG_MASK) {
3329*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6)));
3330*22dc650dSSadaf Ebrahimi 		memw = 0;
3331*22dc650dSSadaf Ebrahimi 		mem = SLJIT_MEM1(TMP_REG1);
3332*22dc650dSSadaf Ebrahimi 	} else if (memw < 0) {
3333*22dc650dSSadaf Ebrahimi 		if ((-memw & ~0x3fc) == 0) {
3334*22dc650dSSadaf Ebrahimi 			flags = 0;
3335*22dc650dSSadaf Ebrahimi 			memw = -memw >> 2;
3336*22dc650dSSadaf Ebrahimi 		} else {
3337*22dc650dSSadaf Ebrahimi 			tmp = (sljit_uw)(-memw & 0x7fc);
3338*22dc650dSSadaf Ebrahimi 			imm = get_imm((sljit_uw)((-memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));
3339*22dc650dSSadaf Ebrahimi 
3340*22dc650dSSadaf Ebrahimi 			if (imm != INVALID_IMM) {
3341*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
3342*22dc650dSSadaf Ebrahimi 				memw = (-memw & 0x3fc) >> 2;
3343*22dc650dSSadaf Ebrahimi 
3344*22dc650dSSadaf Ebrahimi 				if (tmp <= 0x400)
3345*22dc650dSSadaf Ebrahimi 					flags = 0;
3346*22dc650dSSadaf Ebrahimi 				else
3347*22dc650dSSadaf Ebrahimi 					memw = 0x100 - memw;
3348*22dc650dSSadaf Ebrahimi 			} else {
3349*22dc650dSSadaf Ebrahimi 				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
3350*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
3351*22dc650dSSadaf Ebrahimi 				memw = 0;
3352*22dc650dSSadaf Ebrahimi 			}
3353*22dc650dSSadaf Ebrahimi 
3354*22dc650dSSadaf Ebrahimi 			mem = SLJIT_MEM1(TMP_REG1);
3355*22dc650dSSadaf Ebrahimi 		}
3356*22dc650dSSadaf Ebrahimi 	} else if ((memw & ~0x3fc) != 0) {
3357*22dc650dSSadaf Ebrahimi 		tmp = (sljit_uw)(memw & 0x7fc);
3358*22dc650dSSadaf Ebrahimi 		imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));
3359*22dc650dSSadaf Ebrahimi 
3360*22dc650dSSadaf Ebrahimi 		if (imm != INVALID_IMM) {
3361*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
3362*22dc650dSSadaf Ebrahimi 			memw = (memw & 0x3fc) >> 2;
3363*22dc650dSSadaf Ebrahimi 
3364*22dc650dSSadaf Ebrahimi 			if (tmp > 0x400) {
3365*22dc650dSSadaf Ebrahimi 				memw = 0x100 - memw;
3366*22dc650dSSadaf Ebrahimi 				flags = 0;
3367*22dc650dSSadaf Ebrahimi 			}
3368*22dc650dSSadaf Ebrahimi 		} else {
3369*22dc650dSSadaf Ebrahimi 			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
3370*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
3371*22dc650dSSadaf Ebrahimi 			memw = 0;
3372*22dc650dSSadaf Ebrahimi 		}
3373*22dc650dSSadaf Ebrahimi 
3374*22dc650dSSadaf Ebrahimi 		mem = SLJIT_MEM1(TMP_REG1);
3375*22dc650dSSadaf Ebrahimi 	} else
3376*22dc650dSSadaf Ebrahimi 		memw >>= 2;
3377*22dc650dSSadaf Ebrahimi 
3378*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(memw >= 0 && memw <= 0xff);
3379*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, ((type & SLJIT_MEM_STORE) ? STRD : LDRD) | (sljit_ins)flags | RN4(mem & REG_MASK) | RT4(REG_PAIR_FIRST(reg)) | RD4(REG_PAIR_SECOND(reg)) | (sljit_ins)memw);
3380*22dc650dSSadaf Ebrahimi }
3381*22dc650dSSadaf Ebrahimi 
sljit_emit_mem_update(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)3382*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
3383*22dc650dSSadaf Ebrahimi 	sljit_s32 reg,
3384*22dc650dSSadaf Ebrahimi 	sljit_s32 mem, sljit_sw memw)
3385*22dc650dSSadaf Ebrahimi {
3386*22dc650dSSadaf Ebrahimi 	sljit_s32 flags;
3387*22dc650dSSadaf Ebrahimi 	sljit_ins inst;
3388*22dc650dSSadaf Ebrahimi 
3389*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3390*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));
3391*22dc650dSSadaf Ebrahimi 
3392*22dc650dSSadaf Ebrahimi 	if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -255))
3393*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3394*22dc650dSSadaf Ebrahimi 
3395*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_MEM_SUPP)
3396*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3397*22dc650dSSadaf Ebrahimi 
3398*22dc650dSSadaf Ebrahimi 	switch (type & 0xff) {
3399*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV:
3400*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U32:
3401*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_S32:
3402*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV32:
3403*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_P:
3404*22dc650dSSadaf Ebrahimi 		flags = WORD_SIZE;
3405*22dc650dSSadaf Ebrahimi 		break;
3406*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U8:
3407*22dc650dSSadaf Ebrahimi 		flags = BYTE_SIZE;
3408*22dc650dSSadaf Ebrahimi 		break;
3409*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_S8:
3410*22dc650dSSadaf Ebrahimi 		flags = BYTE_SIZE | SIGNED;
3411*22dc650dSSadaf Ebrahimi 		break;
3412*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U16:
3413*22dc650dSSadaf Ebrahimi 		flags = HALF_SIZE;
3414*22dc650dSSadaf Ebrahimi 		break;
3415*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_S16:
3416*22dc650dSSadaf Ebrahimi 		flags = HALF_SIZE | SIGNED;
3417*22dc650dSSadaf Ebrahimi 		break;
3418*22dc650dSSadaf Ebrahimi 	default:
3419*22dc650dSSadaf Ebrahimi 		SLJIT_UNREACHABLE();
3420*22dc650dSSadaf Ebrahimi 		flags = WORD_SIZE;
3421*22dc650dSSadaf Ebrahimi 		break;
3422*22dc650dSSadaf Ebrahimi 	}
3423*22dc650dSSadaf Ebrahimi 
3424*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_MEM_STORE)
3425*22dc650dSSadaf Ebrahimi 		flags |= STORE;
3426*22dc650dSSadaf Ebrahimi 
3427*22dc650dSSadaf Ebrahimi 	inst = sljit_mem32[flags] | 0x900;
3428*22dc650dSSadaf Ebrahimi 
3429*22dc650dSSadaf Ebrahimi 	if (!(type & SLJIT_MEM_POST))
3430*22dc650dSSadaf Ebrahimi 		inst |= 0x400;
3431*22dc650dSSadaf Ebrahimi 
3432*22dc650dSSadaf Ebrahimi 	if (memw >= 0)
3433*22dc650dSSadaf Ebrahimi 		inst |= 0x200;
3434*22dc650dSSadaf Ebrahimi 	else
3435*22dc650dSSadaf Ebrahimi 		memw = -memw;
3436*22dc650dSSadaf Ebrahimi 
3437*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | (sljit_ins)memw);
3438*22dc650dSSadaf Ebrahimi }
3439*22dc650dSSadaf Ebrahimi 
/* Rewrites the address (*mem, *memw) into a plain base register plus a small
   immediate offset, emitting the instructions needed to compute the base.

   On return *mem holds a register index (the original base register or
   TMP_REG1, without the SLJIT_MEM flag) and *memw holds the remaining
   offset:
     - base + (index << shift): the sum is computed into TMP_REG1, *memw = 0
     - offset already in [-0xff, max_offset]: nothing is emitted
     - otherwise the offset is split into a part added to (or subtracted
       from) the base into TMP_REG1 and a small remainder left in *memw.

   max_offset is the largest positive offset the caller can use; the callers
   visible in this file pass 0xfff - 4 so that *memw + 4 is still usable.
   Returns SLJIT_SUCCESS or the compiler error code. */
static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset)
{
	sljit_s32 arg = *mem;
	sljit_sw argw = *memw;
	sljit_uw imm;

	*mem = TMP_REG1;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		*memw = 0;
		return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((sljit_uw)(argw & 0x3) << 6));
	}

	arg &= REG_MASK;

	if (arg) {
		if (argw <= max_offset && argw >= -0xff) {
			/* The offset is usable as it is, keep the original base register. */
			*mem = arg;
			return SLJIT_SUCCESS;
		}

		/* get_imm() reports a value that cannot be encoded with INVALID_IMM,
		   which is the sentinel every other caller in this file tests for.
		   A plain non-zero test would accept it and emit an ADD / SUB with
		   a bogus immediate, so the result is compared explicitly and the
		   load_immediate() fallback below handles the failure. */
		if (argw < 0) {
			imm = get_imm((sljit_uw)(-argw & ~0xff));

			if (imm != INVALID_IMM) {
				*memw = -(-argw & 0xff);
				return push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		} else if ((argw & 0xfff) <= max_offset) {
			imm = get_imm((sljit_uw)(argw & ~0xfff));

			if (imm != INVALID_IMM) {
				*memw = argw & 0xfff;
				return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		} else {
			/* The low 12 bits exceed max_offset: round the added part up
			   and leave a negative remainder. */
			imm = get_imm((sljit_uw)((argw | 0xfff) + 1));

			if (imm != INVALID_IMM) {
				*memw = (argw & 0xfff) - 0x1000;
				return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		}
	}

	/* Fallback (also used for absolute addresses): load the upper part of
	   the offset into TMP_REG1 and add the base register, if there is one. */
	imm = (sljit_uw)(argw & ~0xfff);

	if ((argw & 0xfff) > max_offset) {
		imm += 0x1000;
		*memw = (argw & 0xfff) - 0x1000;
	} else
		*memw = argw & 0xfff;

	FAIL_IF(load_immediate(compiler, TMP_REG1, imm));

	if (arg == 0)
		return SLJIT_SUCCESS;

	return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, arg));
}
3500*22dc650dSSadaf Ebrahimi 
sljit_emit_fmem(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 mem,sljit_sw memw)3501*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
3502*22dc650dSSadaf Ebrahimi 	sljit_s32 freg,
3503*22dc650dSSadaf Ebrahimi 	sljit_s32 mem, sljit_sw memw)
3504*22dc650dSSadaf Ebrahimi {
3505*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3506*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
3507*22dc650dSSadaf Ebrahimi 
3508*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_MEM_ALIGNED_32)
3509*22dc650dSSadaf Ebrahimi 		return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);
3510*22dc650dSSadaf Ebrahimi 
3511*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_MEM_STORE) {
3512*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | RT4(TMP_REG2)));
3513*22dc650dSSadaf Ebrahimi 
3514*22dc650dSSadaf Ebrahimi 		if (type & SLJIT_32)
3515*22dc650dSSadaf Ebrahimi 			return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1);
3516*22dc650dSSadaf Ebrahimi 
3517*22dc650dSSadaf Ebrahimi 		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
3518*22dc650dSSadaf Ebrahimi 		mem |= SLJIT_MEM;
3519*22dc650dSSadaf Ebrahimi 
3520*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1));
3521*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | 0x80 | RT4(TMP_REG2)));
3522*22dc650dSSadaf Ebrahimi 		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw + 4, TMP_REG1);
3523*22dc650dSSadaf Ebrahimi 	}
3524*22dc650dSSadaf Ebrahimi 
3525*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_32) {
3526*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
3527*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG2));
3528*22dc650dSSadaf Ebrahimi 	}
3529*22dc650dSSadaf Ebrahimi 
3530*22dc650dSSadaf Ebrahimi 	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
3531*22dc650dSSadaf Ebrahimi 	mem |= SLJIT_MEM;
3532*22dc650dSSadaf Ebrahimi 
3533*22dc650dSSadaf Ebrahimi 	FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
3534*22dc650dSSadaf Ebrahimi 	FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, mem, memw + 4, TMP_REG1));
3535*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, VMOV2 | VM4(freg) | RT4(TMP_REG2) | RN4(TMP_REG1));
3536*22dc650dSSadaf Ebrahimi }
3537*22dc650dSSadaf Ebrahimi 
/* Reduces the address (*mem_ptr, memw) to a single register holding the
   effective address. *mem_ptr is replaced by that register: the original
   base register when memw is zero, TMP_REG1 otherwise (the instructions
   computing it are emitted here).
   Returns SLJIT_SUCCESS or the compiler error code. */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 mem = *mem_ptr;
	sljit_s32 base;
	sljit_uw enc;

	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		/* base + (index << shift) */
		*mem_ptr = TMP_REG1;
		return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6));
	}

	base = mem & REG_MASK;

	if (SLJIT_UNLIKELY(!base)) {
		/* Absolute address. */
		*mem_ptr = TMP_REG1;
		return load_immediate(compiler, TMP_REG1, (sljit_uw)memw);
	}

	if (memw == 0) {
		*mem_ptr = base;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG1;

	/* Prefer a single ADD / SUB with the encoded magnitude of the offset. */
	enc = get_imm((sljit_uw)(memw < 0 ? -memw : memw));

	if (enc == INVALID_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
		return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, base));
	}

	return push_inst32(compiler, ((memw < 0) ? SUB_WI : ADD_WI) | RD4(TMP_REG1) | RN4(base) | enc);
}
3569*22dc650dSSadaf Ebrahimi 
simd_get_quad_reg_index(sljit_s32 freg)3570*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
3571*22dc650dSSadaf Ebrahimi {
3572*22dc650dSSadaf Ebrahimi 	freg += freg & 0x1;
3573*22dc650dSSadaf Ebrahimi 
3574*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));
3575*22dc650dSSadaf Ebrahimi 
3576*22dc650dSSadaf Ebrahimi 	if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)
3577*22dc650dSSadaf Ebrahimi 		freg--;
3578*22dc650dSSadaf Ebrahimi 
3579*22dc650dSSadaf Ebrahimi 	return freg;
3580*22dc650dSSadaf Ebrahimi }
3581*22dc650dSSadaf Ebrahimi 
/* Yields +1 for an odd freg and -1 for an even one; presumably the index
   delta to the other half of a quad register (confirm at the use sites). */
#define SLJIT_QUAD_OTHER_HALF(freg) (2 * ((freg) & 0x1) - 1)
3583*22dc650dSSadaf Ebrahimi 
sljit_emit_simd_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 srcdst,sljit_sw srcdstw)3584*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3585*22dc650dSSadaf Ebrahimi 	sljit_s32 freg,
3586*22dc650dSSadaf Ebrahimi 	sljit_s32 srcdst, sljit_sw srcdstw)
3587*22dc650dSSadaf Ebrahimi {
3588*22dc650dSSadaf Ebrahimi 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3589*22dc650dSSadaf Ebrahimi 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3590*22dc650dSSadaf Ebrahimi 	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3591*22dc650dSSadaf Ebrahimi 	sljit_ins ins;
3592*22dc650dSSadaf Ebrahimi 
3593*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3594*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3595*22dc650dSSadaf Ebrahimi 
3596*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3597*22dc650dSSadaf Ebrahimi 
3598*22dc650dSSadaf Ebrahimi 	if (reg_size != 3 && reg_size != 4)
3599*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3600*22dc650dSSadaf Ebrahimi 
3601*22dc650dSSadaf Ebrahimi 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3602*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3603*22dc650dSSadaf Ebrahimi 
3604*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_TEST)
3605*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3606*22dc650dSSadaf Ebrahimi 
3607*22dc650dSSadaf Ebrahimi 	if (reg_size == 4)
3608*22dc650dSSadaf Ebrahimi 		freg = simd_get_quad_reg_index(freg);
3609*22dc650dSSadaf Ebrahimi 
3610*22dc650dSSadaf Ebrahimi 	if (!(srcdst & SLJIT_MEM)) {
3611*22dc650dSSadaf Ebrahimi 		if (reg_size == 4)
3612*22dc650dSSadaf Ebrahimi 			srcdst = simd_get_quad_reg_index(srcdst);
3613*22dc650dSSadaf Ebrahimi 
3614*22dc650dSSadaf Ebrahimi 		if (type & SLJIT_SIMD_STORE)
3615*22dc650dSSadaf Ebrahimi 			ins = VD4(srcdst) | VN4(freg) | VM4(freg);
3616*22dc650dSSadaf Ebrahimi 		else
3617*22dc650dSSadaf Ebrahimi 			ins = VD4(freg) | VN4(srcdst) | VM4(srcdst);
3618*22dc650dSSadaf Ebrahimi 
3619*22dc650dSSadaf Ebrahimi 		if (reg_size == 4)
3620*22dc650dSSadaf Ebrahimi 			ins |= (sljit_ins)1 << 6;
3621*22dc650dSSadaf Ebrahimi 
3622*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, VORR | ins);
3623*22dc650dSSadaf Ebrahimi 	}
3624*22dc650dSSadaf Ebrahimi 
3625*22dc650dSSadaf Ebrahimi 	FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3626*22dc650dSSadaf Ebrahimi 
3627*22dc650dSSadaf Ebrahimi 	if (elem_size > 3)
3628*22dc650dSSadaf Ebrahimi 		elem_size = 3;
3629*22dc650dSSadaf Ebrahimi 
3630*22dc650dSSadaf Ebrahimi 	ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD4(freg)
3631*22dc650dSSadaf Ebrahimi 		| (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));
3632*22dc650dSSadaf Ebrahimi 
3633*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(reg_size >= alignment);
3634*22dc650dSSadaf Ebrahimi 
3635*22dc650dSSadaf Ebrahimi 	if (alignment == 3)
3636*22dc650dSSadaf Ebrahimi 		ins |= 0x10;
3637*22dc650dSSadaf Ebrahimi 	else if (alignment >= 4)
3638*22dc650dSSadaf Ebrahimi 		ins |= 0x20;
3639*22dc650dSSadaf Ebrahimi 
3640*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, ins | RN4(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf);
3641*22dc650dSSadaf Ebrahimi }
3642*22dc650dSSadaf Ebrahimi 
simd_get_imm(sljit_s32 elem_size,sljit_uw value)3643*22dc650dSSadaf Ebrahimi static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
3644*22dc650dSSadaf Ebrahimi {
3645*22dc650dSSadaf Ebrahimi 	sljit_ins result;
3646*22dc650dSSadaf Ebrahimi 
3647*22dc650dSSadaf Ebrahimi 	if (elem_size > 1 && (sljit_u16)value == (value >> 16)) {
3648*22dc650dSSadaf Ebrahimi 		elem_size = 1;
3649*22dc650dSSadaf Ebrahimi 		value = (sljit_u16)value;
3650*22dc650dSSadaf Ebrahimi 	}
3651*22dc650dSSadaf Ebrahimi 
3652*22dc650dSSadaf Ebrahimi 	if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
3653*22dc650dSSadaf Ebrahimi 		elem_size = 0;
3654*22dc650dSSadaf Ebrahimi 		value = (sljit_u8)value;
3655*22dc650dSSadaf Ebrahimi 	}
3656*22dc650dSSadaf Ebrahimi 
3657*22dc650dSSadaf Ebrahimi 	switch (elem_size) {
3658*22dc650dSSadaf Ebrahimi 	case 0:
3659*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(value <= 0xff);
3660*22dc650dSSadaf Ebrahimi 		result = 0xe00;
3661*22dc650dSSadaf Ebrahimi 		break;
3662*22dc650dSSadaf Ebrahimi 	case 1:
3663*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(value <= 0xffff);
3664*22dc650dSSadaf Ebrahimi 		result = 0;
3665*22dc650dSSadaf Ebrahimi 
3666*22dc650dSSadaf Ebrahimi 		while (1) {
3667*22dc650dSSadaf Ebrahimi 			if (value <= 0xff) {
3668*22dc650dSSadaf Ebrahimi 				result |= 0x800;
3669*22dc650dSSadaf Ebrahimi 				break;
3670*22dc650dSSadaf Ebrahimi 			}
3671*22dc650dSSadaf Ebrahimi 
3672*22dc650dSSadaf Ebrahimi 			if ((value & 0xff) == 0) {
3673*22dc650dSSadaf Ebrahimi 				value >>= 8;
3674*22dc650dSSadaf Ebrahimi 				result |= 0xa00;
3675*22dc650dSSadaf Ebrahimi 				break;
3676*22dc650dSSadaf Ebrahimi 			}
3677*22dc650dSSadaf Ebrahimi 
3678*22dc650dSSadaf Ebrahimi 			if (result != 0)
3679*22dc650dSSadaf Ebrahimi 				return ~(sljit_ins)0;
3680*22dc650dSSadaf Ebrahimi 
3681*22dc650dSSadaf Ebrahimi 			value ^= (sljit_uw)0xffff;
3682*22dc650dSSadaf Ebrahimi 			result = (1 << 5);
3683*22dc650dSSadaf Ebrahimi 		}
3684*22dc650dSSadaf Ebrahimi 		break;
3685*22dc650dSSadaf Ebrahimi 	default:
3686*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(value <= 0xffffffff);
3687*22dc650dSSadaf Ebrahimi 		result = 0;
3688*22dc650dSSadaf Ebrahimi 
3689*22dc650dSSadaf Ebrahimi 		while (1) {
3690*22dc650dSSadaf Ebrahimi 			if (value <= 0xff) {
3691*22dc650dSSadaf Ebrahimi 				result |= 0x000;
3692*22dc650dSSadaf Ebrahimi 				break;
3693*22dc650dSSadaf Ebrahimi 			}
3694*22dc650dSSadaf Ebrahimi 
3695*22dc650dSSadaf Ebrahimi 			if ((value & ~(sljit_uw)0xff00) == 0) {
3696*22dc650dSSadaf Ebrahimi 				value >>= 8;
3697*22dc650dSSadaf Ebrahimi 				result |= 0x200;
3698*22dc650dSSadaf Ebrahimi 				break;
3699*22dc650dSSadaf Ebrahimi 			}
3700*22dc650dSSadaf Ebrahimi 
3701*22dc650dSSadaf Ebrahimi 			if ((value & ~(sljit_uw)0xff0000) == 0) {
3702*22dc650dSSadaf Ebrahimi 				value >>= 16;
3703*22dc650dSSadaf Ebrahimi 				result |= 0x400;
3704*22dc650dSSadaf Ebrahimi 				break;
3705*22dc650dSSadaf Ebrahimi 			}
3706*22dc650dSSadaf Ebrahimi 
3707*22dc650dSSadaf Ebrahimi 			if ((value & ~(sljit_uw)0xff000000) == 0) {
3708*22dc650dSSadaf Ebrahimi 				value >>= 24;
3709*22dc650dSSadaf Ebrahimi 				result |= 0x600;
3710*22dc650dSSadaf Ebrahimi 				break;
3711*22dc650dSSadaf Ebrahimi 			}
3712*22dc650dSSadaf Ebrahimi 
3713*22dc650dSSadaf Ebrahimi 			if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
3714*22dc650dSSadaf Ebrahimi 				value >>= 8;
3715*22dc650dSSadaf Ebrahimi 				result |= 0xc00;
3716*22dc650dSSadaf Ebrahimi 				break;
3717*22dc650dSSadaf Ebrahimi 			}
3718*22dc650dSSadaf Ebrahimi 
3719*22dc650dSSadaf Ebrahimi 			if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
3720*22dc650dSSadaf Ebrahimi 				value >>= 16;
3721*22dc650dSSadaf Ebrahimi 				result |= 0xd00;
3722*22dc650dSSadaf Ebrahimi 				break;
3723*22dc650dSSadaf Ebrahimi 			}
3724*22dc650dSSadaf Ebrahimi 
3725*22dc650dSSadaf Ebrahimi 			if (result != 0)
3726*22dc650dSSadaf Ebrahimi 				return ~(sljit_ins)0;
3727*22dc650dSSadaf Ebrahimi 
3728*22dc650dSSadaf Ebrahimi 			value = ~value;
3729*22dc650dSSadaf Ebrahimi 			result = (1 << 5);
3730*22dc650dSSadaf Ebrahimi 		}
3731*22dc650dSSadaf Ebrahimi 		break;
3732*22dc650dSSadaf Ebrahimi 	}
3733*22dc650dSSadaf Ebrahimi 
3734*22dc650dSSadaf Ebrahimi 	return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 21) | result;
3735*22dc650dSSadaf Ebrahimi }
3736*22dc650dSSadaf Ebrahimi 
sljit_emit_simd_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)3737*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3738*22dc650dSSadaf Ebrahimi 	sljit_s32 freg,
3739*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
3740*22dc650dSSadaf Ebrahimi {
3741*22dc650dSSadaf Ebrahimi 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3742*22dc650dSSadaf Ebrahimi 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3743*22dc650dSSadaf Ebrahimi 	sljit_ins ins, imm;
3744*22dc650dSSadaf Ebrahimi 
3745*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3746*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
3747*22dc650dSSadaf Ebrahimi 
3748*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src, srcw);
3749*22dc650dSSadaf Ebrahimi 
3750*22dc650dSSadaf Ebrahimi 	if (reg_size != 3 && reg_size != 4)
3751*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3752*22dc650dSSadaf Ebrahimi 
3753*22dc650dSSadaf Ebrahimi 	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
3754*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3755*22dc650dSSadaf Ebrahimi 
3756*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_TEST)
3757*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3758*22dc650dSSadaf Ebrahimi 
3759*22dc650dSSadaf Ebrahimi 	if (reg_size == 4)
3760*22dc650dSSadaf Ebrahimi 		freg = simd_get_quad_reg_index(freg);
3761*22dc650dSSadaf Ebrahimi 
3762*22dc650dSSadaf Ebrahimi 	if (src == SLJIT_IMM && srcw == 0)
3763*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD4(freg));
3764*22dc650dSSadaf Ebrahimi 
3765*22dc650dSSadaf Ebrahimi 	if (SLJIT_UNLIKELY(elem_size == 3)) {
3766*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT);
3767*22dc650dSSadaf Ebrahimi 
3768*22dc650dSSadaf Ebrahimi 		if (src & SLJIT_MEM) {
3769*22dc650dSSadaf Ebrahimi 			FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw));
3770*22dc650dSSadaf Ebrahimi 			src = freg;
3771*22dc650dSSadaf Ebrahimi 		} else if (freg != src)
3772*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)));
3773*22dc650dSSadaf Ebrahimi 
3774*22dc650dSSadaf Ebrahimi 		freg += SLJIT_QUAD_OTHER_HALF(freg);
3775*22dc650dSSadaf Ebrahimi 
3776*22dc650dSSadaf Ebrahimi 		if (freg != src)
3777*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src));
3778*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3779*22dc650dSSadaf Ebrahimi 	}
3780*22dc650dSSadaf Ebrahimi 
3781*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
3782*22dc650dSSadaf Ebrahimi 		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
3783*22dc650dSSadaf Ebrahimi 
3784*22dc650dSSadaf Ebrahimi 		ins = (sljit_ins)(elem_size << 6);
3785*22dc650dSSadaf Ebrahimi 
3786*22dc650dSSadaf Ebrahimi 		if (reg_size == 4)
3787*22dc650dSSadaf Ebrahimi 			ins |= 1 << 5;
3788*22dc650dSSadaf Ebrahimi 
3789*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, VLD1_r | ins | VD4(freg) | RN4(src) | 0xf);
3790*22dc650dSSadaf Ebrahimi 	}
3791*22dc650dSSadaf Ebrahimi 
3792*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_FLOAT) {
3793*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(elem_size == 2);
3794*22dc650dSSadaf Ebrahimi 		ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));
3795*22dc650dSSadaf Ebrahimi 
3796*22dc650dSSadaf Ebrahimi 		if (reg_size == 4)
3797*22dc650dSSadaf Ebrahimi 			ins |= (sljit_ins)1 << 6;
3798*22dc650dSSadaf Ebrahimi 
3799*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, VDUP_s | ins | VD4(freg) | (sljit_ins)freg_map[src]);
3800*22dc650dSSadaf Ebrahimi 	}
3801*22dc650dSSadaf Ebrahimi 
3802*22dc650dSSadaf Ebrahimi 	if (src == SLJIT_IMM) {
3803*22dc650dSSadaf Ebrahimi 		if (elem_size < 2)
3804*22dc650dSSadaf Ebrahimi 			srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
3805*22dc650dSSadaf Ebrahimi 
3806*22dc650dSSadaf Ebrahimi 		imm = simd_get_imm(elem_size, (sljit_uw)srcw);
3807*22dc650dSSadaf Ebrahimi 
3808*22dc650dSSadaf Ebrahimi 		if (imm != ~(sljit_ins)0) {
3809*22dc650dSSadaf Ebrahimi 			if (reg_size == 4)
3810*22dc650dSSadaf Ebrahimi 				imm |= (sljit_ins)1 << 6;
3811*22dc650dSSadaf Ebrahimi 
3812*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, VMOV_i | imm | VD4(freg));
3813*22dc650dSSadaf Ebrahimi 		}
3814*22dc650dSSadaf Ebrahimi 
3815*22dc650dSSadaf Ebrahimi 		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
3816*22dc650dSSadaf Ebrahimi 		src = TMP_REG1;
3817*22dc650dSSadaf Ebrahimi 	}
3818*22dc650dSSadaf Ebrahimi 
3819*22dc650dSSadaf Ebrahimi 	switch (elem_size) {
3820*22dc650dSSadaf Ebrahimi 	case 0:
3821*22dc650dSSadaf Ebrahimi 		ins = 1 << 22;
3822*22dc650dSSadaf Ebrahimi 		break;
3823*22dc650dSSadaf Ebrahimi 	case 1:
3824*22dc650dSSadaf Ebrahimi 		ins = 1 << 5;
3825*22dc650dSSadaf Ebrahimi 		break;
3826*22dc650dSSadaf Ebrahimi 	default:
3827*22dc650dSSadaf Ebrahimi 		ins = 0;
3828*22dc650dSSadaf Ebrahimi 		break;
3829*22dc650dSSadaf Ebrahimi 	}
3830*22dc650dSSadaf Ebrahimi 
3831*22dc650dSSadaf Ebrahimi 	if (reg_size == 4)
3832*22dc650dSSadaf Ebrahimi 		ins |= (sljit_ins)1 << 21;
3833*22dc650dSSadaf Ebrahimi 
3834*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, VDUP | ins | VN4(freg) | RT4(src));
3835*22dc650dSSadaf Ebrahimi }
3836*22dc650dSSadaf Ebrahimi 
sljit_emit_simd_lane_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 lane_index,sljit_s32 srcdst,sljit_sw srcdstw)3837*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
3838*22dc650dSSadaf Ebrahimi 	sljit_s32 freg, sljit_s32 lane_index,
3839*22dc650dSSadaf Ebrahimi 	sljit_s32 srcdst, sljit_sw srcdstw)
3840*22dc650dSSadaf Ebrahimi {
3841*22dc650dSSadaf Ebrahimi 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3842*22dc650dSSadaf Ebrahimi 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3843*22dc650dSSadaf Ebrahimi 	sljit_ins ins;
3844*22dc650dSSadaf Ebrahimi 
3845*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3846*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
3847*22dc650dSSadaf Ebrahimi 
3848*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3849*22dc650dSSadaf Ebrahimi 
3850*22dc650dSSadaf Ebrahimi 	if (reg_size != 3 && reg_size != 4)
3851*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3852*22dc650dSSadaf Ebrahimi 
3853*22dc650dSSadaf Ebrahimi 	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
3854*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3855*22dc650dSSadaf Ebrahimi 
3856*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_TEST)
3857*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3858*22dc650dSSadaf Ebrahimi 
3859*22dc650dSSadaf Ebrahimi 	if (reg_size == 4)
3860*22dc650dSSadaf Ebrahimi 		freg = simd_get_quad_reg_index(freg);
3861*22dc650dSSadaf Ebrahimi 
3862*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_LANE_ZERO) {
3863*22dc650dSSadaf Ebrahimi 		ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);
3864*22dc650dSSadaf Ebrahimi 
3865*22dc650dSSadaf Ebrahimi 		if (type & SLJIT_SIMD_FLOAT) {
3866*22dc650dSSadaf Ebrahimi 			if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
3867*22dc650dSSadaf Ebrahimi 				if (lane_index == 1)
3868*22dc650dSSadaf Ebrahimi 					freg += SLJIT_QUAD_OTHER_HALF(freg);
3869*22dc650dSSadaf Ebrahimi 
3870*22dc650dSSadaf Ebrahimi 				if (srcdst != freg)
3871*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(srcdst) | VM4(srcdst)));
3872*22dc650dSSadaf Ebrahimi 
3873*22dc650dSSadaf Ebrahimi 				freg += SLJIT_QUAD_OTHER_HALF(freg);
3874*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, VMOV_i | VD4(freg));
3875*22dc650dSSadaf Ebrahimi 			}
3876*22dc650dSSadaf Ebrahimi 
3877*22dc650dSSadaf Ebrahimi 			if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) {
3878*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst32(compiler, VORR | ins | VD4(TMP_FREG2) | VN4(freg) | VM4(freg)));
3879*22dc650dSSadaf Ebrahimi 				srcdst = TMP_FREG2;
3880*22dc650dSSadaf Ebrahimi 				srcdstw = 0;
3881*22dc650dSSadaf Ebrahimi 			}
3882*22dc650dSSadaf Ebrahimi 		}
3883*22dc650dSSadaf Ebrahimi 
3884*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VMOV_i | ins | VD4(freg)));
3885*22dc650dSSadaf Ebrahimi 	}
3886*22dc650dSSadaf Ebrahimi 
3887*22dc650dSSadaf Ebrahimi 	if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
3888*22dc650dSSadaf Ebrahimi 		lane_index -= (0x8 >> elem_size);
3889*22dc650dSSadaf Ebrahimi 		freg += SLJIT_QUAD_OTHER_HALF(freg);
3890*22dc650dSSadaf Ebrahimi 	}
3891*22dc650dSSadaf Ebrahimi 
3892*22dc650dSSadaf Ebrahimi 	if (srcdst & SLJIT_MEM) {
3893*22dc650dSSadaf Ebrahimi 		if (elem_size == 3)
3894*22dc650dSSadaf Ebrahimi 			return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw);
3895*22dc650dSSadaf Ebrahimi 
3896*22dc650dSSadaf Ebrahimi 		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3897*22dc650dSSadaf Ebrahimi 
3898*22dc650dSSadaf Ebrahimi 		lane_index = lane_index << elem_size;
3899*22dc650dSSadaf Ebrahimi 		ins = (sljit_ins)((elem_size << 10) | (lane_index << 5));
3900*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD4(freg) | RN4(srcdst) | 0xf);
3901*22dc650dSSadaf Ebrahimi 	}
3902*22dc650dSSadaf Ebrahimi 
3903*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_FLOAT) {
3904*22dc650dSSadaf Ebrahimi 		if (elem_size == 3) {
3905*22dc650dSSadaf Ebrahimi 			if (type & SLJIT_SIMD_STORE)
3906*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, VORR | VD4(srcdst) | VN4(freg) | VM4(freg));
3907*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(freg) | VM4(srcdst));
3908*22dc650dSSadaf Ebrahimi 		}
3909*22dc650dSSadaf Ebrahimi 
3910*22dc650dSSadaf Ebrahimi 		if (type & SLJIT_SIMD_STORE) {
3911*22dc650dSSadaf Ebrahimi 			if (freg_ebit_map[freg] == 0) {
3912*22dc650dSSadaf Ebrahimi 				if (lane_index == 1)
3913*22dc650dSSadaf Ebrahimi 					freg = SLJIT_F64_SECOND(freg);
3914*22dc650dSSadaf Ebrahimi 
3915*22dc650dSSadaf Ebrahimi 				return push_inst32(compiler, VMOV_F32 | VD4(srcdst) | VM4(freg));
3916*22dc650dSSadaf Ebrahimi 			}
3917*22dc650dSSadaf Ebrahimi 
3918*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1)));
3919*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, VMOV | VN4(srcdst) | RT4(TMP_REG1));
3920*22dc650dSSadaf Ebrahimi 		}
3921*22dc650dSSadaf Ebrahimi 
3922*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(srcdst) | RT4(TMP_REG1)));
3923*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1));
3924*22dc650dSSadaf Ebrahimi 	}
3925*22dc650dSSadaf Ebrahimi 
3926*22dc650dSSadaf Ebrahimi 	if (srcdst == SLJIT_IMM) {
3927*22dc650dSSadaf Ebrahimi 		if (elem_size < 2)
3928*22dc650dSSadaf Ebrahimi 			srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
3929*22dc650dSSadaf Ebrahimi 
3930*22dc650dSSadaf Ebrahimi 		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw));
3931*22dc650dSSadaf Ebrahimi 		srcdst = TMP_REG1;
3932*22dc650dSSadaf Ebrahimi 	}
3933*22dc650dSSadaf Ebrahimi 
3934*22dc650dSSadaf Ebrahimi 	if (elem_size == 0)
3935*22dc650dSSadaf Ebrahimi 		ins = 0x400000;
3936*22dc650dSSadaf Ebrahimi 	else if (elem_size == 1)
3937*22dc650dSSadaf Ebrahimi 		ins = 0x20;
3938*22dc650dSSadaf Ebrahimi 	else
3939*22dc650dSSadaf Ebrahimi 		ins = 0;
3940*22dc650dSSadaf Ebrahimi 
3941*22dc650dSSadaf Ebrahimi 	lane_index = lane_index << elem_size;
3942*22dc650dSSadaf Ebrahimi 	ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));
3943*22dc650dSSadaf Ebrahimi 
3944*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_STORE) {
3945*22dc650dSSadaf Ebrahimi 		ins |= (1 << 20);
3946*22dc650dSSadaf Ebrahimi 
3947*22dc650dSSadaf Ebrahimi 		if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
3948*22dc650dSSadaf Ebrahimi 			ins |= (1 << 23);
3949*22dc650dSSadaf Ebrahimi 	}
3950*22dc650dSSadaf Ebrahimi 
3951*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, VMOV_s | ins | VN4(freg) | RT4(srcdst));
3952*22dc650dSSadaf Ebrahimi }
3953*22dc650dSSadaf Ebrahimi 
sljit_emit_simd_lane_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_s32 src_lane_index)3954*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3955*22dc650dSSadaf Ebrahimi 	sljit_s32 freg,
3956*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_s32 src_lane_index)
3957*22dc650dSSadaf Ebrahimi {
3958*22dc650dSSadaf Ebrahimi 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3959*22dc650dSSadaf Ebrahimi 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3960*22dc650dSSadaf Ebrahimi 	sljit_ins ins;
3961*22dc650dSSadaf Ebrahimi 
3962*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3963*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
3964*22dc650dSSadaf Ebrahimi 
3965*22dc650dSSadaf Ebrahimi 	if (reg_size != 3 && reg_size != 4)
3966*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3967*22dc650dSSadaf Ebrahimi 
3968*22dc650dSSadaf Ebrahimi 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3969*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3970*22dc650dSSadaf Ebrahimi 
3971*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_TEST)
3972*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3973*22dc650dSSadaf Ebrahimi 
3974*22dc650dSSadaf Ebrahimi 	if (reg_size == 4) {
3975*22dc650dSSadaf Ebrahimi 		freg = simd_get_quad_reg_index(freg);
3976*22dc650dSSadaf Ebrahimi 		src = simd_get_quad_reg_index(src);
3977*22dc650dSSadaf Ebrahimi 
3978*22dc650dSSadaf Ebrahimi 		if (src_lane_index >= (0x8 >> elem_size)) {
3979*22dc650dSSadaf Ebrahimi 			src_lane_index -= (0x8 >> elem_size);
3980*22dc650dSSadaf Ebrahimi 			src += SLJIT_QUAD_OTHER_HALF(src);
3981*22dc650dSSadaf Ebrahimi 		}
3982*22dc650dSSadaf Ebrahimi 	}
3983*22dc650dSSadaf Ebrahimi 
3984*22dc650dSSadaf Ebrahimi 	if (elem_size == 3) {
3985*22dc650dSSadaf Ebrahimi 		if (freg != src)
3986*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)));
3987*22dc650dSSadaf Ebrahimi 
3988*22dc650dSSadaf Ebrahimi 		freg += SLJIT_QUAD_OTHER_HALF(freg);
3989*22dc650dSSadaf Ebrahimi 
3990*22dc650dSSadaf Ebrahimi 		if (freg != src)
3991*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src));
3992*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3993*22dc650dSSadaf Ebrahimi 	}
3994*22dc650dSSadaf Ebrahimi 
3995*22dc650dSSadaf Ebrahimi 	ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size));
3996*22dc650dSSadaf Ebrahimi 
3997*22dc650dSSadaf Ebrahimi 	if (reg_size == 4)
3998*22dc650dSSadaf Ebrahimi 		ins |= (sljit_ins)1 << 6;
3999*22dc650dSSadaf Ebrahimi 
4000*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, VDUP_s | ins | VD4(freg) | VM4(src));
4001*22dc650dSSadaf Ebrahimi }
4002*22dc650dSSadaf Ebrahimi 
sljit_emit_simd_extend(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)4003*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
4004*22dc650dSSadaf Ebrahimi 	sljit_s32 freg,
4005*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
4006*22dc650dSSadaf Ebrahimi {
4007*22dc650dSSadaf Ebrahimi 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4008*22dc650dSSadaf Ebrahimi 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4009*22dc650dSSadaf Ebrahimi 	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
4010*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_reg;
4011*22dc650dSSadaf Ebrahimi 
4012*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
4013*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
4014*22dc650dSSadaf Ebrahimi 
4015*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src, srcw);
4016*22dc650dSSadaf Ebrahimi 
4017*22dc650dSSadaf Ebrahimi 	if (reg_size != 3 && reg_size != 4)
4018*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
4019*22dc650dSSadaf Ebrahimi 
4020*22dc650dSSadaf Ebrahimi 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
4021*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
4022*22dc650dSSadaf Ebrahimi 
4023*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_TEST)
4024*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
4025*22dc650dSSadaf Ebrahimi 
4026*22dc650dSSadaf Ebrahimi 	if (reg_size == 4)
4027*22dc650dSSadaf Ebrahimi 		freg = simd_get_quad_reg_index(freg);
4028*22dc650dSSadaf Ebrahimi 
4029*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
4030*22dc650dSSadaf Ebrahimi 		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
4031*22dc650dSSadaf Ebrahimi 		if (reg_size == 4 && elem2_size - elem_size == 1)
4032*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, VLD1 | (0x7 << 8) | VD4(freg) | RN4(src) | 0xf));
4033*22dc650dSSadaf Ebrahimi 		else
4034*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD4(freg) | RN4(src) | 0xf));
4035*22dc650dSSadaf Ebrahimi 		src = freg;
4036*22dc650dSSadaf Ebrahimi 	} else if (reg_size == 4)
4037*22dc650dSSadaf Ebrahimi 		src = simd_get_quad_reg_index(src);
4038*22dc650dSSadaf Ebrahimi 
4039*22dc650dSSadaf Ebrahimi 	if (!(type & SLJIT_SIMD_FLOAT)) {
4040*22dc650dSSadaf Ebrahimi 		dst_reg = (reg_size == 4) ? freg : TMP_FREG2;
4041*22dc650dSSadaf Ebrahimi 
4042*22dc650dSSadaf Ebrahimi 		do {
4043*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst32(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 28))
4044*22dc650dSSadaf Ebrahimi 				| ((sljit_ins)1 << (19 + elem_size)) | VD4(dst_reg) | VM4(src)));
4045*22dc650dSSadaf Ebrahimi 			src = dst_reg;
4046*22dc650dSSadaf Ebrahimi 		} while (++elem_size < elem2_size);
4047*22dc650dSSadaf Ebrahimi 
4048*22dc650dSSadaf Ebrahimi 		if (dst_reg == TMP_FREG2)
4049*22dc650dSSadaf Ebrahimi 			return push_inst32(compiler, VORR | VD4(freg) | VN4(TMP_FREG2) | VM4(TMP_FREG2));
4050*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
4051*22dc650dSSadaf Ebrahimi 	}
4052*22dc650dSSadaf Ebrahimi 
4053*22dc650dSSadaf Ebrahimi 	/* No SIMD variant, must use VFP instead. */
4054*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(reg_size == 4);
4055*22dc650dSSadaf Ebrahimi 
4056*22dc650dSSadaf Ebrahimi 	if (freg == src) {
4057*22dc650dSSadaf Ebrahimi 		freg += SLJIT_QUAD_OTHER_HALF(freg);
4058*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20));
4059*22dc650dSSadaf Ebrahimi 		freg += SLJIT_QUAD_OTHER_HALF(freg);
4060*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src));
4061*22dc650dSSadaf Ebrahimi 	}
4062*22dc650dSSadaf Ebrahimi 
4063*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src)));
4064*22dc650dSSadaf Ebrahimi 	freg += SLJIT_QUAD_OTHER_HALF(freg);
4065*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20);
4066*22dc650dSSadaf Ebrahimi }
4067*22dc650dSSadaf Ebrahimi 
sljit_emit_simd_sign(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 dst,sljit_sw dstw)4068*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
4069*22dc650dSSadaf Ebrahimi 	sljit_s32 freg,
4070*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw)
4071*22dc650dSSadaf Ebrahimi {
4072*22dc650dSSadaf Ebrahimi 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4073*22dc650dSSadaf Ebrahimi 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4074*22dc650dSSadaf Ebrahimi 	sljit_ins ins, imms;
4075*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r;
4076*22dc650dSSadaf Ebrahimi 
4077*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
4078*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
4079*22dc650dSSadaf Ebrahimi 
4080*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
4081*22dc650dSSadaf Ebrahimi 
4082*22dc650dSSadaf Ebrahimi 	if (reg_size != 3 && reg_size != 4)
4083*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
4084*22dc650dSSadaf Ebrahimi 
4085*22dc650dSSadaf Ebrahimi 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4086*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
4087*22dc650dSSadaf Ebrahimi 
4088*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_TEST)
4089*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
4090*22dc650dSSadaf Ebrahimi 
4091*22dc650dSSadaf Ebrahimi 	switch (elem_size) {
4092*22dc650dSSadaf Ebrahimi 	case 0:
4093*22dc650dSSadaf Ebrahimi 		imms = 0x243219;
4094*22dc650dSSadaf Ebrahimi 		ins = VSHR | (1 << 28) | (0x9 << 16);
4095*22dc650dSSadaf Ebrahimi 		break;
4096*22dc650dSSadaf Ebrahimi 	case 1:
4097*22dc650dSSadaf Ebrahimi 		imms = (reg_size == 4) ? 0x243219 : 0x2231;
4098*22dc650dSSadaf Ebrahimi 		ins = VSHR | (1 << 28) | (0x11 << 16);
4099*22dc650dSSadaf Ebrahimi 		break;
4100*22dc650dSSadaf Ebrahimi 	case 2:
4101*22dc650dSSadaf Ebrahimi 		imms = (reg_size == 4) ? 0x2231 : 0x21;
4102*22dc650dSSadaf Ebrahimi 		ins = VSHR | (1 << 28) | (0x21 << 16);
4103*22dc650dSSadaf Ebrahimi 		break;
4104*22dc650dSSadaf Ebrahimi 	default:
4105*22dc650dSSadaf Ebrahimi 		imms = 0x21;
4106*22dc650dSSadaf Ebrahimi 		ins = VSHR | (1 << 28) | (0x1 << 16) | (1 << 7);
4107*22dc650dSSadaf Ebrahimi 		break;
4108*22dc650dSSadaf Ebrahimi 	}
4109*22dc650dSSadaf Ebrahimi 
4110*22dc650dSSadaf Ebrahimi 	if (reg_size == 4) {
4111*22dc650dSSadaf Ebrahimi 		freg = simd_get_quad_reg_index(freg);
4112*22dc650dSSadaf Ebrahimi 		ins |= (sljit_ins)1 << 6;
4113*22dc650dSSadaf Ebrahimi 	}
4114*22dc650dSSadaf Ebrahimi 
4115*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0);
4116*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst32(compiler, ins | VD4(TMP_FREG2) | VM4(freg)));
4117*22dc650dSSadaf Ebrahimi 
4118*22dc650dSSadaf Ebrahimi 	if (reg_size == 4 && elem_size > 0)
4119*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));
4120*22dc650dSSadaf Ebrahimi 
4121*22dc650dSSadaf Ebrahimi 	ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;
4122*22dc650dSSadaf Ebrahimi 
4123*22dc650dSSadaf Ebrahimi 	while (imms >= 0x100) {
4124*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | ((imms & 0xff) << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));
4125*22dc650dSSadaf Ebrahimi 		imms >>= 8;
4126*22dc650dSSadaf Ebrahimi 	}
4127*22dc650dSSadaf Ebrahimi 
4128*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | (1 << 7) | (imms << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));
4129*22dc650dSSadaf Ebrahimi 
4130*22dc650dSSadaf Ebrahimi 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4131*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(dst_r) | VN4(TMP_FREG2)));
4132*22dc650dSSadaf Ebrahimi 
4133*22dc650dSSadaf Ebrahimi 	if (reg_size == 4 && elem_size == 0) {
4134*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
4135*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(TMP_REG2)| VN4(TMP_FREG1)));
4136*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst32(compiler, ORR_W | RD4(dst_r) | RN4(dst_r) | RM4(TMP_REG2) | (0x2 << 12)));
4137*22dc650dSSadaf Ebrahimi 	}
4138*22dc650dSSadaf Ebrahimi 
4139*22dc650dSSadaf Ebrahimi 	if (dst_r == TMP_REG1)
4140*22dc650dSSadaf Ebrahimi 		return emit_op_mem(compiler, STORE | WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
4141*22dc650dSSadaf Ebrahimi 
4142*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
4143*22dc650dSSadaf Ebrahimi }
4144*22dc650dSSadaf Ebrahimi 
sljit_emit_simd_op2(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1_freg,sljit_s32 src2_freg)4145*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4146*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4147*22dc650dSSadaf Ebrahimi {
4148*22dc650dSSadaf Ebrahimi 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4149*22dc650dSSadaf Ebrahimi 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4150*22dc650dSSadaf Ebrahimi 	sljit_ins ins = 0;
4151*22dc650dSSadaf Ebrahimi 
4152*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
4153*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4154*22dc650dSSadaf Ebrahimi 
4155*22dc650dSSadaf Ebrahimi 	if (reg_size != 3 && reg_size != 4)
4156*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
4157*22dc650dSSadaf Ebrahimi 
4158*22dc650dSSadaf Ebrahimi 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4159*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
4160*22dc650dSSadaf Ebrahimi 
4161*22dc650dSSadaf Ebrahimi 	switch (SLJIT_SIMD_GET_OPCODE(type)) {
4162*22dc650dSSadaf Ebrahimi 	case SLJIT_SIMD_OP2_AND:
4163*22dc650dSSadaf Ebrahimi 		ins = VAND;
4164*22dc650dSSadaf Ebrahimi 		break;
4165*22dc650dSSadaf Ebrahimi 	case SLJIT_SIMD_OP2_OR:
4166*22dc650dSSadaf Ebrahimi 		ins = VORR;
4167*22dc650dSSadaf Ebrahimi 		break;
4168*22dc650dSSadaf Ebrahimi 	case SLJIT_SIMD_OP2_XOR:
4169*22dc650dSSadaf Ebrahimi 		ins = VEOR;
4170*22dc650dSSadaf Ebrahimi 		break;
4171*22dc650dSSadaf Ebrahimi 	}
4172*22dc650dSSadaf Ebrahimi 
4173*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_TEST)
4174*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
4175*22dc650dSSadaf Ebrahimi 
4176*22dc650dSSadaf Ebrahimi 	if (reg_size == 4) {
4177*22dc650dSSadaf Ebrahimi 		dst_freg = simd_get_quad_reg_index(dst_freg);
4178*22dc650dSSadaf Ebrahimi 		src1_freg = simd_get_quad_reg_index(src1_freg);
4179*22dc650dSSadaf Ebrahimi 		src2_freg = simd_get_quad_reg_index(src2_freg);
4180*22dc650dSSadaf Ebrahimi 		ins |= (sljit_ins)1 << 6;
4181*22dc650dSSadaf Ebrahimi 	}
4182*22dc650dSSadaf Ebrahimi 
4183*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, ins | VD4(dst_freg) | VN4(src1_freg) | VM4(src2_freg));
4184*22dc650dSSadaf Ebrahimi }
4185*22dc650dSSadaf Ebrahimi 
/* FPU_LOAD is not used past this point. */
#undef FPU_LOAD
4187*22dc650dSSadaf Ebrahimi 
sljit_emit_atomic_load(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 mem_reg)4188*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4189*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_reg,
4190*22dc650dSSadaf Ebrahimi 	sljit_s32 mem_reg)
4191*22dc650dSSadaf Ebrahimi {
4192*22dc650dSSadaf Ebrahimi 	sljit_ins ins;
4193*22dc650dSSadaf Ebrahimi 
4194*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
4195*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4196*22dc650dSSadaf Ebrahimi 
4197*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op)) {
4198*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U8:
4199*22dc650dSSadaf Ebrahimi 		ins = LDREXB;
4200*22dc650dSSadaf Ebrahimi 		break;
4201*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U16:
4202*22dc650dSSadaf Ebrahimi 		ins = LDREXH;
4203*22dc650dSSadaf Ebrahimi 		break;
4204*22dc650dSSadaf Ebrahimi 	default:
4205*22dc650dSSadaf Ebrahimi 		ins = LDREX;
4206*22dc650dSSadaf Ebrahimi 		break;
4207*22dc650dSSadaf Ebrahimi 	}
4208*22dc650dSSadaf Ebrahimi 
4209*22dc650dSSadaf Ebrahimi 	return push_inst32(compiler, ins | RN4(mem_reg) | RT4(dst_reg));
4210*22dc650dSSadaf Ebrahimi }
4211*22dc650dSSadaf Ebrahimi 
sljit_emit_atomic_store(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src_reg,sljit_s32 mem_reg,sljit_s32 temp_reg)4212*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4213*22dc650dSSadaf Ebrahimi 	sljit_s32 src_reg,
4214*22dc650dSSadaf Ebrahimi 	sljit_s32 mem_reg,
4215*22dc650dSSadaf Ebrahimi 	sljit_s32 temp_reg)
4216*22dc650dSSadaf Ebrahimi {
4217*22dc650dSSadaf Ebrahimi 	sljit_ins ins;
4218*22dc650dSSadaf Ebrahimi 
4219*22dc650dSSadaf Ebrahimi 	/* temp_reg == mem_reg is undefined so use another temp register */
4220*22dc650dSSadaf Ebrahimi 	SLJIT_UNUSED_ARG(temp_reg);
4221*22dc650dSSadaf Ebrahimi 
4222*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
4223*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4224*22dc650dSSadaf Ebrahimi 
4225*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op)) {
4226*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U8:
4227*22dc650dSSadaf Ebrahimi 		ins = STREXB | RM4(TMP_REG1);
4228*22dc650dSSadaf Ebrahimi 		break;
4229*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U16:
4230*22dc650dSSadaf Ebrahimi 		ins = STREXH | RM4(TMP_REG1);
4231*22dc650dSSadaf Ebrahimi 		break;
4232*22dc650dSSadaf Ebrahimi 	default:
4233*22dc650dSSadaf Ebrahimi 		ins = STREX | RD4(TMP_REG1);
4234*22dc650dSSadaf Ebrahimi 		break;
4235*22dc650dSSadaf Ebrahimi 	}
4236*22dc650dSSadaf Ebrahimi 
4237*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst32(compiler, ins | RN4(mem_reg) | RT4(src_reg)));
4238*22dc650dSSadaf Ebrahimi 	if (op & SLJIT_SET_ATOMIC_STORED)
4239*22dc650dSSadaf Ebrahimi 		return push_inst32(compiler, CMPI_W | RN4(TMP_REG1));
4240*22dc650dSSadaf Ebrahimi 
4241*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
4242*22dc650dSSadaf Ebrahimi }
4243*22dc650dSSadaf Ebrahimi 
sljit_emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw init_value)4244*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
4245*22dc650dSSadaf Ebrahimi {
4246*22dc650dSSadaf Ebrahimi 	struct sljit_const *const_;
4247*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r;
4248*22dc650dSSadaf Ebrahimi 
4249*22dc650dSSadaf Ebrahimi 	CHECK_ERROR_PTR();
4250*22dc650dSSadaf Ebrahimi 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
4251*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
4252*22dc650dSSadaf Ebrahimi 
4253*22dc650dSSadaf Ebrahimi 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
4254*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(!const_);
4255*22dc650dSSadaf Ebrahimi 	set_const(const_, compiler);
4256*22dc650dSSadaf Ebrahimi 
4257*22dc650dSSadaf Ebrahimi 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4258*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, (sljit_uw)init_value));
4259*22dc650dSSadaf Ebrahimi 
4260*22dc650dSSadaf Ebrahimi 	if (dst & SLJIT_MEM)
4261*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
4262*22dc650dSSadaf Ebrahimi 	return const_;
4263*22dc650dSSadaf Ebrahimi }
4264*22dc650dSSadaf Ebrahimi 
sljit_emit_mov_addr(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)4265*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
4266*22dc650dSSadaf Ebrahimi {
4267*22dc650dSSadaf Ebrahimi 	struct sljit_jump *jump;
4268*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r;
4269*22dc650dSSadaf Ebrahimi 
4270*22dc650dSSadaf Ebrahimi 	CHECK_ERROR_PTR();
4271*22dc650dSSadaf Ebrahimi 	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
4272*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
4273*22dc650dSSadaf Ebrahimi 
4274*22dc650dSSadaf Ebrahimi 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
4275*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(!jump);
4276*22dc650dSSadaf Ebrahimi 	set_mov_addr(jump, compiler, 0);
4277*22dc650dSSadaf Ebrahimi 
4278*22dc650dSSadaf Ebrahimi 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4279*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(push_inst16(compiler, RDN3(dst_r)));
4280*22dc650dSSadaf Ebrahimi 	compiler->size += 3;
4281*22dc650dSSadaf Ebrahimi 
4282*22dc650dSSadaf Ebrahimi 	if (dst & SLJIT_MEM)
4283*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
4284*22dc650dSSadaf Ebrahimi 	return jump;
4285*22dc650dSSadaf Ebrahimi }
4286*22dc650dSSadaf Ebrahimi 
sljit_set_jump_addr(sljit_uw addr,sljit_uw new_target,sljit_sw executable_offset)4287*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
4288*22dc650dSSadaf Ebrahimi {
4289*22dc650dSSadaf Ebrahimi 	sljit_u16 *inst = (sljit_u16*)addr;
4290*22dc650dSSadaf Ebrahimi 	SLJIT_UNUSED_ARG(executable_offset);
4291*22dc650dSSadaf Ebrahimi 
4292*22dc650dSSadaf Ebrahimi 	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
4293*22dc650dSSadaf Ebrahimi 	modify_imm32_const(inst, new_target);
4294*22dc650dSSadaf Ebrahimi 	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
4295*22dc650dSSadaf Ebrahimi 	inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
4296*22dc650dSSadaf Ebrahimi 	SLJIT_CACHE_FLUSH(inst, inst + 4);
4297*22dc650dSSadaf Ebrahimi }
4298*22dc650dSSadaf Ebrahimi 
sljit_set_const(sljit_uw addr,sljit_sw new_constant,sljit_sw executable_offset)4299*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
4300*22dc650dSSadaf Ebrahimi {
4301*22dc650dSSadaf Ebrahimi 	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
4302*22dc650dSSadaf Ebrahimi }
4303