/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg ([email protected]). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - ESP
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - RSP
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - From now on REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/

#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_FREG	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 5, 7, 6, 4, 3
};

static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
	0, 1, 2, 3, 4, 5, 6, 7, 0
};

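/* Note (descriptive comment, derived from the macro below and from reg_map above):
   on x86-32 the SLJIT registers from SLJIT_R3 to SLJIT_S3 have no dedicated hardware
   register and live in the stack frame. CHECK_EXTRA_REGS rewrites such an operand into
   a stack access; for example, SLJIT_R3 becomes SLJIT_MEM1(SLJIT_SP) with an offset
   of 2 * SSIZE_OF(sw). */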
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
		w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better used as a higher saved register. */
#ifndef _WIN64
/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
};
#else
/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
};
#endif

/* Args: xmm0-xmm3 */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
	0, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4
};
/* low-map. freg_map & 0x7. */
static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
	0, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 4
};

#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40
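/* The REX_* values above are complete prefix bytes with a single flag bit set,
   so they can simply be or-ed together. Illustrative example (not taken from
   the code below): encoding "mov rax, r8" needs REX.W (64-bit operand size) and
   REX.B (extended r/m register), i.e. REX_W | REX_B == 0x49, followed by
   MOV_r_rm (0x8b) and the 0xc0 ModR/M byte. */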

#ifndef _WIN64
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#else
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll
#endif

#define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define U8(v)			((sljit_u8)(v))

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		((sljit_uw)0x000010)
#define EX86_SHIFT_INS		((sljit_uw)0x000020)
#define EX86_BYTE_ARG		((sljit_uw)0x000040)
#define EX86_HALF_ARG		((sljit_uw)0x000080)
/* Size flags for both emit_x86_instruction and emit_vex_instruction: */
#define EX86_REX		((sljit_uw)0x000100)
#define EX86_NO_REXW		((sljit_uw)0x000200)
#define EX86_PREF_66		((sljit_uw)0x000400)
#define EX86_PREF_F2		((sljit_uw)0x000800)
#define EX86_PREF_F3		((sljit_uw)0x001000)
#define EX86_SSE2_OP1		((sljit_uw)0x002000)
#define EX86_SSE2_OP2		((sljit_uw)0x004000)
#define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)
#define EX86_VEX_EXT		((sljit_uw)0x008000)
/* Op flags for emit_vex_instruction: */
#define VEX_OP_0F38		((sljit_uw)0x010000)
#define VEX_OP_0F3A		((sljit_uw)0x020000)
#define VEX_SSE2_OPV		((sljit_uw)0x040000)
#define VEX_AUTO_W		((sljit_uw)0x080000)
#define VEX_W			((sljit_uw)0x100000)
#define VEX_256			((sljit_uw)0x200000)

#define EX86_SELECT_66(op)	(((op) & SLJIT_32) ? 0 : EX86_PREF_66)
#define EX86_SELECT_F2_F3(op)	(((op) & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2)
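/* These helpers pick the SSE prefix from the operation size (SLJIT_32 marks single
   precision): no prefix / 0x66 select the packed single / packed double forms, while
   0xf3 / 0xf2 select the scalar single / scalar double forms, matching the standard
   SSE2 prefix encoding. */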

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                     */
/* --------------------------------------------------------------------- */

#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define BSF_r_rm	(/* GROUP_0F */ 0xbc)
#define BSWAP_r		(/* GROUP_0F */ 0xc8)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVE_r_rm	(/* GROUP_0F */ 0x44)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define CMPS_x_xm	0xc2
#define CMPXCHG_rm_r	0xb1
#define CMPXCHG_rm8_r	0xb0
#define CVTPD2PS_x_xm	0x5a
#define CVTPS2PD_x_xm	0x5a
#define CVTSI2SD_x_rm	0x2a
#define CVTTSD2SI_r_xm	0x2c
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define EXTRACTPS_x_xm	0x17
#define FLDS		0xd9
#define FLDL		0xdd
#define FSTPS		0xd9
#define FSTPD		0xdd
#define INSERTPS_x_xm	0x21
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JL_i8		0x7c
#define JE_i8		0x74
#define JNC_i8		0x73
#define JNE_i8		0x75
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define LOOP_i8		0xe2
#define LZCNT_r_rm	(/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVAPS_x_xm	0x28
#define MOVAPS_xm_x	0x29
#define MOVD_x_rm	0x6e
#define MOVD_rm_x	0x7e
#define MOVDDUP_x_xm	0x12
#define MOVDQA_x_xm	0x6f
#define MOVDQA_xm_x	0x7f
#define MOVHLPS_x_x	0x12
#define MOVHPD_m_x	0x17
#define MOVHPD_x_m	0x16
#define MOVLHPS_x_x	0x16
#define MOVLPD_m_x	0x13
#define MOVLPD_x_m	0x12
#define MOVMSKPS_r_x	(/* GROUP_0F */ 0x50)
#define MOVQ_x_xm	(/* GROUP_0F */ 0x7e)
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSHDUP_x_xm	0x16
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVUPS_x_xm	0x10
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define ORPD_x_xm	0x56
#define PACKSSWB_x_xm	(/* GROUP_0F */ 0x63)
#define PAND_x_xm	0xdb
#define PCMPEQD_x_xm	0x76
#define PINSRB_x_rm_i8	0x20
#define PINSRW_x_rm_i8	0xc4
#define PINSRD_x_rm_i8	0x22
#define PEXTRB_rm_x_i8	0x14
#define PEXTRW_rm_x_i8	0x15
#define PEXTRD_rm_x_i8	0x16
#define PMOVMSKB_r_x	(/* GROUP_0F */ 0xd7)
#define PMOVSXBD_x_xm	0x21
#define PMOVSXBQ_x_xm	0x22
#define PMOVSXBW_x_xm	0x20
#define PMOVSXDQ_x_xm	0x25
#define PMOVSXWD_x_xm	0x23
#define PMOVSXWQ_x_xm	0x24
#define PMOVZXBD_x_xm	0x31
#define PMOVZXBQ_x_xm	0x32
#define PMOVZXBW_x_xm	0x30
#define PMOVZXDQ_x_xm	0x35
#define PMOVZXWD_x_xm	0x33
#define PMOVZXWQ_x_xm	0x34
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define POR_x_xm	0xeb
#define PREFETCH	0x18
#define PSHUFB_x_xm	0x00
#define PSHUFD_x_xm	0x70
#define PSHUFLW_x_xm	0x70
#define PSRLDQ_x	0x73
#define PSLLD_x_i8	0x72
#define PSLLQ_x_i8	0x73
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define PXOR_x_xm	0xef
#define ROL		(/* SHIFT */ 0 << 3)
#define ROR		(/* SHIFT */ 1 << 3)
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHLD		(/* GROUP_0F */ 0xa5)
#define SHRD		(/* GROUP_0F */ 0xad)
#define SHR		(/* SHIFT */ 5 << 3)
#define SHUFPS_x_xm	0xc6
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define TZCNT_r_rm	(/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
#define UCOMISD_x_xm	0x2e
#define UNPCKLPD_x_xm	0x14
#define UNPCKLPS_x_xm	0x14
#define VBROADCASTSD_x_xm	0x19
#define VBROADCASTSS_x_xm	0x18
#define VEXTRACTF128_x_ym	0x19
#define VEXTRACTI128_x_ym	0x39
#define VINSERTF128_y_y_xm	0x18
#define VINSERTI128_y_y_xm	0x38
#define VPBROADCASTB_x_xm	0x78
#define VPBROADCASTD_x_xm	0x58
#define VPBROADCASTQ_x_xm	0x59
#define VPBROADCASTW_x_xm	0x79
#define VPERMPD_y_ym	0x01
#define VPERMQ_y_ym	0x00
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

#define GROUP_0F	0x0f
#define GROUP_66	0x66
#define GROUP_F3	0xf3
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3
#define GROUP_LOCK	0xf0

#define MOD_REG		0xc0
#define MOD_DISP8	0x40

#define INC_SIZE(s)	(*inst++ = U8(s), compiler->size += (s))

#define PUSH_REG(r)	(*inst++ = U8(PUSH_r + (r)))
#define POP_REG(r)	(*inst++ = U8(POP_r + (r)))
#define RET()		(*inst++ = RET_near)
#define RET_I16(n)	(*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)

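/* Marker values stored in the length byte of the instruction buffer. Values below
   SLJIT_INST_CONST are plain byte counts of already generated machine code; the
   values below mark label, jump, mov_addr and const records, which are resolved
   during the second pass in sljit_generate_code. */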
#define SLJIT_INST_LABEL	255
#define SLJIT_INST_JUMP		254
#define SLJIT_INST_MOV_ADDR	253
#define SLJIT_INST_CONST	252

/* These static variables only store built-in CPU features, so multithreading is
   not a problem: even if several threads detect the CPU features at the same
   time, they all write the same values. */
#define CPU_FEATURE_DETECTED	0x001
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
#define CPU_FEATURE_SSE2	0x002
#endif
#define CPU_FEATURE_SSE41	0x004
#define CPU_FEATURE_LZCNT	0x008
#define CPU_FEATURE_TZCNT	0x010
#define CPU_FEATURE_CMOV	0x020
#define CPU_FEATURE_AVX		0x040
#define CPU_FEATURE_AVX2	0x080
#define CPU_FEATURE_OSXSAVE	0x100

static sljit_u32 cpu_feature_list = 0;

#ifdef _WIN32_WCE
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

/******************************************************/
/* Unaligned-store functions */
/******************************************************/

static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

/******************************************************/
/* Utility functions */
/******************************************************/

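/* Executes the CPUID instruction. On input info[0] holds the leaf (EAX) and
   info[2] the subleaf (ECX); on return info[0..3] hold EAX, EBX, ECX and EDX. */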
static void execute_cpu_id(sljit_u32 info[4])
{
#if defined(_MSC_VER) && _MSC_VER >= 1400

	__cpuidex((int*)info, (int)info[0], (int)info[2]);

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__)

	/* AT&T syntax. */
	__asm__ (
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"movl %0, %%esi\n"
		"movl (%%esi), %%eax\n"
		"movl 8(%%esi), %%ecx\n"
		"pushl %%ebx\n"
		"cpuid\n"
		"movl %%eax, (%%esi)\n"
		"movl %%ebx, 4(%%esi)\n"
		"popl %%ebx\n"
		"movl %%ecx, 8(%%esi)\n"
		"movl %%edx, 12(%%esi)\n"
#else /* !SLJIT_CONFIG_X86_32 */
		"movq %0, %%rsi\n"
		"movl (%%rsi), %%eax\n"
		"movl 8(%%rsi), %%ecx\n"
		"cpuid\n"
		"movl %%eax, (%%rsi)\n"
		"movl %%ebx, 4(%%rsi)\n"
		"movl %%ecx, 8(%%rsi)\n"
		"movl %%edx, 12(%%rsi)\n"
#endif /* SLJIT_CONFIG_X86_32 */
		:
		: "r" (info)
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "memory", "eax", "ecx", "edx", "esi"
#else /* !SLJIT_CONFIG_X86_32 */
		: "memory", "rax", "rbx", "rcx", "rdx", "rsi"
#endif /* SLJIT_CONFIG_X86_32 */
	);

#else /* _MSC_VER < 1400 */

	/* Intel syntax. */
	__asm {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		mov esi, info
		mov eax, [esi]
		mov ecx, [esi + 8]
		cpuid
		mov [esi], eax
		mov [esi + 4], ebx
		mov [esi + 8], ecx
		mov [esi + 12], edx
#else /* !SLJIT_CONFIG_X86_32 */
		mov rsi, info
		mov eax, [rsi]
		mov ecx, [rsi + 8]
		cpuid
		mov [rsi], eax
		mov [rsi + 4], ebx
		mov [rsi + 8], ecx
		mov [rsi + 12], edx
#endif /* SLJIT_CONFIG_X86_32 */
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */
}

static sljit_u32 execute_get_xcr0_low(void)
{
	sljit_u32 xcr0;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	xcr0 = (sljit_u32)_xgetbv(0);

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__)

	/* AT&T syntax. */
	__asm__ (
		"xorl %%ecx, %%ecx\n"
		"xgetbv\n"
		: "=a" (xcr0)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "ecx", "edx"
#else /* !SLJIT_CONFIG_X86_32 */
		: "rcx", "rdx"
#endif /* SLJIT_CONFIG_X86_32 */
	);

#else /* _MSC_VER < 1400 */

	/* Intel syntax. */
	__asm {
		mov ecx, 0
		xgetbv
		mov xcr0, eax
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */
	return xcr0;
}

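/* Detects CPU features using the standard CPUID leaves: leaf 7 subleaf 0
   (EBX bit 3 for TZCNT/BMI1, bit 5 for AVX2), leaf 1 (ECX bits 19/27/28 for
   SSE4.1, OSXSAVE and AVX; EDX bits 26/15 for SSE2 and CMOV) and extended
   leaf 0x80000001 (ECX bit 5 for LZCNT). */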
static void get_cpu_features(void)
{
	sljit_u32 feature_list = CPU_FEATURE_DETECTED;
	sljit_u32 info[4] = {0};
	sljit_u32 max_id;

	execute_cpu_id(info);
	max_id = info[0];

	if (max_id >= 7) {
		info[0] = 7;
		info[2] = 0;
		execute_cpu_id(info);

		if (info[1] & 0x8)
			feature_list |= CPU_FEATURE_TZCNT;
		if (info[1] & 0x20)
			feature_list |= CPU_FEATURE_AVX2;
	}

	if (max_id >= 1) {
		info[0] = 1;
		execute_cpu_id(info);

		if (info[2] & 0x80000)
			feature_list |= CPU_FEATURE_SSE41;
		if (info[2] & 0x8000000)
			feature_list |= CPU_FEATURE_OSXSAVE;
		if (info[2] & 0x10000000)
			feature_list |= CPU_FEATURE_AVX;
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
		if (info[3] & 0x4000000)
			feature_list |= CPU_FEATURE_SSE2;
#endif
		if (info[3] & 0x8000)
			feature_list |= CPU_FEATURE_CMOV;
	}

	info[0] = 0x80000001;
	execute_cpu_id(info);

	if (info[2] & 0x20)
		feature_list |= CPU_FEATURE_LZCNT;

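	/* XCR0 bit 2 indicates that the OS saves the YMM state; without it the
	   AVX/AVX2 instructions cannot be used even if the CPU supports them. */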
	if ((feature_list & CPU_FEATURE_OSXSAVE) && (execute_get_xcr0_low() & 0x4) == 0)
		feature_list &= ~(sljit_u32)(CPU_FEATURE_AVX | CPU_FEATURE_AVX2);

	cpu_feature_list = feature_list;
}

static sljit_u8 get_jump_code(sljit_uw type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
		return 0x84 /* je */;

	case SLJIT_NOT_EQUAL:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return 0x85 /* jne */;

	case SLJIT_LESS:
	case SLJIT_CARRY:
	case SLJIT_F_LESS:
	case SLJIT_UNORDERED_OR_LESS:
	case SLJIT_UNORDERED_OR_GREATER:
		return 0x82 /* jc */;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_NOT_CARRY:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return 0x83 /* jae */;

	case SLJIT_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_LESS:
	case SLJIT_ORDERED_GREATER:
		return 0x87 /* jnbe */;

	case SLJIT_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return 0x86 /* jbe */;

	case SLJIT_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_UNORDERED:
	case SLJIT_ORDERED_EQUAL: /* NaN. */
		return 0x8a /* jp */;

	case SLJIT_ORDERED:
	case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not NaN. */
		return 0x8b /* jpo */;
	}
	return 0;
}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
#else /* !SLJIT_CONFIG_X86_32 */
static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr);
static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset);
#endif /* SLJIT_CONFIG_X86_32 */

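/* Selects the shortest encoding for a non-rewritable jump: a one byte opcode
   with a rel8 offset when the target is within range, otherwise the rel32 form
   (on x86-64 a far jump is generated when even the rel32 range is exceeded). */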
static sljit_u8* detect_near_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;
	sljit_s32 short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_ADDR)
		label_addr = jump->u.target - (sljit_uw)executable_offset;
	else
		label_addr = (sljit_uw)(code + jump->u.label->size);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (sljit_uw)(code_ptr + 6)) > HALFWORD_MAX || (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 5)) < HALFWORD_MIN)
		return detect_far_jump_type(jump, code_ptr);
#endif /* SLJIT_CONFIG_X86_64 */

	short_jump = (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 2)) >= -0x80 && (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 2)) <= 0x7f;

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
	} else if (type > SLJIT_JUMP) {
		short_jump = 0;
		*code_ptr++ = CALL_i32;
	} else if (short_jump) {
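		/* The one byte Jcc opcodes (0x70-0x7f) are the two byte 0x0f 0x8x forms minus 0x10. */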
		*code_ptr++ = U8(get_jump_code(type) - 0x10);
	} else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
	}

	jump->addr = (sljit_uw)code_ptr;

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_s8);
	} else {
		jump->flags |= PATCH_MW;
		code_ptr += sizeof(sljit_s32);
	}

	return code_ptr;
}

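/* Patches the offset or address field of an already emitted jump or mov_addr
   instruction once the final label addresses are known. */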
static void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset)
{
	sljit_uw flags = jump->flags;
	sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
	sljit_uw jump_addr = jump->addr;
	SLJIT_UNUSED_ARG(executable_offset);

	if (SLJIT_UNLIKELY(flags & JUMP_MOV_ADDR)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr);
#else /* SLJIT_CONFIG_X86_32 */
		if (flags & PATCH_MD) {
			SLJIT_ASSERT(addr > HALFWORD_MAX);
			sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr);
			return;
		}

		if (flags & PATCH_MW) {
			addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset);
			SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN);
		} else {
			SLJIT_ASSERT(addr <= HALFWORD_MAX);
		}
		sljit_unaligned_store_s32((void*)(jump_addr - sizeof(sljit_s32)), (sljit_s32)addr);
#endif /* !SLJIT_CONFIG_X86_32 */
		return;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (SLJIT_UNLIKELY(flags & PATCH_MD)) {
		SLJIT_ASSERT(!(flags & JUMP_ADDR));
		sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr);
		return;
	}
#endif /* SLJIT_CONFIG_X86_64 */

	addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset);

	if (flags & PATCH_MB) {
		addr -= sizeof(sljit_s8);
		SLJIT_ASSERT((sljit_sw)addr <= 0x7f && (sljit_sw)addr >= -0x80);
		*(sljit_u8*)jump_addr = U8(addr);
		return;
	} else if (flags & PATCH_MW) {
		addr -= sizeof(sljit_s32);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr);
#else /* !SLJIT_CONFIG_X86_32 */
		SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN);
		sljit_unaligned_store_s32((void*)jump_addr, (sljit_s32)addr);
#endif /* SLJIT_CONFIG_X86_32 */
	}
}

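/* Pre-pass over the labels and jumps: the initial size estimates assume the
   largest jump forms, so this pass computes how many bytes the shorter rel8 and
   rel32 encodings save, and shrinks the label sizes and the total code size
   accordingly before the executable buffer is allocated. */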
reduce_code_size(struct sljit_compiler * compiler)757*22dc650dSSadaf Ebrahimi static void reduce_code_size(struct sljit_compiler *compiler)
758*22dc650dSSadaf Ebrahimi {
759*22dc650dSSadaf Ebrahimi struct sljit_label *label;
760*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
761*22dc650dSSadaf Ebrahimi sljit_uw next_label_size;
762*22dc650dSSadaf Ebrahimi sljit_uw next_jump_addr;
763*22dc650dSSadaf Ebrahimi sljit_uw next_min_addr;
764*22dc650dSSadaf Ebrahimi sljit_uw size_reduce = 0;
765*22dc650dSSadaf Ebrahimi sljit_sw diff;
766*22dc650dSSadaf Ebrahimi sljit_uw type;
767*22dc650dSSadaf Ebrahimi #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
768*22dc650dSSadaf Ebrahimi sljit_uw size_reduce_max;
769*22dc650dSSadaf Ebrahimi #endif /* SLJIT_DEBUG */
770*22dc650dSSadaf Ebrahimi
771*22dc650dSSadaf Ebrahimi label = compiler->labels;
772*22dc650dSSadaf Ebrahimi jump = compiler->jumps;
773*22dc650dSSadaf Ebrahimi
774*22dc650dSSadaf Ebrahimi next_label_size = SLJIT_GET_NEXT_SIZE(label);
775*22dc650dSSadaf Ebrahimi next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
776*22dc650dSSadaf Ebrahimi
777*22dc650dSSadaf Ebrahimi while (1) {
778*22dc650dSSadaf Ebrahimi next_min_addr = next_label_size;
779*22dc650dSSadaf Ebrahimi if (next_jump_addr < next_min_addr)
780*22dc650dSSadaf Ebrahimi next_min_addr = next_jump_addr;
781*22dc650dSSadaf Ebrahimi
782*22dc650dSSadaf Ebrahimi if (next_min_addr == SLJIT_MAX_ADDRESS)
783*22dc650dSSadaf Ebrahimi break;
784*22dc650dSSadaf Ebrahimi
785*22dc650dSSadaf Ebrahimi if (next_min_addr == next_label_size) {
786*22dc650dSSadaf Ebrahimi label->size -= size_reduce;
787*22dc650dSSadaf Ebrahimi
788*22dc650dSSadaf Ebrahimi label = label->next;
789*22dc650dSSadaf Ebrahimi next_label_size = SLJIT_GET_NEXT_SIZE(label);
790*22dc650dSSadaf Ebrahimi }
791*22dc650dSSadaf Ebrahimi
792*22dc650dSSadaf Ebrahimi if (next_min_addr != next_jump_addr)
793*22dc650dSSadaf Ebrahimi continue;
794*22dc650dSSadaf Ebrahimi
795*22dc650dSSadaf Ebrahimi if (!(jump->flags & JUMP_MOV_ADDR)) {
796*22dc650dSSadaf Ebrahimi #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
797*22dc650dSSadaf Ebrahimi size_reduce_max = size_reduce + (((jump->flags >> TYPE_SHIFT) < SLJIT_JUMP) ? CJUMP_MAX_SIZE : JUMP_MAX_SIZE);
798*22dc650dSSadaf Ebrahimi #endif /* SLJIT_DEBUG */
799*22dc650dSSadaf Ebrahimi
800*22dc650dSSadaf Ebrahimi if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
801*22dc650dSSadaf Ebrahimi if (jump->flags & JUMP_ADDR) {
802*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
803*22dc650dSSadaf Ebrahimi if (jump->u.target <= 0xffffffffl)
804*22dc650dSSadaf Ebrahimi size_reduce += sizeof(sljit_s32);
805*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
806*22dc650dSSadaf Ebrahimi } else {
807*22dc650dSSadaf Ebrahimi /* Unit size: instruction. */
808*22dc650dSSadaf Ebrahimi diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - size_reduce);
809*22dc650dSSadaf Ebrahimi type = jump->flags >> TYPE_SHIFT;
810*22dc650dSSadaf Ebrahimi
811*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
812*22dc650dSSadaf Ebrahimi if (type == SLJIT_JUMP) {
813*22dc650dSSadaf Ebrahimi if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
814*22dc650dSSadaf Ebrahimi size_reduce += JUMP_MAX_SIZE - 2;
815*22dc650dSSadaf Ebrahimi else if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5)
816*22dc650dSSadaf Ebrahimi size_reduce += JUMP_MAX_SIZE - 5;
817*22dc650dSSadaf Ebrahimi } else if (type < SLJIT_JUMP) {
818*22dc650dSSadaf Ebrahimi if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
819*22dc650dSSadaf Ebrahimi size_reduce += CJUMP_MAX_SIZE - 2;
820*22dc650dSSadaf Ebrahimi else if (diff <= HALFWORD_MAX + 6 && diff >= HALFWORD_MIN + 6)
821*22dc650dSSadaf Ebrahimi size_reduce += CJUMP_MAX_SIZE - 6;
822*22dc650dSSadaf Ebrahimi } else {
823*22dc650dSSadaf Ebrahimi if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5)
824*22dc650dSSadaf Ebrahimi size_reduce += JUMP_MAX_SIZE - 5;
825*22dc650dSSadaf Ebrahimi }
826*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
827*22dc650dSSadaf Ebrahimi if (type == SLJIT_JUMP) {
828*22dc650dSSadaf Ebrahimi if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
829*22dc650dSSadaf Ebrahimi size_reduce += JUMP_MAX_SIZE - 2;
830*22dc650dSSadaf Ebrahimi } else if (type < SLJIT_JUMP) {
831*22dc650dSSadaf Ebrahimi if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
832*22dc650dSSadaf Ebrahimi size_reduce += CJUMP_MAX_SIZE - 2;
833*22dc650dSSadaf Ebrahimi }
834*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
835*22dc650dSSadaf Ebrahimi }
836*22dc650dSSadaf Ebrahimi }
837*22dc650dSSadaf Ebrahimi
838*22dc650dSSadaf Ebrahimi #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
839*22dc650dSSadaf Ebrahimi jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT;
840*22dc650dSSadaf Ebrahimi #endif /* SLJIT_DEBUG */
841*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
842*22dc650dSSadaf Ebrahimi } else {
843*22dc650dSSadaf Ebrahimi #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
844*22dc650dSSadaf Ebrahimi size_reduce_max = size_reduce + 10;
845*22dc650dSSadaf Ebrahimi #endif /* SLJIT_DEBUG */
846*22dc650dSSadaf Ebrahimi
847*22dc650dSSadaf Ebrahimi if (!(jump->flags & JUMP_ADDR)) {
848*22dc650dSSadaf Ebrahimi diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - size_reduce - 3);
849*22dc650dSSadaf Ebrahimi
850*22dc650dSSadaf Ebrahimi if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN)
851*22dc650dSSadaf Ebrahimi size_reduce += 3;
852*22dc650dSSadaf Ebrahimi } else if (jump->u.target <= 0xffffffffl)
853*22dc650dSSadaf Ebrahimi size_reduce += (jump->flags & MOV_ADDR_HI) ? 4 : 5;
854*22dc650dSSadaf Ebrahimi
855*22dc650dSSadaf Ebrahimi #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
856*22dc650dSSadaf Ebrahimi jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT;
857*22dc650dSSadaf Ebrahimi #endif /* SLJIT_DEBUG */
858*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
859*22dc650dSSadaf Ebrahimi }
860*22dc650dSSadaf Ebrahimi
861*22dc650dSSadaf Ebrahimi jump = jump->next;
862*22dc650dSSadaf Ebrahimi next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
863*22dc650dSSadaf Ebrahimi }
864*22dc650dSSadaf Ebrahimi
865*22dc650dSSadaf Ebrahimi compiler->size -= size_reduce;
866*22dc650dSSadaf Ebrahimi }
867*22dc650dSSadaf Ebrahimi
sljit_generate_code(struct sljit_compiler * compiler,sljit_s32 options,void * exec_allocator_data)868*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
869*22dc650dSSadaf Ebrahimi {
870*22dc650dSSadaf Ebrahimi struct sljit_memory_fragment *buf;
871*22dc650dSSadaf Ebrahimi sljit_u8 *code;
872*22dc650dSSadaf Ebrahimi sljit_u8 *code_ptr;
873*22dc650dSSadaf Ebrahimi sljit_u8 *buf_ptr;
874*22dc650dSSadaf Ebrahimi sljit_u8 *buf_end;
875*22dc650dSSadaf Ebrahimi sljit_u8 len;
876*22dc650dSSadaf Ebrahimi sljit_sw executable_offset;
877*22dc650dSSadaf Ebrahimi #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
878*22dc650dSSadaf Ebrahimi sljit_uw addr;
879*22dc650dSSadaf Ebrahimi #endif /* SLJIT_DEBUG */
880*22dc650dSSadaf Ebrahimi
881*22dc650dSSadaf Ebrahimi struct sljit_label *label;
882*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
883*22dc650dSSadaf Ebrahimi struct sljit_const *const_;
884*22dc650dSSadaf Ebrahimi
885*22dc650dSSadaf Ebrahimi CHECK_ERROR_PTR();
886*22dc650dSSadaf Ebrahimi CHECK_PTR(check_sljit_generate_code(compiler));
887*22dc650dSSadaf Ebrahimi
888*22dc650dSSadaf Ebrahimi reduce_code_size(compiler);
889*22dc650dSSadaf Ebrahimi
890*22dc650dSSadaf Ebrahimi /* Second code generation pass. */
891*22dc650dSSadaf Ebrahimi code = (sljit_u8*)allocate_executable_memory(compiler->size, options, exec_allocator_data, &executable_offset);
892*22dc650dSSadaf Ebrahimi PTR_FAIL_WITH_EXEC_IF(code);
893*22dc650dSSadaf Ebrahimi
894*22dc650dSSadaf Ebrahimi reverse_buf(compiler);
895*22dc650dSSadaf Ebrahimi buf = compiler->buf;
896*22dc650dSSadaf Ebrahimi
897*22dc650dSSadaf Ebrahimi code_ptr = code;
898*22dc650dSSadaf Ebrahimi label = compiler->labels;
899*22dc650dSSadaf Ebrahimi jump = compiler->jumps;
900*22dc650dSSadaf Ebrahimi const_ = compiler->consts;
901*22dc650dSSadaf Ebrahimi
902*22dc650dSSadaf Ebrahimi do {
903*22dc650dSSadaf Ebrahimi buf_ptr = buf->memory;
904*22dc650dSSadaf Ebrahimi buf_end = buf_ptr + buf->used_size;
905*22dc650dSSadaf Ebrahimi do {
906*22dc650dSSadaf Ebrahimi len = *buf_ptr++;
907*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(len > 0);
908*22dc650dSSadaf Ebrahimi if (len < SLJIT_INST_CONST) {
909*22dc650dSSadaf Ebrahimi /* The code is already generated. */
910*22dc650dSSadaf Ebrahimi SLJIT_MEMCPY(code_ptr, buf_ptr, len);
911*22dc650dSSadaf Ebrahimi code_ptr += len;
912*22dc650dSSadaf Ebrahimi buf_ptr += len;
913*22dc650dSSadaf Ebrahimi } else {
914*22dc650dSSadaf Ebrahimi switch (len) {
915*22dc650dSSadaf Ebrahimi case SLJIT_INST_LABEL:
916*22dc650dSSadaf Ebrahimi label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
917*22dc650dSSadaf Ebrahimi label->size = (sljit_uw)(code_ptr - code);
918*22dc650dSSadaf Ebrahimi label = label->next;
919*22dc650dSSadaf Ebrahimi break;
920*22dc650dSSadaf Ebrahimi case SLJIT_INST_JUMP:
921*22dc650dSSadaf Ebrahimi #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
922*22dc650dSSadaf Ebrahimi addr = (sljit_uw)code_ptr;
923*22dc650dSSadaf Ebrahimi #endif /* SLJIT_DEBUG */
924*22dc650dSSadaf Ebrahimi if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
925*22dc650dSSadaf Ebrahimi code_ptr = detect_near_jump_type(jump, code_ptr, code, executable_offset);
926*22dc650dSSadaf Ebrahimi else {
927*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
928*22dc650dSSadaf Ebrahimi code_ptr = detect_far_jump_type(jump, code_ptr, executable_offset);
929*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_32 */
930*22dc650dSSadaf Ebrahimi code_ptr = detect_far_jump_type(jump, code_ptr);
931*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
932*22dc650dSSadaf Ebrahimi }
933*22dc650dSSadaf Ebrahimi
934*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((sljit_uw)code_ptr - addr <= ((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f));
935*22dc650dSSadaf Ebrahimi jump = jump->next;
936*22dc650dSSadaf Ebrahimi break;
937*22dc650dSSadaf Ebrahimi case SLJIT_INST_MOV_ADDR:
938*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
939*22dc650dSSadaf Ebrahimi code_ptr = generate_mov_addr_code(jump, code_ptr, code, executable_offset);
940*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
941*22dc650dSSadaf Ebrahimi jump->addr = (sljit_uw)code_ptr;
942*22dc650dSSadaf Ebrahimi jump = jump->next;
943*22dc650dSSadaf Ebrahimi break;
944*22dc650dSSadaf Ebrahimi default:
945*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(len == SLJIT_INST_CONST);
946*22dc650dSSadaf Ebrahimi const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
947*22dc650dSSadaf Ebrahimi const_ = const_->next;
948*22dc650dSSadaf Ebrahimi break;
949*22dc650dSSadaf Ebrahimi }
950*22dc650dSSadaf Ebrahimi }
951*22dc650dSSadaf Ebrahimi } while (buf_ptr < buf_end);
952*22dc650dSSadaf Ebrahimi
953*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(buf_ptr == buf_end);
954*22dc650dSSadaf Ebrahimi buf = buf->next;
955*22dc650dSSadaf Ebrahimi } while (buf);
956*22dc650dSSadaf Ebrahimi
957*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!label);
958*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!jump);
959*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!const_);
960*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(code_ptr <= code + compiler->size);
961*22dc650dSSadaf Ebrahimi
962*22dc650dSSadaf Ebrahimi jump = compiler->jumps;
963*22dc650dSSadaf Ebrahimi while (jump) {
964*22dc650dSSadaf Ebrahimi generate_jump_or_mov_addr(jump, executable_offset);
965*22dc650dSSadaf Ebrahimi jump = jump->next;
966*22dc650dSSadaf Ebrahimi }
967*22dc650dSSadaf Ebrahimi
968*22dc650dSSadaf Ebrahimi compiler->error = SLJIT_ERR_COMPILED;
969*22dc650dSSadaf Ebrahimi compiler->executable_offset = executable_offset;
970*22dc650dSSadaf Ebrahimi compiler->executable_size = (sljit_uw)(code_ptr - code);
971*22dc650dSSadaf Ebrahimi
972*22dc650dSSadaf Ebrahimi code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
973*22dc650dSSadaf Ebrahimi
974*22dc650dSSadaf Ebrahimi SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 1);
975*22dc650dSSadaf Ebrahimi return (void*)code;
976*22dc650dSSadaf Ebrahimi }
977*22dc650dSSadaf Ebrahimi
sljit_has_cpu_feature(sljit_s32 feature_type)978*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
979*22dc650dSSadaf Ebrahimi {
980*22dc650dSSadaf Ebrahimi switch (feature_type) {
981*22dc650dSSadaf Ebrahimi case SLJIT_HAS_FPU:
982*22dc650dSSadaf Ebrahimi #ifdef SLJIT_IS_FPU_AVAILABLE
983*22dc650dSSadaf Ebrahimi return (SLJIT_IS_FPU_AVAILABLE) != 0;
984*22dc650dSSadaf Ebrahimi #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
985*22dc650dSSadaf Ebrahimi if (cpu_feature_list == 0)
986*22dc650dSSadaf Ebrahimi get_cpu_features();
987*22dc650dSSadaf Ebrahimi return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
988*22dc650dSSadaf Ebrahimi #else /* SLJIT_DETECT_SSE2 */
989*22dc650dSSadaf Ebrahimi return 1;
990*22dc650dSSadaf Ebrahimi #endif /* SLJIT_DETECT_SSE2 */
991*22dc650dSSadaf Ebrahimi
992*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
993*22dc650dSSadaf Ebrahimi case SLJIT_HAS_VIRTUAL_REGISTERS:
994*22dc650dSSadaf Ebrahimi return 1;
995*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
996*22dc650dSSadaf Ebrahimi
997*22dc650dSSadaf Ebrahimi case SLJIT_HAS_CLZ:
998*22dc650dSSadaf Ebrahimi if (cpu_feature_list == 0)
999*22dc650dSSadaf Ebrahimi get_cpu_features();
1000*22dc650dSSadaf Ebrahimi
1001*22dc650dSSadaf Ebrahimi return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2;
1002*22dc650dSSadaf Ebrahimi
1003*22dc650dSSadaf Ebrahimi case SLJIT_HAS_CTZ:
1004*22dc650dSSadaf Ebrahimi if (cpu_feature_list == 0)
1005*22dc650dSSadaf Ebrahimi get_cpu_features();
1006*22dc650dSSadaf Ebrahimi
1007*22dc650dSSadaf Ebrahimi return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2;
1008*22dc650dSSadaf Ebrahimi
1009*22dc650dSSadaf Ebrahimi case SLJIT_HAS_CMOV:
1010*22dc650dSSadaf Ebrahimi if (cpu_feature_list == 0)
1011*22dc650dSSadaf Ebrahimi get_cpu_features();
1012*22dc650dSSadaf Ebrahimi return (cpu_feature_list & CPU_FEATURE_CMOV) != 0;
1013*22dc650dSSadaf Ebrahimi
1014*22dc650dSSadaf Ebrahimi case SLJIT_HAS_REV:
1015*22dc650dSSadaf Ebrahimi case SLJIT_HAS_ROT:
1016*22dc650dSSadaf Ebrahimi case SLJIT_HAS_PREFETCH:
1017*22dc650dSSadaf Ebrahimi case SLJIT_HAS_COPY_F32:
1018*22dc650dSSadaf Ebrahimi case SLJIT_HAS_COPY_F64:
1019*22dc650dSSadaf Ebrahimi case SLJIT_HAS_ATOMIC:
1020*22dc650dSSadaf Ebrahimi return 1;
1021*22dc650dSSadaf Ebrahimi
1022*22dc650dSSadaf Ebrahimi #if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE
1023*22dc650dSSadaf Ebrahimi case SLJIT_HAS_AVX:
1024*22dc650dSSadaf Ebrahimi if (cpu_feature_list == 0)
1025*22dc650dSSadaf Ebrahimi get_cpu_features();
1026*22dc650dSSadaf Ebrahimi return (cpu_feature_list & CPU_FEATURE_AVX) != 0;
1027*22dc650dSSadaf Ebrahimi case SLJIT_HAS_AVX2:
1028*22dc650dSSadaf Ebrahimi if (cpu_feature_list == 0)
1029*22dc650dSSadaf Ebrahimi get_cpu_features();
1030*22dc650dSSadaf Ebrahimi return (cpu_feature_list & CPU_FEATURE_AVX2) != 0;
1031*22dc650dSSadaf Ebrahimi case SLJIT_HAS_SIMD:
1032*22dc650dSSadaf Ebrahimi if (cpu_feature_list == 0)
1033*22dc650dSSadaf Ebrahimi get_cpu_features();
1034*22dc650dSSadaf Ebrahimi return (cpu_feature_list & CPU_FEATURE_SSE41) != 0;
1035*22dc650dSSadaf Ebrahimi #endif /* SLJIT_IS_FPU_AVAILABLE */
1036*22dc650dSSadaf Ebrahimi default:
1037*22dc650dSSadaf Ebrahimi return 0;
1038*22dc650dSSadaf Ebrahimi }
1039*22dc650dSSadaf Ebrahimi }
1040*22dc650dSSadaf Ebrahimi
1041*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
1042*22dc650dSSadaf Ebrahimi {
1043*22dc650dSSadaf Ebrahimi switch (type) {
1044*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_EQUAL:
1045*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED_OR_NOT_EQUAL:
1046*22dc650dSSadaf Ebrahimi return 2;
1047*22dc650dSSadaf Ebrahimi }
1048*22dc650dSSadaf Ebrahimi
1049*22dc650dSSadaf Ebrahimi return 0;
1050*22dc650dSSadaf Ebrahimi }
1051*22dc650dSSadaf Ebrahimi
1052*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
1053*22dc650dSSadaf Ebrahimi /* Operators */
1054*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
1055*22dc650dSSadaf Ebrahimi
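/* BINARY_OPCODE packs the four encodings of a group-1 ALU operation into one
   32-bit value: the AL/EAX,imm form in the top byte, then the reg,r/m form,
   then the r/m,reg form, and in the low byte the value that BINARY_IMM32 ORs
   into the ModRM byte when the immediate-group form is emitted. */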
1056*22dc650dSSadaf Ebrahimi #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
1057*22dc650dSSadaf Ebrahimi
1058*22dc650dSSadaf Ebrahimi #define BINARY_IMM32(op_imm, immw, arg, argw) \
1059*22dc650dSSadaf Ebrahimi do { \
1060*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1061*22dc650dSSadaf Ebrahimi FAIL_IF(!inst); \
1062*22dc650dSSadaf Ebrahimi *(inst + 1) |= (op_imm); \
1063*22dc650dSSadaf Ebrahimi } while (0)
1064*22dc650dSSadaf Ebrahimi
1065*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1066*22dc650dSSadaf Ebrahimi
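/* On x86-64 the ALU immediate forms only accept a sign-extended 32-bit value,
   so wider constants are first materialized in a temporary register (TMP_REG2
   when the other operand is a register, otherwise TMP_REG1) and the
   register-to-memory opcode form is used instead. */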
1067*22dc650dSSadaf Ebrahimi #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1068*22dc650dSSadaf Ebrahimi do { \
1069*22dc650dSSadaf Ebrahimi if (IS_HALFWORD(immw) || compiler->mode32) { \
1070*22dc650dSSadaf Ebrahimi BINARY_IMM32(op_imm, immw, arg, argw); \
1071*22dc650dSSadaf Ebrahimi } \
1072*22dc650dSSadaf Ebrahimi else { \
1073*22dc650dSSadaf Ebrahimi FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, immw)); \
1074*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
1075*22dc650dSSadaf Ebrahimi FAIL_IF(!inst); \
1076*22dc650dSSadaf Ebrahimi *inst = (op_mr); \
1077*22dc650dSSadaf Ebrahimi } \
1078*22dc650dSSadaf Ebrahimi } while (0)
1079*22dc650dSSadaf Ebrahimi
1080*22dc650dSSadaf Ebrahimi #define BINARY_EAX_IMM(op_eax_imm, immw) \
1081*22dc650dSSadaf Ebrahimi FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
1082*22dc650dSSadaf Ebrahimi
1083*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
1084*22dc650dSSadaf Ebrahimi
1085*22dc650dSSadaf Ebrahimi #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1086*22dc650dSSadaf Ebrahimi BINARY_IMM32(op_imm, immw, arg, argw)
1087*22dc650dSSadaf Ebrahimi
1088*22dc650dSSadaf Ebrahimi #define BINARY_EAX_IMM(op_eax_imm, immw) \
1089*22dc650dSSadaf Ebrahimi FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
1090*22dc650dSSadaf Ebrahimi
1091*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
1092*22dc650dSSadaf Ebrahimi
1093*22dc650dSSadaf Ebrahimi static sljit_s32 emit_byte(struct sljit_compiler *compiler, sljit_u8 byte)
1094*22dc650dSSadaf Ebrahimi {
1095*22dc650dSSadaf Ebrahimi sljit_u8 *inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1096*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1097*22dc650dSSadaf Ebrahimi INC_SIZE(1);
1098*22dc650dSSadaf Ebrahimi *inst = byte;
1099*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1100*22dc650dSSadaf Ebrahimi }
1101*22dc650dSSadaf Ebrahimi
1102*22dc650dSSadaf Ebrahimi static sljit_s32 emit_mov(struct sljit_compiler *compiler,
1103*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
1104*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw);
1105*22dc650dSSadaf Ebrahimi
1106*22dc650dSSadaf Ebrahimi #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
1107*22dc650dSSadaf Ebrahimi FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1108*22dc650dSSadaf Ebrahimi
1109*22dc650dSSadaf Ebrahimi static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
1110*22dc650dSSadaf Ebrahimi sljit_uw op,
1111*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
1112*22dc650dSSadaf Ebrahimi
1113*22dc650dSSadaf Ebrahimi static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
1114*22dc650dSSadaf Ebrahimi sljit_uw op,
1115*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
1116*22dc650dSSadaf Ebrahimi
1117*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
1118*22dc650dSSadaf Ebrahimi sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
1119*22dc650dSSadaf Ebrahimi
1120*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
1121*22dc650dSSadaf Ebrahimi sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
1122*22dc650dSSadaf Ebrahimi
1123*22dc650dSSadaf Ebrahimi static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1124*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
1125*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w);
1126*22dc650dSSadaf Ebrahimi
1127*22dc650dSSadaf Ebrahimi static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
1128*22dc650dSSadaf Ebrahimi sljit_s32 dst_reg,
1129*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw);
1130*22dc650dSSadaf Ebrahimi
1131*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler)
1132*22dc650dSSadaf Ebrahimi {
1133*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
1134*22dc650dSSadaf Ebrahimi /* Emit endbr32/endbr64 when CET is enabled. */
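/* The emitted sequence is F3 0F 1E FB (endbr32) or F3 0F 1E FA (endbr64);
   both execute as NOPs on processors without CET indirect branch tracking. */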
1135*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
1136*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1137*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1138*22dc650dSSadaf Ebrahimi INC_SIZE(4);
1139*22dc650dSSadaf Ebrahimi inst[0] = GROUP_F3;
1140*22dc650dSSadaf Ebrahimi inst[1] = GROUP_0F;
1141*22dc650dSSadaf Ebrahimi inst[2] = 0x1e;
1142*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1143*22dc650dSSadaf Ebrahimi inst[3] = 0xfb;
1144*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_32 */
1145*22dc650dSSadaf Ebrahimi inst[3] = 0xfa;
1146*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
1147*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_CET */
1148*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(compiler);
1149*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_CET */
1150*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1151*22dc650dSSadaf Ebrahimi }
1152*22dc650dSSadaf Ebrahimi
1153*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
1154*22dc650dSSadaf Ebrahimi
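/* RDSSP (F3 (REX.W) 0F 1E /1): reads the current shadow stack pointer into reg;
   it behaves as a NOP on processors without CET shadow stacks. */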
1155*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg)
1156*22dc650dSSadaf Ebrahimi {
1157*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
1158*22dc650dSSadaf Ebrahimi sljit_s32 size;
1159*22dc650dSSadaf Ebrahimi
1160*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1161*22dc650dSSadaf Ebrahimi size = 5;
1162*22dc650dSSadaf Ebrahimi #else
1163*22dc650dSSadaf Ebrahimi size = 4;
1164*22dc650dSSadaf Ebrahimi #endif
1165*22dc650dSSadaf Ebrahimi
1166*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1167*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1168*22dc650dSSadaf Ebrahimi INC_SIZE(size);
1169*22dc650dSSadaf Ebrahimi *inst++ = GROUP_F3;
1170*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1171*22dc650dSSadaf Ebrahimi *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
1172*22dc650dSSadaf Ebrahimi #endif
1173*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
1174*22dc650dSSadaf Ebrahimi inst[1] = 0x1e;
1175*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1176*22dc650dSSadaf Ebrahimi inst[2] = U8(MOD_REG | (0x1 << 3) | reg_lmap[reg]);
1177*22dc650dSSadaf Ebrahimi #else
1178*22dc650dSSadaf Ebrahimi inst[2] = U8(MOD_REG | (0x1 << 3) | reg_map[reg]);
1179*22dc650dSSadaf Ebrahimi #endif
1180*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1181*22dc650dSSadaf Ebrahimi }
1182*22dc650dSSadaf Ebrahimi
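/* INCSSP (F3 (REX.W) 0F AE /5): advances the shadow stack pointer by the count
   held in reg, effectively popping that many shadow stack entries. */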
1183*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg)
1184*22dc650dSSadaf Ebrahimi {
1185*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
1186*22dc650dSSadaf Ebrahimi sljit_s32 size;
1187*22dc650dSSadaf Ebrahimi
1188*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1189*22dc650dSSadaf Ebrahimi size = 5;
1190*22dc650dSSadaf Ebrahimi #else
1191*22dc650dSSadaf Ebrahimi size = 4;
1192*22dc650dSSadaf Ebrahimi #endif
1193*22dc650dSSadaf Ebrahimi
1194*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1195*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1196*22dc650dSSadaf Ebrahimi INC_SIZE(size);
1197*22dc650dSSadaf Ebrahimi *inst++ = GROUP_F3;
1198*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1199*22dc650dSSadaf Ebrahimi *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
1200*22dc650dSSadaf Ebrahimi #endif
1201*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
1202*22dc650dSSadaf Ebrahimi inst[1] = 0xae;
1203*22dc650dSSadaf Ebrahimi inst[2] = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);
1204*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1205*22dc650dSSadaf Ebrahimi }
1206*22dc650dSSadaf Ebrahimi
1207*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
1208*22dc650dSSadaf Ebrahimi
1209*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void)
1210*22dc650dSSadaf Ebrahimi {
1211*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
1212*22dc650dSSadaf Ebrahimi return _get_ssp() != 0;
1213*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
1214*22dc650dSSadaf Ebrahimi return 0;
1215*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
1216*22dc650dSSadaf Ebrahimi }
1217*22dc650dSSadaf Ebrahimi
1218*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler,
1219*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
1220*22dc650dSSadaf Ebrahimi {
1221*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
1222*22dc650dSSadaf Ebrahimi sljit_u8 *inst, *jz_after_cmp_inst;
1223*22dc650dSSadaf Ebrahimi sljit_uw size_jz_after_cmp_inst;
1224*22dc650dSSadaf Ebrahimi
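/* The emitted code forms a small loop: read the shadow stack pointer, compare
   the shadow return address with the one on the normal stack and, while they
   differ, pop one shadow stack entry and retry. */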
1225*22dc650dSSadaf Ebrahimi sljit_uw size_before_rdssp_inst = compiler->size;
1226*22dc650dSSadaf Ebrahimi
1227*22dc650dSSadaf Ebrahimi /* Generate "RDSSP TMP_REG1". */
1228*22dc650dSSadaf Ebrahimi FAIL_IF(emit_rdssp(compiler, TMP_REG1));
1229*22dc650dSSadaf Ebrahimi
1230*22dc650dSSadaf Ebrahimi /* Load return address on shadow stack into TMP_REG1. */
1231*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);
1232*22dc650dSSadaf Ebrahimi
1233*22dc650dSSadaf Ebrahimi /* Compare return address against TMP_REG1. */
1234*22dc650dSSadaf Ebrahimi FAIL_IF(emit_cmp_binary(compiler, TMP_REG1, 0, src, srcw));
1235*22dc650dSSadaf Ebrahimi
1236*22dc650dSSadaf Ebrahimi /* Generate JZ to skip shadow stack adjustment when shadow
1237*22dc650dSSadaf Ebrahimi stack matches normal stack. */
1238*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1239*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1240*22dc650dSSadaf Ebrahimi INC_SIZE(2);
1241*22dc650dSSadaf Ebrahimi *inst++ = get_jump_code(SLJIT_EQUAL) - 0x10;
1242*22dc650dSSadaf Ebrahimi size_jz_after_cmp_inst = compiler->size;
1243*22dc650dSSadaf Ebrahimi jz_after_cmp_inst = inst;
1244*22dc650dSSadaf Ebrahimi
1245*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1246*22dc650dSSadaf Ebrahimi /* REX_W is not necessary. */
1247*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
1248*22dc650dSSadaf Ebrahimi #endif
1249*22dc650dSSadaf Ebrahimi /* Load 1 into TMP_REG1. */
1250*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
1251*22dc650dSSadaf Ebrahimi
1252*22dc650dSSadaf Ebrahimi /* Generate "INCSSP TMP_REG1". */
1253*22dc650dSSadaf Ebrahimi FAIL_IF(emit_incssp(compiler, TMP_REG1));
1254*22dc650dSSadaf Ebrahimi
1255*22dc650dSSadaf Ebrahimi /* Jump back to "RDSSP TMP_REG1" to check shadow stack again. */
1256*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1257*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1258*22dc650dSSadaf Ebrahimi INC_SIZE(2);
1259*22dc650dSSadaf Ebrahimi inst[0] = JMP_i8;
1260*22dc650dSSadaf Ebrahimi inst[1] = size_before_rdssp_inst - compiler->size;
1261*22dc650dSSadaf Ebrahimi
1262*22dc650dSSadaf Ebrahimi *jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst;
1263*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
1264*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(compiler);
1265*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(src);
1266*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(srcw);
1267*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
1268*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1269*22dc650dSSadaf Ebrahimi }
1270*22dc650dSSadaf Ebrahimi
1271*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1272*22dc650dSSadaf Ebrahimi #include "sljitNativeX86_32.c"
1273*22dc650dSSadaf Ebrahimi #else
1274*22dc650dSSadaf Ebrahimi #include "sljitNativeX86_64.c"
1275*22dc650dSSadaf Ebrahimi #endif
1276*22dc650dSSadaf Ebrahimi
1277*22dc650dSSadaf Ebrahimi static sljit_s32 emit_mov(struct sljit_compiler *compiler,
1278*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
1279*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
1280*22dc650dSSadaf Ebrahimi {
1281*22dc650dSSadaf Ebrahimi sljit_u8* inst;
1282*22dc650dSSadaf Ebrahimi
1283*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(src)) {
1284*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
1285*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1286*22dc650dSSadaf Ebrahimi *inst = MOV_rm_r;
1287*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1288*22dc650dSSadaf Ebrahimi }
1289*22dc650dSSadaf Ebrahimi
1290*22dc650dSSadaf Ebrahimi if (src == SLJIT_IMM) {
1291*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst)) {
1292*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1293*22dc650dSSadaf Ebrahimi return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1294*22dc650dSSadaf Ebrahimi #else
1295*22dc650dSSadaf Ebrahimi if (!compiler->mode32) {
1296*22dc650dSSadaf Ebrahimi if (NOT_HALFWORD(srcw))
1297*22dc650dSSadaf Ebrahimi return emit_load_imm64(compiler, dst, srcw);
1298*22dc650dSSadaf Ebrahimi }
1299*22dc650dSSadaf Ebrahimi else
1300*22dc650dSSadaf Ebrahimi return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw);
1301*22dc650dSSadaf Ebrahimi #endif
1302*22dc650dSSadaf Ebrahimi }
1303*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1304*22dc650dSSadaf Ebrahimi if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
1305*22dc650dSSadaf Ebrahimi /* Immediate to memory move. Only SLJIT_MOV operation copies
1306*22dc650dSSadaf Ebrahimi an immediate directly into memory so TMP_REG1 can be used. */
1307*22dc650dSSadaf Ebrahimi FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
1308*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1309*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1310*22dc650dSSadaf Ebrahimi *inst = MOV_rm_r;
1311*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1312*22dc650dSSadaf Ebrahimi }
1313*22dc650dSSadaf Ebrahimi #endif
1314*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
1315*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1316*22dc650dSSadaf Ebrahimi *inst = MOV_rm_i32;
1317*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1318*22dc650dSSadaf Ebrahimi }
1319*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst)) {
1320*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
1321*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1322*22dc650dSSadaf Ebrahimi *inst = MOV_r_rm;
1323*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1324*22dc650dSSadaf Ebrahimi }
1325*22dc650dSSadaf Ebrahimi
1326*22dc650dSSadaf Ebrahimi /* Memory to memory move. Only SLJIT_MOV operation copies
1327*22dc650dSSadaf Ebrahimi data from memory to memory so TMP_REG1 can be used. */
1328*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
1329*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1330*22dc650dSSadaf Ebrahimi *inst = MOV_r_rm;
1331*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1332*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1333*22dc650dSSadaf Ebrahimi *inst = MOV_rm_r;
1334*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1335*22dc650dSSadaf Ebrahimi }
1336*22dc650dSSadaf Ebrahimi
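/* Fallback for targets without CMOV: emit a short conditional jump with the
   inverted condition (type ^ 0x1) over a plain MOV, then patch the jump's
   8-bit displacement with the size of the emitted MOV. Subtracting 0x10 from
   the value returned by get_jump_code selects the short (rel8) Jcc form. */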
1337*22dc650dSSadaf Ebrahimi static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
1338*22dc650dSSadaf Ebrahimi sljit_s32 dst_reg,
1339*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
1340*22dc650dSSadaf Ebrahimi {
1341*22dc650dSSadaf Ebrahimi sljit_u8* inst;
1342*22dc650dSSadaf Ebrahimi sljit_uw size;
1343*22dc650dSSadaf Ebrahimi
1344*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL);
1345*22dc650dSSadaf Ebrahimi
1346*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1347*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1348*22dc650dSSadaf Ebrahimi INC_SIZE(2);
1349*22dc650dSSadaf Ebrahimi inst[0] = U8(get_jump_code((sljit_uw)type ^ 0x1) - 0x10);
1350*22dc650dSSadaf Ebrahimi
1351*22dc650dSSadaf Ebrahimi size = compiler->size;
1352*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst_reg, 0, src, srcw);
1353*22dc650dSSadaf Ebrahimi
1354*22dc650dSSadaf Ebrahimi inst[1] = U8(compiler->size - size);
1355*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1356*22dc650dSSadaf Ebrahimi }
1357*22dc650dSSadaf Ebrahimi
1358*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1359*22dc650dSSadaf Ebrahimi {
1360*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
1361*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1362*22dc650dSSadaf Ebrahimi sljit_uw size;
1363*22dc650dSSadaf Ebrahimi #endif
1364*22dc650dSSadaf Ebrahimi
1365*22dc650dSSadaf Ebrahimi CHECK_ERROR();
1366*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op0(compiler, op));
1367*22dc650dSSadaf Ebrahimi
1368*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op)) {
1369*22dc650dSSadaf Ebrahimi case SLJIT_BREAKPOINT:
1370*22dc650dSSadaf Ebrahimi return emit_byte(compiler, INT3);
1371*22dc650dSSadaf Ebrahimi case SLJIT_NOP:
1372*22dc650dSSadaf Ebrahimi return emit_byte(compiler, NOP);
1373*22dc650dSSadaf Ebrahimi case SLJIT_LMUL_UW:
1374*22dc650dSSadaf Ebrahimi case SLJIT_LMUL_SW:
1375*22dc650dSSadaf Ebrahimi case SLJIT_DIVMOD_UW:
1376*22dc650dSSadaf Ebrahimi case SLJIT_DIVMOD_SW:
1377*22dc650dSSadaf Ebrahimi case SLJIT_DIV_UW:
1378*22dc650dSSadaf Ebrahimi case SLJIT_DIV_SW:
1379*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1380*22dc650dSSadaf Ebrahimi #ifdef _WIN64
1381*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(
1382*22dc650dSSadaf Ebrahimi reg_map[SLJIT_R0] == 0
1383*22dc650dSSadaf Ebrahimi && reg_map[SLJIT_R1] == 2
1384*22dc650dSSadaf Ebrahimi && reg_map[TMP_REG1] > 7);
1385*22dc650dSSadaf Ebrahimi #else
1386*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(
1387*22dc650dSSadaf Ebrahimi reg_map[SLJIT_R0] == 0
1388*22dc650dSSadaf Ebrahimi && reg_map[SLJIT_R1] < 7
1389*22dc650dSSadaf Ebrahimi && reg_map[TMP_REG1] == 2);
1390*22dc650dSSadaf Ebrahimi #endif
1391*22dc650dSSadaf Ebrahimi compiler->mode32 = op & SLJIT_32;
1392*22dc650dSSadaf Ebrahimi #endif
1393*22dc650dSSadaf Ebrahimi SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
1394*22dc650dSSadaf Ebrahimi
1395*22dc650dSSadaf Ebrahimi op = GET_OPCODE(op);
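/* Unsigned division expects the upper half of the dividend (EDX/RDX) to be
   zero, so it is cleared with XOR here; the signed variants instead
   sign-extend EAX/RAX into EDX/RDX with CDQ/CQO below. */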
1396*22dc650dSSadaf Ebrahimi if ((op | 0x2) == SLJIT_DIV_UW) {
1397*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
1398*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1399*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
1400*22dc650dSSadaf Ebrahimi #else
1401*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1402*22dc650dSSadaf Ebrahimi #endif
1403*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1404*22dc650dSSadaf Ebrahimi *inst = XOR_r_rm;
1405*22dc650dSSadaf Ebrahimi }
1406*22dc650dSSadaf Ebrahimi
1407*22dc650dSSadaf Ebrahimi if ((op | 0x2) == SLJIT_DIV_SW) {
1408*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
1409*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1410*22dc650dSSadaf Ebrahimi #endif
1411*22dc650dSSadaf Ebrahimi
1412*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1413*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, CDQ));
1414*22dc650dSSadaf Ebrahimi #else
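/* With a REX.W prefix the CDQ opcode (0x99) encodes CQO, which sign-extends
   RAX into RDX. */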
1415*22dc650dSSadaf Ebrahimi if (!compiler->mode32) {
1416*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1417*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1418*22dc650dSSadaf Ebrahimi INC_SIZE(2);
1419*22dc650dSSadaf Ebrahimi inst[0] = REX_W;
1420*22dc650dSSadaf Ebrahimi inst[1] = CDQ;
1421*22dc650dSSadaf Ebrahimi } else
1422*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, CDQ));
1423*22dc650dSSadaf Ebrahimi #endif
1424*22dc650dSSadaf Ebrahimi }
1425*22dc650dSSadaf Ebrahimi
1426*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1427*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1428*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1429*22dc650dSSadaf Ebrahimi INC_SIZE(2);
1430*22dc650dSSadaf Ebrahimi inst[0] = GROUP_F7;
1431*22dc650dSSadaf Ebrahimi inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
1432*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_32 */
1433*22dc650dSSadaf Ebrahimi #ifdef _WIN64
1434*22dc650dSSadaf Ebrahimi size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
1435*22dc650dSSadaf Ebrahimi #else /* !_WIN64 */
1436*22dc650dSSadaf Ebrahimi size = (!compiler->mode32) ? 3 : 2;
1437*22dc650dSSadaf Ebrahimi #endif /* _WIN64 */
1438*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1439*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1440*22dc650dSSadaf Ebrahimi INC_SIZE(size);
1441*22dc650dSSadaf Ebrahimi #ifdef _WIN64
1442*22dc650dSSadaf Ebrahimi if (!compiler->mode32)
1443*22dc650dSSadaf Ebrahimi *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
1444*22dc650dSSadaf Ebrahimi else if (op >= SLJIT_DIVMOD_UW)
1445*22dc650dSSadaf Ebrahimi *inst++ = REX_B;
1446*22dc650dSSadaf Ebrahimi inst[0] = GROUP_F7;
1447*22dc650dSSadaf Ebrahimi inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
1448*22dc650dSSadaf Ebrahimi #else /* !_WIN64 */
1449*22dc650dSSadaf Ebrahimi if (!compiler->mode32)
1450*22dc650dSSadaf Ebrahimi *inst++ = REX_W;
1451*22dc650dSSadaf Ebrahimi inst[0] = GROUP_F7;
1452*22dc650dSSadaf Ebrahimi inst[1] = MOD_REG | reg_map[SLJIT_R1];
1453*22dc650dSSadaf Ebrahimi #endif /* _WIN64 */
1454*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
1455*22dc650dSSadaf Ebrahimi switch (op) {
1456*22dc650dSSadaf Ebrahimi case SLJIT_LMUL_UW:
1457*22dc650dSSadaf Ebrahimi inst[1] |= MUL;
1458*22dc650dSSadaf Ebrahimi break;
1459*22dc650dSSadaf Ebrahimi case SLJIT_LMUL_SW:
1460*22dc650dSSadaf Ebrahimi inst[1] |= IMUL;
1461*22dc650dSSadaf Ebrahimi break;
1462*22dc650dSSadaf Ebrahimi case SLJIT_DIVMOD_UW:
1463*22dc650dSSadaf Ebrahimi case SLJIT_DIV_UW:
1464*22dc650dSSadaf Ebrahimi inst[1] |= DIV;
1465*22dc650dSSadaf Ebrahimi break;
1466*22dc650dSSadaf Ebrahimi case SLJIT_DIVMOD_SW:
1467*22dc650dSSadaf Ebrahimi case SLJIT_DIV_SW:
1468*22dc650dSSadaf Ebrahimi inst[1] |= IDIV;
1469*22dc650dSSadaf Ebrahimi break;
1470*22dc650dSSadaf Ebrahimi }
1471*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
1472*22dc650dSSadaf Ebrahimi if (op <= SLJIT_DIVMOD_SW)
1473*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1474*22dc650dSSadaf Ebrahimi #else
1475*22dc650dSSadaf Ebrahimi if (op >= SLJIT_DIV_UW)
1476*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1477*22dc650dSSadaf Ebrahimi #endif
1478*22dc650dSSadaf Ebrahimi break;
1479*22dc650dSSadaf Ebrahimi case SLJIT_ENDBR:
1480*22dc650dSSadaf Ebrahimi return emit_endbranch(compiler);
1481*22dc650dSSadaf Ebrahimi case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1482*22dc650dSSadaf Ebrahimi return skip_frames_before_return(compiler);
1483*22dc650dSSadaf Ebrahimi }
1484*22dc650dSSadaf Ebrahimi
1485*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1486*22dc650dSSadaf Ebrahimi }
1487*22dc650dSSadaf Ebrahimi
1488*22dc650dSSadaf Ebrahimi static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
1489*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
1490*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
1491*22dc650dSSadaf Ebrahimi {
1492*22dc650dSSadaf Ebrahimi sljit_u8* inst;
1493*22dc650dSSadaf Ebrahimi sljit_s32 dst_r;
1494*22dc650dSSadaf Ebrahimi
1495*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1496*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
1497*22dc650dSSadaf Ebrahimi #endif
1498*22dc650dSSadaf Ebrahimi
1499*22dc650dSSadaf Ebrahimi if (src == SLJIT_IMM) {
1500*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst)) {
1501*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1502*22dc650dSSadaf Ebrahimi return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1503*22dc650dSSadaf Ebrahimi #else
1504*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1505*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1506*22dc650dSSadaf Ebrahimi *inst = MOV_rm_i32;
1507*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1508*22dc650dSSadaf Ebrahimi #endif
1509*22dc650dSSadaf Ebrahimi }
1510*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
1511*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1512*22dc650dSSadaf Ebrahimi *inst = MOV_rm8_i8;
1513*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1514*22dc650dSSadaf Ebrahimi }
1515*22dc650dSSadaf Ebrahimi
1516*22dc650dSSadaf Ebrahimi dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1517*22dc650dSSadaf Ebrahimi
1518*22dc650dSSadaf Ebrahimi if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
1519*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
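/* On x86-32 only registers mapped below 4 (EAX..EBX) have byte-addressable
   low forms, so a higher-mapped source is first copied into TMP_REG1. */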
1520*22dc650dSSadaf Ebrahimi if (reg_map[src] >= 4) {
1521*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(dst_r == TMP_REG1);
1522*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
1523*22dc650dSSadaf Ebrahimi } else
1524*22dc650dSSadaf Ebrahimi dst_r = src;
1525*22dc650dSSadaf Ebrahimi #else
1526*22dc650dSSadaf Ebrahimi dst_r = src;
1527*22dc650dSSadaf Ebrahimi #endif
1528*22dc650dSSadaf Ebrahimi } else {
1529*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1530*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(src) && reg_map[src] >= 4) {
1531*22dc650dSSadaf Ebrahimi /* Both src and dst are registers. */
1532*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(FAST_IS_REG(dst));
1533*22dc650dSSadaf Ebrahimi
1534*22dc650dSSadaf Ebrahimi if (src == dst && !sign) {
1535*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
1536*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1537*22dc650dSSadaf Ebrahimi *(inst + 1) |= AND;
1538*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1539*22dc650dSSadaf Ebrahimi }
1540*22dc650dSSadaf Ebrahimi
1541*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
1542*22dc650dSSadaf Ebrahimi src = TMP_REG1;
1543*22dc650dSSadaf Ebrahimi srcw = 0;
1544*22dc650dSSadaf Ebrahimi }
1545*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
1546*22dc650dSSadaf Ebrahimi
1547*22dc650dSSadaf Ebrahimi /* Here src is either a memory address or a register with reg_map[src] < 4 on x86-32. */
1548*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm8 : MOVZX_r_rm8, dst_r, src, srcw));
1549*22dc650dSSadaf Ebrahimi }
1550*22dc650dSSadaf Ebrahimi
1551*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM) {
1552*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
1553*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1554*22dc650dSSadaf Ebrahimi *inst = MOV_rm8_r8;
1555*22dc650dSSadaf Ebrahimi }
1556*22dc650dSSadaf Ebrahimi
1557*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1558*22dc650dSSadaf Ebrahimi }
1559*22dc650dSSadaf Ebrahimi
1560*22dc650dSSadaf Ebrahimi static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
1561*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
1562*22dc650dSSadaf Ebrahimi {
1563*22dc650dSSadaf Ebrahimi sljit_u8* inst;
1564*22dc650dSSadaf Ebrahimi
1565*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1566*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
1567*22dc650dSSadaf Ebrahimi #endif
1568*22dc650dSSadaf Ebrahimi
1569*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
1570*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1571*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
1572*22dc650dSSadaf Ebrahimi inst[1] = PREFETCH;
1573*22dc650dSSadaf Ebrahimi
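/* The prefetch hint is selected through the ModRM reg field of the 0F 18
   opcode: /1../3 give prefetcht0/t1/t2 for the L1/L2/L3 variants below, while
   leaving the field zero (/0, prefetchnta) covers the remaining case. */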
1574*22dc650dSSadaf Ebrahimi if (op == SLJIT_PREFETCH_L1)
1575*22dc650dSSadaf Ebrahimi inst[2] |= (1 << 3);
1576*22dc650dSSadaf Ebrahimi else if (op == SLJIT_PREFETCH_L2)
1577*22dc650dSSadaf Ebrahimi inst[2] |= (2 << 3);
1578*22dc650dSSadaf Ebrahimi else if (op == SLJIT_PREFETCH_L3)
1579*22dc650dSSadaf Ebrahimi inst[2] |= (3 << 3);
1580*22dc650dSSadaf Ebrahimi
1581*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1582*22dc650dSSadaf Ebrahimi }
1583*22dc650dSSadaf Ebrahimi
1584*22dc650dSSadaf Ebrahimi static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
1585*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
1586*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
1587*22dc650dSSadaf Ebrahimi {
1588*22dc650dSSadaf Ebrahimi sljit_u8* inst;
1589*22dc650dSSadaf Ebrahimi sljit_s32 dst_r;
1590*22dc650dSSadaf Ebrahimi
1591*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1592*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
1593*22dc650dSSadaf Ebrahimi #endif
1594*22dc650dSSadaf Ebrahimi
1595*22dc650dSSadaf Ebrahimi if (src == SLJIT_IMM) {
1596*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst)) {
1597*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1598*22dc650dSSadaf Ebrahimi return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1599*22dc650dSSadaf Ebrahimi #else
1600*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1601*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1602*22dc650dSSadaf Ebrahimi *inst = MOV_rm_i32;
1603*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1604*22dc650dSSadaf Ebrahimi #endif
1605*22dc650dSSadaf Ebrahimi }
1606*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1607*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1608*22dc650dSSadaf Ebrahimi *inst = MOV_rm_i32;
1609*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1610*22dc650dSSadaf Ebrahimi }
1611*22dc650dSSadaf Ebrahimi
1612*22dc650dSSadaf Ebrahimi dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1613*22dc650dSSadaf Ebrahimi
1614*22dc650dSSadaf Ebrahimi if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1615*22dc650dSSadaf Ebrahimi dst_r = src;
1616*22dc650dSSadaf Ebrahimi else
1617*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm16 : MOVZX_r_rm16, dst_r, src, srcw));
1618*22dc650dSSadaf Ebrahimi
1619*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM) {
1620*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1621*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1622*22dc650dSSadaf Ebrahimi *inst = MOV_rm_r;
1623*22dc650dSSadaf Ebrahimi }
1624*22dc650dSSadaf Ebrahimi
1625*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1626*22dc650dSSadaf Ebrahimi }
1627*22dc650dSSadaf Ebrahimi
1628*22dc650dSSadaf Ebrahimi static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
1629*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
1630*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
1631*22dc650dSSadaf Ebrahimi {
1632*22dc650dSSadaf Ebrahimi sljit_u8* inst;
1633*22dc650dSSadaf Ebrahimi
1634*22dc650dSSadaf Ebrahimi if (dst == src && dstw == srcw) {
1635*22dc650dSSadaf Ebrahimi /* Same input and output */
1636*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1637*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1638*22dc650dSSadaf Ebrahimi inst[0] = GROUP_F7;
1639*22dc650dSSadaf Ebrahimi inst[1] |= opcode;
1640*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1641*22dc650dSSadaf Ebrahimi }
1642*22dc650dSSadaf Ebrahimi
1643*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst)) {
1644*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst, 0, src, srcw);
1645*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1646*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1647*22dc650dSSadaf Ebrahimi inst[0] = GROUP_F7;
1648*22dc650dSSadaf Ebrahimi inst[1] |= opcode;
1649*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1650*22dc650dSSadaf Ebrahimi }
1651*22dc650dSSadaf Ebrahimi
1652*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1653*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1654*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1655*22dc650dSSadaf Ebrahimi inst[0] = GROUP_F7;
1656*22dc650dSSadaf Ebrahimi inst[1] |= opcode;
1657*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1658*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1659*22dc650dSSadaf Ebrahimi }
1660*22dc650dSSadaf Ebrahimi
1661*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1662*22dc650dSSadaf Ebrahimi static const sljit_sw emit_clz_arg = 32 + 31;
1663*22dc650dSSadaf Ebrahimi static const sljit_sw emit_ctz_arg = 32;
1664*22dc650dSSadaf Ebrahimi #endif
1665*22dc650dSSadaf Ebrahimi
1666*22dc650dSSadaf Ebrahimi static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
1667*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
1668*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
1669*22dc650dSSadaf Ebrahimi {
1670*22dc650dSSadaf Ebrahimi sljit_u8* inst;
1671*22dc650dSSadaf Ebrahimi sljit_s32 dst_r;
1672*22dc650dSSadaf Ebrahimi sljit_sw max;
1673*22dc650dSSadaf Ebrahimi
1674*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(cpu_feature_list != 0);
1675*22dc650dSSadaf Ebrahimi
1676*22dc650dSSadaf Ebrahimi dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1677*22dc650dSSadaf Ebrahimi
1678*22dc650dSSadaf Ebrahimi if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) {
1679*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, (is_clz ? LZCNT_r_rm : TZCNT_r_rm) | EX86_PREF_F3, dst_r, src, srcw));
1680*22dc650dSSadaf Ebrahimi
1681*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM)
1682*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1683*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1684*22dc650dSSadaf Ebrahimi }
1685*22dc650dSSadaf Ebrahimi
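/* Fallback without LZCNT/TZCNT: BSR/BSF leave the destination undefined for a
   zero input, so a CMOV (or the generic branch emulation) loads a precomputed
   value for that case, and for CLZ the resulting bit index is converted to a
   leading-zero count by XOR-ing with 31 (63 in 64-bit mode). */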
1686*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, is_clz ? BSR_r_rm : BSF_r_rm, dst_r, src, srcw));
1687*22dc650dSSadaf Ebrahimi
1688*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1689*22dc650dSSadaf Ebrahimi max = is_clz ? (32 + 31) : 32;
1690*22dc650dSSadaf Ebrahimi
1691*22dc650dSSadaf Ebrahimi if (cpu_feature_list & CPU_FEATURE_CMOV) {
1692*22dc650dSSadaf Ebrahimi if (dst_r != TMP_REG1) {
1693*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max);
1694*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1695*22dc650dSSadaf Ebrahimi }
1696*22dc650dSSadaf Ebrahimi else
1697*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg);
1698*22dc650dSSadaf Ebrahimi
1699*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1700*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
1701*22dc650dSSadaf Ebrahimi inst[1] = CMOVE_r_rm;
1702*22dc650dSSadaf Ebrahimi }
1703*22dc650dSSadaf Ebrahimi else
1704*22dc650dSSadaf Ebrahimi FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
1705*22dc650dSSadaf Ebrahimi
1706*22dc650dSSadaf Ebrahimi if (is_clz) {
1707*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1708*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1709*22dc650dSSadaf Ebrahimi *(inst + 1) |= XOR;
1710*22dc650dSSadaf Ebrahimi }
1711*22dc650dSSadaf Ebrahimi #else
1712*22dc650dSSadaf Ebrahimi if (is_clz)
1713*22dc650dSSadaf Ebrahimi max = compiler->mode32 ? (32 + 31) : (64 + 63);
1714*22dc650dSSadaf Ebrahimi else
1715*22dc650dSSadaf Ebrahimi max = compiler->mode32 ? 32 : 64;
1716*22dc650dSSadaf Ebrahimi
1717*22dc650dSSadaf Ebrahimi if (cpu_feature_list & CPU_FEATURE_CMOV) {
1718*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max);
1719*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, CMOVE_r_rm, dst_r, TMP_REG2, 0));
1720*22dc650dSSadaf Ebrahimi } else
1721*22dc650dSSadaf Ebrahimi FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
1722*22dc650dSSadaf Ebrahimi
1723*22dc650dSSadaf Ebrahimi if (is_clz) {
1724*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0);
1725*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1726*22dc650dSSadaf Ebrahimi *(inst + 1) |= XOR;
1727*22dc650dSSadaf Ebrahimi }
1728*22dc650dSSadaf Ebrahimi #endif
1729*22dc650dSSadaf Ebrahimi
1730*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM)
1731*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1732*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1733*22dc650dSSadaf Ebrahimi }
1734*22dc650dSSadaf Ebrahimi
1735*22dc650dSSadaf Ebrahimi static sljit_s32 emit_bswap(struct sljit_compiler *compiler,
1736*22dc650dSSadaf Ebrahimi sljit_s32 op,
1737*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
1738*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
1739*22dc650dSSadaf Ebrahimi {
1740*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
1741*22dc650dSSadaf Ebrahimi sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1742*22dc650dSSadaf Ebrahimi sljit_uw size;
1743*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1744*22dc650dSSadaf Ebrahimi sljit_u8 rex = 0;
1745*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
1746*22dc650dSSadaf Ebrahimi sljit_s32 dst_is_ereg = op & SLJIT_32;
1747*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
1748*22dc650dSSadaf Ebrahimi
1749*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1750*22dc650dSSadaf Ebrahimi if (op == SLJIT_REV_U32 || op == SLJIT_REV_S32)
1751*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
1752*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
1753*22dc650dSSadaf Ebrahimi op &= ~SLJIT_32;
1754*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
1755*22dc650dSSadaf Ebrahimi
1756*22dc650dSSadaf Ebrahimi if (src != dst_r) {
1757*22dc650dSSadaf Ebrahimi /* Only the lower 16 bits are read for eregs. */
1758*22dc650dSSadaf Ebrahimi if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
1759*22dc650dSSadaf Ebrahimi FAIL_IF(emit_mov_half(compiler, 0, dst_r, 0, src, srcw));
1760*22dc650dSSadaf Ebrahimi else
1761*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst_r, 0, src, srcw);
1762*22dc650dSSadaf Ebrahimi }
1763*22dc650dSSadaf Ebrahimi
1764*22dc650dSSadaf Ebrahimi size = 2;
1765*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1766*22dc650dSSadaf Ebrahimi if (!compiler->mode32)
1767*22dc650dSSadaf Ebrahimi rex = REX_W;
1768*22dc650dSSadaf Ebrahimi
1769*22dc650dSSadaf Ebrahimi if (reg_map[dst_r] >= 8)
1770*22dc650dSSadaf Ebrahimi rex |= REX_B;
1771*22dc650dSSadaf Ebrahimi
1772*22dc650dSSadaf Ebrahimi if (rex != 0)
1773*22dc650dSSadaf Ebrahimi size++;
1774*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
1775*22dc650dSSadaf Ebrahimi
1776*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1777*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1778*22dc650dSSadaf Ebrahimi INC_SIZE(size);
1779*22dc650dSSadaf Ebrahimi
1780*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1781*22dc650dSSadaf Ebrahimi if (rex != 0)
1782*22dc650dSSadaf Ebrahimi *inst++ = rex;
1783*22dc650dSSadaf Ebrahimi
1784*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
1785*22dc650dSSadaf Ebrahimi inst[1] = BSWAP_r | reg_lmap[dst_r];
1786*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
1787*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
1788*22dc650dSSadaf Ebrahimi inst[1] = BSWAP_r | reg_map[dst_r];
1789*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
1790*22dc650dSSadaf Ebrahimi
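/* For the 16-bit reverses the swapped halfword ends up in the top of the
   register after BSWAP, so it is shifted back down by 16 (48 in 64-bit mode),
   using SHR or SAR to get zero or sign extension respectively. */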
1791*22dc650dSSadaf Ebrahimi if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
1792*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1793*22dc650dSSadaf Ebrahimi size = compiler->mode32 ? 16 : 48;
1794*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
1795*22dc650dSSadaf Ebrahimi size = 16;
1796*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
1797*22dc650dSSadaf Ebrahimi
1798*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, (sljit_sw)size, dst_r, 0);
1799*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1800*22dc650dSSadaf Ebrahimi if (op == SLJIT_REV_U16)
1801*22dc650dSSadaf Ebrahimi inst[1] |= SHR;
1802*22dc650dSSadaf Ebrahimi else
1803*22dc650dSSadaf Ebrahimi inst[1] |= SAR;
1804*22dc650dSSadaf Ebrahimi }
1805*22dc650dSSadaf Ebrahimi
1806*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM) {
1807*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1808*22dc650dSSadaf Ebrahimi if (dst_is_ereg)
1809*22dc650dSSadaf Ebrahimi op = SLJIT_REV;
1810*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
1811*22dc650dSSadaf Ebrahimi if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
1812*22dc650dSSadaf Ebrahimi return emit_mov_half(compiler, 0, dst, dstw, TMP_REG1, 0);
1813*22dc650dSSadaf Ebrahimi
1814*22dc650dSSadaf Ebrahimi return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1815*22dc650dSSadaf Ebrahimi }
1816*22dc650dSSadaf Ebrahimi
1817*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1818*22dc650dSSadaf Ebrahimi if (op == SLJIT_REV_S32) {
1819*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
1820*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1821*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
1822*22dc650dSSadaf Ebrahimi *inst = MOVSXD_r_rm;
1823*22dc650dSSadaf Ebrahimi }
1824*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
1825*22dc650dSSadaf Ebrahimi
1826*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1827*22dc650dSSadaf Ebrahimi }
1828*22dc650dSSadaf Ebrahimi
1829*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1830*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
1831*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
1832*22dc650dSSadaf Ebrahimi {
1833*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1834*22dc650dSSadaf Ebrahimi sljit_s32 dst_is_ereg = 0;
1835*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_32 */
1836*22dc650dSSadaf Ebrahimi sljit_s32 op_flags = GET_ALL_FLAGS(op);
1837*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
1838*22dc650dSSadaf Ebrahimi
1839*22dc650dSSadaf Ebrahimi CHECK_ERROR();
1840*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1841*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
1842*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src, srcw);
1843*22dc650dSSadaf Ebrahimi
1844*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1845*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src, srcw, (void)0);
1846*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1847*22dc650dSSadaf Ebrahimi compiler->mode32 = op_flags & SLJIT_32;
1848*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
1849*22dc650dSSadaf Ebrahimi
1850*22dc650dSSadaf Ebrahimi op = GET_OPCODE(op);
1851*22dc650dSSadaf Ebrahimi
1852*22dc650dSSadaf Ebrahimi if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
1853*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1854*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
1855*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
1856*22dc650dSSadaf Ebrahimi
1857*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(src) && src == dst) {
1858*22dc650dSSadaf Ebrahimi if (!TYPE_CAST_NEEDED(op))
1859*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1860*22dc650dSSadaf Ebrahimi }
1861*22dc650dSSadaf Ebrahimi
1862*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1863*22dc650dSSadaf Ebrahimi if (op_flags & SLJIT_32) {
1864*22dc650dSSadaf Ebrahimi if (src & SLJIT_MEM) {
1865*22dc650dSSadaf Ebrahimi if (op == SLJIT_MOV_S32)
1866*22dc650dSSadaf Ebrahimi op = SLJIT_MOV_U32;
1867*22dc650dSSadaf Ebrahimi }
1868*22dc650dSSadaf Ebrahimi else if (src == SLJIT_IMM) {
1869*22dc650dSSadaf Ebrahimi if (op == SLJIT_MOV_U32)
1870*22dc650dSSadaf Ebrahimi op = SLJIT_MOV_S32;
1871*22dc650dSSadaf Ebrahimi }
1872*22dc650dSSadaf Ebrahimi }
1873*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
1874*22dc650dSSadaf Ebrahimi
1875*22dc650dSSadaf Ebrahimi if (src == SLJIT_IMM) {
1876*22dc650dSSadaf Ebrahimi switch (op) {
1877*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U8:
1878*22dc650dSSadaf Ebrahimi srcw = (sljit_u8)srcw;
1879*22dc650dSSadaf Ebrahimi break;
1880*22dc650dSSadaf Ebrahimi case SLJIT_MOV_S8:
1881*22dc650dSSadaf Ebrahimi srcw = (sljit_s8)srcw;
1882*22dc650dSSadaf Ebrahimi break;
1883*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U16:
1884*22dc650dSSadaf Ebrahimi srcw = (sljit_u16)srcw;
1885*22dc650dSSadaf Ebrahimi break;
1886*22dc650dSSadaf Ebrahimi case SLJIT_MOV_S16:
1887*22dc650dSSadaf Ebrahimi srcw = (sljit_s16)srcw;
1888*22dc650dSSadaf Ebrahimi break;
1889*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1890*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U32:
1891*22dc650dSSadaf Ebrahimi srcw = (sljit_u32)srcw;
1892*22dc650dSSadaf Ebrahimi break;
1893*22dc650dSSadaf Ebrahimi case SLJIT_MOV_S32:
1894*22dc650dSSadaf Ebrahimi srcw = (sljit_s32)srcw;
1895*22dc650dSSadaf Ebrahimi break;
1896*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
1897*22dc650dSSadaf Ebrahimi }
1898*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1899*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(dst_is_ereg))
1900*22dc650dSSadaf Ebrahimi return emit_mov(compiler, dst, dstw, src, srcw);
1901*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
1902*22dc650dSSadaf Ebrahimi }
1903*22dc650dSSadaf Ebrahimi
1904*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1905*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1906*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1907*22dc650dSSadaf Ebrahimi dst = TMP_REG1;
1908*22dc650dSSadaf Ebrahimi }
1909*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
1910*22dc650dSSadaf Ebrahimi
1911*22dc650dSSadaf Ebrahimi switch (op) {
1912*22dc650dSSadaf Ebrahimi case SLJIT_MOV:
1913*22dc650dSSadaf Ebrahimi case SLJIT_MOV_P:
1914*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1915*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U32:
1916*22dc650dSSadaf Ebrahimi case SLJIT_MOV_S32:
1917*22dc650dSSadaf Ebrahimi case SLJIT_MOV32:
1918*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
1919*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst, dstw, src, srcw);
1920*22dc650dSSadaf Ebrahimi break;
1921*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U8:
1922*22dc650dSSadaf Ebrahimi FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1923*22dc650dSSadaf Ebrahimi break;
1924*22dc650dSSadaf Ebrahimi case SLJIT_MOV_S8:
1925*22dc650dSSadaf Ebrahimi FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1926*22dc650dSSadaf Ebrahimi break;
1927*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U16:
1928*22dc650dSSadaf Ebrahimi FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1929*22dc650dSSadaf Ebrahimi break;
1930*22dc650dSSadaf Ebrahimi case SLJIT_MOV_S16:
1931*22dc650dSSadaf Ebrahimi FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1932*22dc650dSSadaf Ebrahimi break;
1933*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1934*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U32:
1935*22dc650dSSadaf Ebrahimi FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1936*22dc650dSSadaf Ebrahimi break;
1937*22dc650dSSadaf Ebrahimi case SLJIT_MOV_S32:
1938*22dc650dSSadaf Ebrahimi FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1939*22dc650dSSadaf Ebrahimi break;
1940*22dc650dSSadaf Ebrahimi case SLJIT_MOV32:
1941*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
1942*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst, dstw, src, srcw);
1943*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
1944*22dc650dSSadaf Ebrahimi break;
1945*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
1946*22dc650dSSadaf Ebrahimi }
1947*22dc650dSSadaf Ebrahimi
1948*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1949*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1950*22dc650dSSadaf Ebrahimi return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1951*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
1952*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1953*22dc650dSSadaf Ebrahimi }
1954*22dc650dSSadaf Ebrahimi
1955*22dc650dSSadaf Ebrahimi switch (op) {
1956*22dc650dSSadaf Ebrahimi case SLJIT_CLZ:
1957*22dc650dSSadaf Ebrahimi case SLJIT_CTZ:
1958*22dc650dSSadaf Ebrahimi return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw);
1959*22dc650dSSadaf Ebrahimi case SLJIT_REV:
1960*22dc650dSSadaf Ebrahimi case SLJIT_REV_U16:
1961*22dc650dSSadaf Ebrahimi case SLJIT_REV_S16:
1962*22dc650dSSadaf Ebrahimi case SLJIT_REV_U32:
1963*22dc650dSSadaf Ebrahimi case SLJIT_REV_S32:
1964*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1965*22dc650dSSadaf Ebrahimi if (dst_is_ereg)
1966*22dc650dSSadaf Ebrahimi op |= SLJIT_32;
1967*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
1968*22dc650dSSadaf Ebrahimi return emit_bswap(compiler, op, dst, dstw, src, srcw);
1969*22dc650dSSadaf Ebrahimi }
1970*22dc650dSSadaf Ebrahimi
1971*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1972*22dc650dSSadaf Ebrahimi }
1973*22dc650dSSadaf Ebrahimi
1974*22dc650dSSadaf Ebrahimi static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
1975*22dc650dSSadaf Ebrahimi sljit_u32 op_types,
1976*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
1977*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
1978*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
1979*22dc650dSSadaf Ebrahimi {
1980*22dc650dSSadaf Ebrahimi sljit_u8* inst;
1981*22dc650dSSadaf Ebrahimi sljit_u8 op_eax_imm = U8(op_types >> 24);
1982*22dc650dSSadaf Ebrahimi sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
1983*22dc650dSSadaf Ebrahimi sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
1984*22dc650dSSadaf Ebrahimi sljit_u8 op_imm = U8(op_types & 0xff);
1985*22dc650dSSadaf Ebrahimi
1986*22dc650dSSadaf Ebrahimi if (dst == src1 && dstw == src1w) {
1987*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
1988*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1989*22dc650dSSadaf Ebrahimi if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1990*22dc650dSSadaf Ebrahimi #else
1991*22dc650dSSadaf Ebrahimi if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1992*22dc650dSSadaf Ebrahimi #endif
1993*22dc650dSSadaf Ebrahimi BINARY_EAX_IMM(op_eax_imm, src2w);
1994*22dc650dSSadaf Ebrahimi }
1995*22dc650dSSadaf Ebrahimi else {
1996*22dc650dSSadaf Ebrahimi BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1997*22dc650dSSadaf Ebrahimi }
1998*22dc650dSSadaf Ebrahimi }
1999*22dc650dSSadaf Ebrahimi else if (FAST_IS_REG(dst)) {
2000*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
2001*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2002*22dc650dSSadaf Ebrahimi *inst = op_rm;
2003*22dc650dSSadaf Ebrahimi }
2004*22dc650dSSadaf Ebrahimi else if (FAST_IS_REG(src2)) {
2005*22dc650dSSadaf Ebrahimi /* Special exception for sljit_emit_op_flags. */
2006*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
2007*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2008*22dc650dSSadaf Ebrahimi *inst = op_mr;
2009*22dc650dSSadaf Ebrahimi }
2010*22dc650dSSadaf Ebrahimi else {
2011*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
2012*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2013*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2014*22dc650dSSadaf Ebrahimi *inst = op_mr;
2015*22dc650dSSadaf Ebrahimi }
2016*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2017*22dc650dSSadaf Ebrahimi }
2018*22dc650dSSadaf Ebrahimi
2019*22dc650dSSadaf Ebrahimi /* Only for cumulative operations. */
2020*22dc650dSSadaf Ebrahimi if (dst == src2 && dstw == src2w) {
2021*22dc650dSSadaf Ebrahimi if (src1 == SLJIT_IMM) {
2022*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2023*22dc650dSSadaf Ebrahimi if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2024*22dc650dSSadaf Ebrahimi #else
2025*22dc650dSSadaf Ebrahimi if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
2026*22dc650dSSadaf Ebrahimi #endif
2027*22dc650dSSadaf Ebrahimi BINARY_EAX_IMM(op_eax_imm, src1w);
2028*22dc650dSSadaf Ebrahimi }
2029*22dc650dSSadaf Ebrahimi else {
2030*22dc650dSSadaf Ebrahimi BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
2031*22dc650dSSadaf Ebrahimi }
2032*22dc650dSSadaf Ebrahimi }
2033*22dc650dSSadaf Ebrahimi else if (FAST_IS_REG(dst)) {
2034*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
2035*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2036*22dc650dSSadaf Ebrahimi *inst = op_rm;
2037*22dc650dSSadaf Ebrahimi }
2038*22dc650dSSadaf Ebrahimi else if (FAST_IS_REG(src1)) {
2039*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
2040*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2041*22dc650dSSadaf Ebrahimi *inst = op_mr;
2042*22dc650dSSadaf Ebrahimi }
2043*22dc650dSSadaf Ebrahimi else {
2044*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2045*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2046*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2047*22dc650dSSadaf Ebrahimi *inst = op_mr;
2048*22dc650dSSadaf Ebrahimi }
2049*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2050*22dc650dSSadaf Ebrahimi }
2051*22dc650dSSadaf Ebrahimi
2052*22dc650dSSadaf Ebrahimi /* General version. */
2053*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst)) {
2054*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst, 0, src1, src1w);
2055*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
2056*22dc650dSSadaf Ebrahimi BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
2057*22dc650dSSadaf Ebrahimi }
2058*22dc650dSSadaf Ebrahimi else {
2059*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
2060*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2061*22dc650dSSadaf Ebrahimi *inst = op_rm;
2062*22dc650dSSadaf Ebrahimi }
2063*22dc650dSSadaf Ebrahimi }
2064*22dc650dSSadaf Ebrahimi else {
2065*22dc650dSSadaf Ebrahimi /* This version performs fewer memory writes. */
2066*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2067*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
2068*22dc650dSSadaf Ebrahimi BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
2069*22dc650dSSadaf Ebrahimi }
2070*22dc650dSSadaf Ebrahimi else {
2071*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2072*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2073*22dc650dSSadaf Ebrahimi *inst = op_rm;
2074*22dc650dSSadaf Ebrahimi }
2075*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2076*22dc650dSSadaf Ebrahimi }
2077*22dc650dSSadaf Ebrahimi
2078*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2079*22dc650dSSadaf Ebrahimi }
2080*22dc650dSSadaf Ebrahimi
2081*22dc650dSSadaf Ebrahimi static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
2082*22dc650dSSadaf Ebrahimi sljit_u32 op_types,
2083*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
2084*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2085*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2086*22dc650dSSadaf Ebrahimi {
2087*22dc650dSSadaf Ebrahimi sljit_u8* inst;
2088*22dc650dSSadaf Ebrahimi sljit_u8 op_eax_imm = U8(op_types >> 24);
2089*22dc650dSSadaf Ebrahimi sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
2090*22dc650dSSadaf Ebrahimi sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
2091*22dc650dSSadaf Ebrahimi sljit_u8 op_imm = U8(op_types & 0xff);
2092*22dc650dSSadaf Ebrahimi
2093*22dc650dSSadaf Ebrahimi if (dst == src1 && dstw == src1w) {
2094*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
2095*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2096*22dc650dSSadaf Ebrahimi if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2097*22dc650dSSadaf Ebrahimi #else
2098*22dc650dSSadaf Ebrahimi if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
2099*22dc650dSSadaf Ebrahimi #endif
2100*22dc650dSSadaf Ebrahimi BINARY_EAX_IMM(op_eax_imm, src2w);
2101*22dc650dSSadaf Ebrahimi }
2102*22dc650dSSadaf Ebrahimi else {
2103*22dc650dSSadaf Ebrahimi BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
2104*22dc650dSSadaf Ebrahimi }
2105*22dc650dSSadaf Ebrahimi }
2106*22dc650dSSadaf Ebrahimi else if (FAST_IS_REG(dst)) {
2107*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
2108*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2109*22dc650dSSadaf Ebrahimi *inst = op_rm;
2110*22dc650dSSadaf Ebrahimi }
2111*22dc650dSSadaf Ebrahimi else if (FAST_IS_REG(src2)) {
2112*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
2113*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2114*22dc650dSSadaf Ebrahimi *inst = op_mr;
2115*22dc650dSSadaf Ebrahimi }
2116*22dc650dSSadaf Ebrahimi else {
2117*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
2118*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2119*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2120*22dc650dSSadaf Ebrahimi *inst = op_mr;
2121*22dc650dSSadaf Ebrahimi }
2122*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2123*22dc650dSSadaf Ebrahimi }
2124*22dc650dSSadaf Ebrahimi
2125*22dc650dSSadaf Ebrahimi /* General version. */
2126*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst) && dst != src2) {
2127*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst, 0, src1, src1w);
2128*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
2129*22dc650dSSadaf Ebrahimi BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
2130*22dc650dSSadaf Ebrahimi }
2131*22dc650dSSadaf Ebrahimi else {
2132*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
2133*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2134*22dc650dSSadaf Ebrahimi *inst = op_rm;
2135*22dc650dSSadaf Ebrahimi }
2136*22dc650dSSadaf Ebrahimi }
2137*22dc650dSSadaf Ebrahimi else {
2138*22dc650dSSadaf Ebrahimi /* This version performs fewer memory writes. */
2139*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2140*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
2141*22dc650dSSadaf Ebrahimi BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
2142*22dc650dSSadaf Ebrahimi }
2143*22dc650dSSadaf Ebrahimi else {
2144*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2145*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2146*22dc650dSSadaf Ebrahimi *inst = op_rm;
2147*22dc650dSSadaf Ebrahimi }
2148*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2149*22dc650dSSadaf Ebrahimi }
2150*22dc650dSSadaf Ebrahimi
2151*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2152*22dc650dSSadaf Ebrahimi }
2153*22dc650dSSadaf Ebrahimi
2154*22dc650dSSadaf Ebrahimi static sljit_s32 emit_mul(struct sljit_compiler *compiler,
2155*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
2156*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2157*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2158*22dc650dSSadaf Ebrahimi {
2159*22dc650dSSadaf Ebrahimi sljit_u8* inst;
2160*22dc650dSSadaf Ebrahimi sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2161*22dc650dSSadaf Ebrahimi
2162*22dc650dSSadaf Ebrahimi /* Register destination. */
2163*22dc650dSSadaf Ebrahimi if (dst_r == src1 && src2 != SLJIT_IMM) {
2164*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
2165*22dc650dSSadaf Ebrahimi } else if (dst_r == src2 && src1 != SLJIT_IMM) {
2166*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src1, src1w));
2167*22dc650dSSadaf Ebrahimi } else if (src1 == SLJIT_IMM) {
2168*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
2169*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
2170*22dc650dSSadaf Ebrahimi src2 = dst_r;
2171*22dc650dSSadaf Ebrahimi src2w = 0;
2172*22dc650dSSadaf Ebrahimi }
2173*22dc650dSSadaf Ebrahimi
2174*22dc650dSSadaf Ebrahimi if (src1w <= 127 && src1w >= -128) {
2175*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2176*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2177*22dc650dSSadaf Ebrahimi *inst = IMUL_r_rm_i8;
2178*22dc650dSSadaf Ebrahimi
2179*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, U8(src1w)));
2180*22dc650dSSadaf Ebrahimi }
2181*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2182*22dc650dSSadaf Ebrahimi else {
2183*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2184*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2185*22dc650dSSadaf Ebrahimi *inst = IMUL_r_rm_i32;
2186*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2187*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2188*22dc650dSSadaf Ebrahimi INC_SIZE(4);
2189*22dc650dSSadaf Ebrahimi sljit_unaligned_store_sw(inst, src1w);
2190*22dc650dSSadaf Ebrahimi }
2191*22dc650dSSadaf Ebrahimi #else
2192*22dc650dSSadaf Ebrahimi else if (IS_HALFWORD(src1w)) {
2193*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2194*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2195*22dc650dSSadaf Ebrahimi *inst = IMUL_r_rm_i32;
2196*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2197*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2198*22dc650dSSadaf Ebrahimi INC_SIZE(4);
2199*22dc650dSSadaf Ebrahimi sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
2200*22dc650dSSadaf Ebrahimi }
2201*22dc650dSSadaf Ebrahimi else {
2202*22dc650dSSadaf Ebrahimi if (dst_r != src2)
2203*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst_r, 0, src2, src2w);
2204*22dc650dSSadaf Ebrahimi FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
2205*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
2206*22dc650dSSadaf Ebrahimi }
2207*22dc650dSSadaf Ebrahimi #endif
2208*22dc650dSSadaf Ebrahimi }
2209*22dc650dSSadaf Ebrahimi else if (src2 == SLJIT_IMM) {
2210*22dc650dSSadaf Ebrahimi /* Note: src1 is NOT immediate. */
2211*22dc650dSSadaf Ebrahimi
2212*22dc650dSSadaf Ebrahimi if (src2w <= 127 && src2w >= -128) {
2213*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2214*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2215*22dc650dSSadaf Ebrahimi *inst = IMUL_r_rm_i8;
2216*22dc650dSSadaf Ebrahimi
2217*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, U8(src2w)));
2218*22dc650dSSadaf Ebrahimi }
2219*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2220*22dc650dSSadaf Ebrahimi else {
2221*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2222*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2223*22dc650dSSadaf Ebrahimi *inst = IMUL_r_rm_i32;
2224*22dc650dSSadaf Ebrahimi
2225*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2226*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2227*22dc650dSSadaf Ebrahimi INC_SIZE(4);
2228*22dc650dSSadaf Ebrahimi sljit_unaligned_store_sw(inst, src2w);
2229*22dc650dSSadaf Ebrahimi }
2230*22dc650dSSadaf Ebrahimi #else
2231*22dc650dSSadaf Ebrahimi else if (IS_HALFWORD(src2w)) {
2232*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2233*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2234*22dc650dSSadaf Ebrahimi *inst = IMUL_r_rm_i32;
2235*22dc650dSSadaf Ebrahimi
2236*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2237*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2238*22dc650dSSadaf Ebrahimi INC_SIZE(4);
2239*22dc650dSSadaf Ebrahimi sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
2240*22dc650dSSadaf Ebrahimi } else {
2241*22dc650dSSadaf Ebrahimi if (dst_r != src1)
2242*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst_r, 0, src1, src1w);
2243*22dc650dSSadaf Ebrahimi FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2244*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
2245*22dc650dSSadaf Ebrahimi }
2246*22dc650dSSadaf Ebrahimi #endif
2247*22dc650dSSadaf Ebrahimi } else {
2248*22dc650dSSadaf Ebrahimi /* Neither argument is immediate. */
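/* If src2 is a memory operand whose address uses dst_r, loading src1 into
   dst_r first would corrupt that address, so build the result in TMP_REG1. */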
2249*22dc650dSSadaf Ebrahimi if (ADDRESSING_DEPENDS_ON(src2, dst_r))
2250*22dc650dSSadaf Ebrahimi dst_r = TMP_REG1;
2251*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst_r, 0, src1, src1w);
2252*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
2253*22dc650dSSadaf Ebrahimi }
2254*22dc650dSSadaf Ebrahimi
2255*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM)
2256*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2257*22dc650dSSadaf Ebrahimi
2258*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2259*22dc650dSSadaf Ebrahimi }
2260*22dc650dSSadaf Ebrahimi
2261*22dc650dSSadaf Ebrahimi static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
2262*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
2263*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2264*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2265*22dc650dSSadaf Ebrahimi {
2266*22dc650dSSadaf Ebrahimi sljit_u8* inst;
2267*22dc650dSSadaf Ebrahimi sljit_s32 dst_r, done = 0;
2268*22dc650dSSadaf Ebrahimi
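/* Attempt to encode the addition as a single LEA, which does not modify the
   flags; the caller falls back to the ALU form when SLJIT_ERR_UNSUPPORTED
   is returned. */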
2269*22dc650dSSadaf Ebrahimi /* These cases are better handled by the normal code path. */
2270*22dc650dSSadaf Ebrahimi if (dst == src1 && dstw == src1w)
2271*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
2272*22dc650dSSadaf Ebrahimi if (dst == src2 && dstw == src2w)
2273*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
2274*22dc650dSSadaf Ebrahimi
2275*22dc650dSSadaf Ebrahimi dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2276*22dc650dSSadaf Ebrahimi
2277*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(src1)) {
2278*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(src2)) {
2279*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
2280*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2281*22dc650dSSadaf Ebrahimi *inst = LEA_r_m;
2282*22dc650dSSadaf Ebrahimi done = 1;
2283*22dc650dSSadaf Ebrahimi }
2284*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2285*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src2w))) {
2286*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
2287*22dc650dSSadaf Ebrahimi #else
2288*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
2289*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
2290*22dc650dSSadaf Ebrahimi #endif
2291*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2292*22dc650dSSadaf Ebrahimi *inst = LEA_r_m;
2293*22dc650dSSadaf Ebrahimi done = 1;
2294*22dc650dSSadaf Ebrahimi }
2295*22dc650dSSadaf Ebrahimi }
2296*22dc650dSSadaf Ebrahimi else if (FAST_IS_REG(src2)) {
2297*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2298*22dc650dSSadaf Ebrahimi if (src1 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src1w))) {
2299*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
2300*22dc650dSSadaf Ebrahimi #else
2301*22dc650dSSadaf Ebrahimi if (src1 == SLJIT_IMM) {
2302*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
2303*22dc650dSSadaf Ebrahimi #endif
2304*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2305*22dc650dSSadaf Ebrahimi *inst = LEA_r_m;
2306*22dc650dSSadaf Ebrahimi done = 1;
2307*22dc650dSSadaf Ebrahimi }
2308*22dc650dSSadaf Ebrahimi }
2309*22dc650dSSadaf Ebrahimi
2310*22dc650dSSadaf Ebrahimi if (done) {
2311*22dc650dSSadaf Ebrahimi if (dst_r == TMP_REG1)
2312*22dc650dSSadaf Ebrahimi return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2313*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2314*22dc650dSSadaf Ebrahimi }
2315*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
2316*22dc650dSSadaf Ebrahimi }
2317*22dc650dSSadaf Ebrahimi
2318*22dc650dSSadaf Ebrahimi static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
2319*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2320*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2321*22dc650dSSadaf Ebrahimi {
2322*22dc650dSSadaf Ebrahimi sljit_u8* inst;
2323*22dc650dSSadaf Ebrahimi
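/* CMP EAX, imm32 has a dedicated short encoding, so prefer it when src1 is
   R0 and the immediate does not fit in a sign-extended byte. */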
2324*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2325*22dc650dSSadaf Ebrahimi if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2326*22dc650dSSadaf Ebrahimi #else
2327*22dc650dSSadaf Ebrahimi if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
2328*22dc650dSSadaf Ebrahimi #endif
2329*22dc650dSSadaf Ebrahimi BINARY_EAX_IMM(CMP_EAX_i32, src2w);
2330*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2331*22dc650dSSadaf Ebrahimi }
2332*22dc650dSSadaf Ebrahimi
2333*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(src1)) {
2334*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
2335*22dc650dSSadaf Ebrahimi BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
2336*22dc650dSSadaf Ebrahimi }
2337*22dc650dSSadaf Ebrahimi else {
2338*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2339*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2340*22dc650dSSadaf Ebrahimi *inst = CMP_r_rm;
2341*22dc650dSSadaf Ebrahimi }
2342*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2343*22dc650dSSadaf Ebrahimi }
2344*22dc650dSSadaf Ebrahimi
2345*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(src2) && src1 != SLJIT_IMM) {
2346*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2347*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2348*22dc650dSSadaf Ebrahimi *inst = CMP_rm_r;
2349*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2350*22dc650dSSadaf Ebrahimi }
2351*22dc650dSSadaf Ebrahimi
2352*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
2353*22dc650dSSadaf Ebrahimi if (src1 == SLJIT_IMM) {
2354*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2355*22dc650dSSadaf Ebrahimi src1 = TMP_REG1;
2356*22dc650dSSadaf Ebrahimi src1w = 0;
2357*22dc650dSSadaf Ebrahimi }
2358*22dc650dSSadaf Ebrahimi BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
2359*22dc650dSSadaf Ebrahimi }
2360*22dc650dSSadaf Ebrahimi else {
2361*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2362*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2363*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2364*22dc650dSSadaf Ebrahimi *inst = CMP_r_rm;
2365*22dc650dSSadaf Ebrahimi }
2366*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2367*22dc650dSSadaf Ebrahimi }
2368*22dc650dSSadaf Ebrahimi
2369*22dc650dSSadaf Ebrahimi static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
2370*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2371*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2372*22dc650dSSadaf Ebrahimi {
2373*22dc650dSSadaf Ebrahimi sljit_u8* inst;
2374*22dc650dSSadaf Ebrahimi
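/* TEST is commutative, so the short EAX, imm32 encoding is usable whichever
   operand happens to be R0. */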
2375*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2376*22dc650dSSadaf Ebrahimi if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2377*22dc650dSSadaf Ebrahimi #else
2378*22dc650dSSadaf Ebrahimi if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
2379*22dc650dSSadaf Ebrahimi #endif
2380*22dc650dSSadaf Ebrahimi BINARY_EAX_IMM(TEST_EAX_i32, src2w);
2381*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2382*22dc650dSSadaf Ebrahimi }
2383*22dc650dSSadaf Ebrahimi
2384*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2385*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2386*22dc650dSSadaf Ebrahimi #else
2387*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128)) {
2388*22dc650dSSadaf Ebrahimi #endif
2389*22dc650dSSadaf Ebrahimi BINARY_EAX_IMM(TEST_EAX_i32, src1w);
2390*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2391*22dc650dSSadaf Ebrahimi }
2392*22dc650dSSadaf Ebrahimi
2393*22dc650dSSadaf Ebrahimi if (src1 != SLJIT_IMM) {
2394*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
2395*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2396*22dc650dSSadaf Ebrahimi if (IS_HALFWORD(src2w) || compiler->mode32) {
2397*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2398*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2399*22dc650dSSadaf Ebrahimi *inst = GROUP_F7;
2400*22dc650dSSadaf Ebrahimi } else {
2401*22dc650dSSadaf Ebrahimi FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, src2w));
2402*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, 0, src1, src1w);
2403*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2404*22dc650dSSadaf Ebrahimi *inst = TEST_rm_r;
2405*22dc650dSSadaf Ebrahimi }
2406*22dc650dSSadaf Ebrahimi #else
2407*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2408*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2409*22dc650dSSadaf Ebrahimi *inst = GROUP_F7;
2410*22dc650dSSadaf Ebrahimi #endif
2411*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2412*22dc650dSSadaf Ebrahimi }
2413*22dc650dSSadaf Ebrahimi else if (FAST_IS_REG(src1)) {
2414*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2415*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2416*22dc650dSSadaf Ebrahimi *inst = TEST_rm_r;
2417*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2418*22dc650dSSadaf Ebrahimi }
2419*22dc650dSSadaf Ebrahimi }
2420*22dc650dSSadaf Ebrahimi
2421*22dc650dSSadaf Ebrahimi if (src2 != SLJIT_IMM) {
2422*22dc650dSSadaf Ebrahimi if (src1 == SLJIT_IMM) {
2423*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2424*22dc650dSSadaf Ebrahimi if (IS_HALFWORD(src1w) || compiler->mode32) {
2425*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
2426*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2427*22dc650dSSadaf Ebrahimi *inst = GROUP_F7;
2428*22dc650dSSadaf Ebrahimi }
2429*22dc650dSSadaf Ebrahimi else {
2430*22dc650dSSadaf Ebrahimi FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
2431*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2432*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2433*22dc650dSSadaf Ebrahimi *inst = TEST_rm_r;
2434*22dc650dSSadaf Ebrahimi }
2435*22dc650dSSadaf Ebrahimi #else
2436*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
2437*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2438*22dc650dSSadaf Ebrahimi *inst = GROUP_F7;
2439*22dc650dSSadaf Ebrahimi #endif
2440*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2441*22dc650dSSadaf Ebrahimi }
2442*22dc650dSSadaf Ebrahimi else if (FAST_IS_REG(src2)) {
2443*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2444*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2445*22dc650dSSadaf Ebrahimi *inst = TEST_rm_r;
2446*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2447*22dc650dSSadaf Ebrahimi }
2448*22dc650dSSadaf Ebrahimi }
2449*22dc650dSSadaf Ebrahimi
2450*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2451*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
2452*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2453*22dc650dSSadaf Ebrahimi if (IS_HALFWORD(src2w) || compiler->mode32) {
2454*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2455*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2456*22dc650dSSadaf Ebrahimi *inst = GROUP_F7;
2457*22dc650dSSadaf Ebrahimi }
2458*22dc650dSSadaf Ebrahimi else {
2459*22dc650dSSadaf Ebrahimi FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2460*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
2461*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2462*22dc650dSSadaf Ebrahimi *inst = TEST_rm_r;
2463*22dc650dSSadaf Ebrahimi }
2464*22dc650dSSadaf Ebrahimi #else
2465*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2466*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2467*22dc650dSSadaf Ebrahimi *inst = GROUP_F7;
2468*22dc650dSSadaf Ebrahimi #endif
2469*22dc650dSSadaf Ebrahimi }
2470*22dc650dSSadaf Ebrahimi else {
2471*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2472*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2473*22dc650dSSadaf Ebrahimi *inst = TEST_rm_r;
2474*22dc650dSSadaf Ebrahimi }
2475*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2476*22dc650dSSadaf Ebrahimi }
2477*22dc650dSSadaf Ebrahimi
2478*22dc650dSSadaf Ebrahimi static sljit_s32 emit_shift(struct sljit_compiler *compiler,
2479*22dc650dSSadaf Ebrahimi sljit_u8 mode,
2480*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
2481*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2482*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2483*22dc650dSSadaf Ebrahimi {
2484*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2485*22dc650dSSadaf Ebrahimi sljit_s32 mode32;
2486*22dc650dSSadaf Ebrahimi #endif
2487*22dc650dSSadaf Ebrahimi sljit_u8* inst;
2488*22dc650dSSadaf Ebrahimi
2489*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM || src2 == SLJIT_PREF_SHIFT_REG) {
2490*22dc650dSSadaf Ebrahimi if (dst == src1 && dstw == src1w) {
2491*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2492*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2493*22dc650dSSadaf Ebrahimi inst[1] |= mode;
2494*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2495*22dc650dSSadaf Ebrahimi }
2496*22dc650dSSadaf Ebrahimi if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2497*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2498*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2499*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2500*22dc650dSSadaf Ebrahimi inst[1] |= mode;
2501*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2502*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2503*22dc650dSSadaf Ebrahimi }
2504*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst)) {
2505*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst, 0, src1, src1w);
2506*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2507*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2508*22dc650dSSadaf Ebrahimi inst[1] |= mode;
2509*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2510*22dc650dSSadaf Ebrahimi }
2511*22dc650dSSadaf Ebrahimi
2512*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2513*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2514*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2515*22dc650dSSadaf Ebrahimi inst[1] |= mode;
2516*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2517*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2518*22dc650dSSadaf Ebrahimi }
2519*22dc650dSSadaf Ebrahimi
2520*22dc650dSSadaf Ebrahimi if (dst == SLJIT_PREF_SHIFT_REG) {
2521*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2522*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2523*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2524*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2525*22dc650dSSadaf Ebrahimi inst[1] |= mode;
2526*22dc650dSSadaf Ebrahimi return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2527*22dc650dSSadaf Ebrahimi }
2528*22dc650dSSadaf Ebrahimi
2529*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2530*22dc650dSSadaf Ebrahimi if (src1 != dst)
2531*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst, 0, src1, src1w);
2532*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2533*22dc650dSSadaf Ebrahimi mode32 = compiler->mode32;
2534*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
2535*22dc650dSSadaf Ebrahimi #endif
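/* Preserve the shift-count register before clobbering it; on x86-64 mode32
   was cleared above so the full 64-bit register is copied. */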
2536*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2537*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2538*22dc650dSSadaf Ebrahimi compiler->mode32 = mode32;
2539*22dc650dSSadaf Ebrahimi #endif
2540*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2541*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2542*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2543*22dc650dSSadaf Ebrahimi inst[1] |= mode;
2544*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2545*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
2546*22dc650dSSadaf Ebrahimi #endif
2547*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2548*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2549*22dc650dSSadaf Ebrahimi compiler->mode32 = mode32;
2550*22dc650dSSadaf Ebrahimi #endif
2551*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2552*22dc650dSSadaf Ebrahimi }
2553*22dc650dSSadaf Ebrahimi
2554*22dc650dSSadaf Ebrahimi /* This case is complex: ecx itself may be used for
2555*22dc650dSSadaf Ebrahimi    addressing, and that case must still be supported. */
2556*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2557*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2558*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2559*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_32 */
2560*22dc650dSSadaf Ebrahimi mode32 = compiler->mode32;
2561*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
2562*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2563*22dc650dSSadaf Ebrahimi compiler->mode32 = mode32;
2564*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
2565*22dc650dSSadaf Ebrahimi
2566*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2567*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2568*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2569*22dc650dSSadaf Ebrahimi inst[1] |= mode;
2570*22dc650dSSadaf Ebrahimi
2571*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2572*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
2573*22dc650dSSadaf Ebrahimi #else
2574*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
2575*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2576*22dc650dSSadaf Ebrahimi compiler->mode32 = mode32;
2577*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
2578*22dc650dSSadaf Ebrahimi
2579*22dc650dSSadaf Ebrahimi if (dst != TMP_REG1)
2580*22dc650dSSadaf Ebrahimi return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2581*22dc650dSSadaf Ebrahimi
2582*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2583*22dc650dSSadaf Ebrahimi }
2584*22dc650dSSadaf Ebrahimi
2585*22dc650dSSadaf Ebrahimi static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2586*22dc650dSSadaf Ebrahimi sljit_u8 mode, sljit_s32 set_flags,
2587*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
2588*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2589*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2590*22dc650dSSadaf Ebrahimi {
2591*22dc650dSSadaf Ebrahimi /* The CPU does not set flags if the shift count is 0. */
2592*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
2593*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2594*22dc650dSSadaf Ebrahimi src2w &= compiler->mode32 ? 0x1f : 0x3f;
2595*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
2596*22dc650dSSadaf Ebrahimi src2w &= 0x1f;
2597*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
2598*22dc650dSSadaf Ebrahimi if (src2w != 0)
2599*22dc650dSSadaf Ebrahimi return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2600*22dc650dSSadaf Ebrahimi
2601*22dc650dSSadaf Ebrahimi if (!set_flags)
2602*22dc650dSSadaf Ebrahimi return emit_mov(compiler, dst, dstw, src1, src1w);
2603*22dc650dSSadaf Ebrahimi /* OR dst, src, 0 */
2604*22dc650dSSadaf Ebrahimi return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2605*22dc650dSSadaf Ebrahimi dst, dstw, src1, src1w, SLJIT_IMM, 0);
2606*22dc650dSSadaf Ebrahimi }
2607*22dc650dSSadaf Ebrahimi
2608*22dc650dSSadaf Ebrahimi if (!set_flags)
2609*22dc650dSSadaf Ebrahimi return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2610*22dc650dSSadaf Ebrahimi
2611*22dc650dSSadaf Ebrahimi if (!FAST_IS_REG(dst))
2612*22dc650dSSadaf Ebrahimi FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2613*22dc650dSSadaf Ebrahimi
2614*22dc650dSSadaf Ebrahimi FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2615*22dc650dSSadaf Ebrahimi
2616*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst))
2617*22dc650dSSadaf Ebrahimi return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2618*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2619*22dc650dSSadaf Ebrahimi }
2620*22dc650dSSadaf Ebrahimi
2621*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2622*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
2623*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2624*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2625*22dc650dSSadaf Ebrahimi {
2626*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2627*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2628*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
2629*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src1, src1w);
2630*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src2, src2w);
2631*22dc650dSSadaf Ebrahimi
2632*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(dst, dstw, (void)0);
2633*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src1, src1w, (void)0);
2634*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src2, src2w, (void)0);
2635*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2636*22dc650dSSadaf Ebrahimi compiler->mode32 = op & SLJIT_32;
2637*22dc650dSSadaf Ebrahimi #endif
2638*22dc650dSSadaf Ebrahimi
2639*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op)) {
2640*22dc650dSSadaf Ebrahimi case SLJIT_ADD:
2641*22dc650dSSadaf Ebrahimi if (!HAS_FLAGS(op)) {
2642*22dc650dSSadaf Ebrahimi if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2643*22dc650dSSadaf Ebrahimi return compiler->error;
2644*22dc650dSSadaf Ebrahimi }
2645*22dc650dSSadaf Ebrahimi return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
2646*22dc650dSSadaf Ebrahimi dst, dstw, src1, src1w, src2, src2w);
2647*22dc650dSSadaf Ebrahimi case SLJIT_ADDC:
2648*22dc650dSSadaf Ebrahimi return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
2649*22dc650dSSadaf Ebrahimi dst, dstw, src1, src1w, src2, src2w);
2650*22dc650dSSadaf Ebrahimi case SLJIT_SUB:
2651*22dc650dSSadaf Ebrahimi if (src1 == SLJIT_IMM && src1w == 0)
2652*22dc650dSSadaf Ebrahimi return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w);
2653*22dc650dSSadaf Ebrahimi
2654*22dc650dSSadaf Ebrahimi if (!HAS_FLAGS(op)) {
2655*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2656*22dc650dSSadaf Ebrahimi return compiler->error;
2657*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst) && src2 == dst) {
2658*22dc650dSSadaf Ebrahimi FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w));
2659*22dc650dSSadaf Ebrahimi return emit_unary(compiler, NEG_rm, dst, 0, dst, 0);
2660*22dc650dSSadaf Ebrahimi }
2661*22dc650dSSadaf Ebrahimi }
2662*22dc650dSSadaf Ebrahimi
2663*22dc650dSSadaf Ebrahimi return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
2664*22dc650dSSadaf Ebrahimi dst, dstw, src1, src1w, src2, src2w);
2665*22dc650dSSadaf Ebrahimi case SLJIT_SUBC:
2666*22dc650dSSadaf Ebrahimi return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
2667*22dc650dSSadaf Ebrahimi dst, dstw, src1, src1w, src2, src2w);
2668*22dc650dSSadaf Ebrahimi case SLJIT_MUL:
2669*22dc650dSSadaf Ebrahimi return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2670*22dc650dSSadaf Ebrahimi case SLJIT_AND:
2671*22dc650dSSadaf Ebrahimi return emit_cum_binary(compiler, BINARY_OPCODE(AND),
2672*22dc650dSSadaf Ebrahimi dst, dstw, src1, src1w, src2, src2w);
2673*22dc650dSSadaf Ebrahimi case SLJIT_OR:
2674*22dc650dSSadaf Ebrahimi return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2675*22dc650dSSadaf Ebrahimi dst, dstw, src1, src1w, src2, src2w);
2676*22dc650dSSadaf Ebrahimi case SLJIT_XOR:
2677*22dc650dSSadaf Ebrahimi if (!HAS_FLAGS(op)) {
2678*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM && src2w == -1)
2679*22dc650dSSadaf Ebrahimi return emit_unary(compiler, NOT_rm, dst, dstw, src1, src1w);
2680*22dc650dSSadaf Ebrahimi if (src1 == SLJIT_IMM && src1w == -1)
2681*22dc650dSSadaf Ebrahimi return emit_unary(compiler, NOT_rm, dst, dstw, src2, src2w);
2682*22dc650dSSadaf Ebrahimi }
2683*22dc650dSSadaf Ebrahimi
2684*22dc650dSSadaf Ebrahimi return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
2685*22dc650dSSadaf Ebrahimi dst, dstw, src1, src1w, src2, src2w);
2686*22dc650dSSadaf Ebrahimi case SLJIT_SHL:
2687*22dc650dSSadaf Ebrahimi case SLJIT_MSHL:
2688*22dc650dSSadaf Ebrahimi return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
2689*22dc650dSSadaf Ebrahimi dst, dstw, src1, src1w, src2, src2w);
2690*22dc650dSSadaf Ebrahimi case SLJIT_LSHR:
2691*22dc650dSSadaf Ebrahimi case SLJIT_MLSHR:
2692*22dc650dSSadaf Ebrahimi return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
2693*22dc650dSSadaf Ebrahimi dst, dstw, src1, src1w, src2, src2w);
2694*22dc650dSSadaf Ebrahimi case SLJIT_ASHR:
2695*22dc650dSSadaf Ebrahimi case SLJIT_MASHR:
2696*22dc650dSSadaf Ebrahimi return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
2697*22dc650dSSadaf Ebrahimi dst, dstw, src1, src1w, src2, src2w);
2698*22dc650dSSadaf Ebrahimi case SLJIT_ROTL:
2699*22dc650dSSadaf Ebrahimi return emit_shift_with_flags(compiler, ROL, 0,
2700*22dc650dSSadaf Ebrahimi dst, dstw, src1, src1w, src2, src2w);
2701*22dc650dSSadaf Ebrahimi case SLJIT_ROTR:
2702*22dc650dSSadaf Ebrahimi return emit_shift_with_flags(compiler, ROR, 0,
2703*22dc650dSSadaf Ebrahimi dst, dstw, src1, src1w, src2, src2w);
2704*22dc650dSSadaf Ebrahimi }
2705*22dc650dSSadaf Ebrahimi
2706*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2707*22dc650dSSadaf Ebrahimi }
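/* Illustrative usage sketch (not part of this backend; the opcode and register
   names below come from the public sljit API already used throughout this file):

       sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R1, 0, SLJIT_IMM, 42);
       sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 1);

   The first call requests no flags, so the SLJIT_ADD case above may encode it
   as a single LEA via emit_lea_binary(); the second needs the zero flag, so it
   keeps the flag-setting SUB form from emit_non_cum_binary(). */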
2708*22dc650dSSadaf Ebrahimi
2709*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2710*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2711*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2712*22dc650dSSadaf Ebrahimi {
2713*22dc650dSSadaf Ebrahimi sljit_s32 opcode = GET_OPCODE(op);
2714*22dc650dSSadaf Ebrahimi
2715*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2716*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2717*22dc650dSSadaf Ebrahimi
2718*22dc650dSSadaf Ebrahimi if (opcode != SLJIT_SUB && opcode != SLJIT_AND) {
2719*22dc650dSSadaf Ebrahimi SLJIT_SKIP_CHECKS(compiler);
2720*22dc650dSSadaf Ebrahimi return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
2721*22dc650dSSadaf Ebrahimi }
2722*22dc650dSSadaf Ebrahimi
2723*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src1, src1w);
2724*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src2, src2w);
2725*22dc650dSSadaf Ebrahimi
2726*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src1, src1w, (void)0);
2727*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src2, src2w, (void)0);
2728*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2729*22dc650dSSadaf Ebrahimi compiler->mode32 = op & SLJIT_32;
2730*22dc650dSSadaf Ebrahimi #endif
2731*22dc650dSSadaf Ebrahimi
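/* Only the flags are needed here: SUB degenerates to CMP and AND to TEST. */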
2732*22dc650dSSadaf Ebrahimi if (opcode == SLJIT_SUB)
2733*22dc650dSSadaf Ebrahimi return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2734*22dc650dSSadaf Ebrahimi
2735*22dc650dSSadaf Ebrahimi return emit_test_binary(compiler, src1, src1w, src2, src2w);
2736*22dc650dSSadaf Ebrahimi }
2737*22dc650dSSadaf Ebrahimi
2738*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2739*22dc650dSSadaf Ebrahimi sljit_s32 dst_reg,
2740*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2741*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2742*22dc650dSSadaf Ebrahimi {
2743*22dc650dSSadaf Ebrahimi sljit_u8* inst;
2744*22dc650dSSadaf Ebrahimi sljit_sw dstw = 0;
2745*22dc650dSSadaf Ebrahimi
2746*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2747*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2748*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src1, src1w);
2749*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src2, src2w);
2750*22dc650dSSadaf Ebrahimi
2751*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);
2752*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src1, src1w, (void)0);
2753*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src2, src2w, (void)0);
2754*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2755*22dc650dSSadaf Ebrahimi compiler->mode32 = op & SLJIT_32;
2756*22dc650dSSadaf Ebrahimi #endif
2757*22dc650dSSadaf Ebrahimi
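/* SLJIT_MULADD: dst_reg += src1 * src2. The product is built in TMP_REG1 by
   emit_mul() and then added to dst_reg. */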
2758*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op)) {
2759*22dc650dSSadaf Ebrahimi case SLJIT_MULADD:
2760*22dc650dSSadaf Ebrahimi FAIL_IF(emit_mul(compiler, TMP_REG1, 0, src1, src1w, src2, src2w));
2761*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst_reg, dstw);
2762*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2763*22dc650dSSadaf Ebrahimi *inst = ADD_rm_r;
2764*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2765*22dc650dSSadaf Ebrahimi }
2766*22dc650dSSadaf Ebrahimi
2767*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2768*22dc650dSSadaf Ebrahimi }
2769*22dc650dSSadaf Ebrahimi
2770*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2771*22dc650dSSadaf Ebrahimi sljit_s32 dst_reg,
2772*22dc650dSSadaf Ebrahimi sljit_s32 src1_reg,
2773*22dc650dSSadaf Ebrahimi sljit_s32 src2_reg,
2774*22dc650dSSadaf Ebrahimi sljit_s32 src3, sljit_sw src3w)
2775*22dc650dSSadaf Ebrahimi {
2776*22dc650dSSadaf Ebrahimi sljit_s32 is_rotate, is_left, move_src1;
2777*22dc650dSSadaf Ebrahimi sljit_u8* inst;
2778*22dc650dSSadaf Ebrahimi sljit_sw src1w = 0;
2779*22dc650dSSadaf Ebrahimi sljit_sw dstw = 0;
2780*22dc650dSSadaf Ebrahimi /* The whole register must be saved even for 32-bit operations. */
2781*22dc650dSSadaf Ebrahimi sljit_u8 restore_ecx = 0;
2782*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2783*22dc650dSSadaf Ebrahimi sljit_sw src2w = 0;
2784*22dc650dSSadaf Ebrahimi sljit_s32 restore_sp4 = 0;
2785*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
2786*22dc650dSSadaf Ebrahimi
2787*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2788*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
2789*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src3, src3w);
2790*22dc650dSSadaf Ebrahimi
2791*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);
2792*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src3, src3w, (void)0);
2793*22dc650dSSadaf Ebrahimi
2794*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2795*22dc650dSSadaf Ebrahimi compiler->mode32 = op & SLJIT_32;
2796*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
2797*22dc650dSSadaf Ebrahimi
2798*22dc650dSSadaf Ebrahimi if (src3 == SLJIT_IMM) {
2799*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2800*22dc650dSSadaf Ebrahimi src3w &= 0x1f;
2801*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_32 */
2802*22dc650dSSadaf Ebrahimi src3w &= (op & SLJIT_32) ? 0x1f : 0x3f;
2803*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
2804*22dc650dSSadaf Ebrahimi
2805*22dc650dSSadaf Ebrahimi if (src3w == 0)
2806*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2807*22dc650dSSadaf Ebrahimi }
2808*22dc650dSSadaf Ebrahimi
2809*22dc650dSSadaf Ebrahimi is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
2810*22dc650dSSadaf Ebrahimi
2811*22dc650dSSadaf Ebrahimi is_rotate = (src1_reg == src2_reg);
2812*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src1_reg, src1w, (void)0);
2813*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src2_reg, src2w, (void)0);
2814*22dc650dSSadaf Ebrahimi
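/* When both inputs are the same register, the double-precision shift is
   equivalent to a rotate of that register. */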
2815*22dc650dSSadaf Ebrahimi if (is_rotate)
2816*22dc650dSSadaf Ebrahimi return emit_shift(compiler, is_left ? ROL : ROR, dst_reg, dstw, src1_reg, src1w, src3, src3w);
2817*22dc650dSSadaf Ebrahimi
2818*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2819*22dc650dSSadaf Ebrahimi if (src2_reg & SLJIT_MEM) {
2820*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w);
2821*22dc650dSSadaf Ebrahimi src2_reg = TMP_REG1;
2822*22dc650dSSadaf Ebrahimi }
2823*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
2824*22dc650dSSadaf Ebrahimi
2825*22dc650dSSadaf Ebrahimi if (dst_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) {
2826*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2827*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2828*22dc650dSSadaf Ebrahimi src1_reg = TMP_REG1;
2829*22dc650dSSadaf Ebrahimi src1w = 0;
2830*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
2831*22dc650dSSadaf Ebrahimi if (src2_reg != TMP_REG1) {
2832*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2833*22dc650dSSadaf Ebrahimi src1_reg = TMP_REG1;
2834*22dc650dSSadaf Ebrahimi src1w = 0;
2835*22dc650dSSadaf Ebrahimi } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
2836*22dc650dSSadaf Ebrahimi restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
2837*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
2838*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
2839*22dc650dSSadaf Ebrahimi src1_reg = restore_sp4;
2840*22dc650dSSadaf Ebrahimi src1w = 0;
2841*22dc650dSSadaf Ebrahimi } else {
2842*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
2843*22dc650dSSadaf Ebrahimi restore_sp4 = src1_reg;
2844*22dc650dSSadaf Ebrahimi }
2845*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
2846*22dc650dSSadaf Ebrahimi
2847*22dc650dSSadaf Ebrahimi if (src3 != SLJIT_PREF_SHIFT_REG)
2848*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
2849*22dc650dSSadaf Ebrahimi } else {
2850*22dc650dSSadaf Ebrahimi if (src2_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
2851*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2852*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
2853*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
2854*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2855*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2856*22dc650dSSadaf Ebrahimi compiler->mode32 = op & SLJIT_32;
2857*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
2858*22dc650dSSadaf Ebrahimi src2_reg = TMP_REG1;
2859*22dc650dSSadaf Ebrahimi restore_ecx = 1;
2860*22dc650dSSadaf Ebrahimi }
2861*22dc650dSSadaf Ebrahimi
2862*22dc650dSSadaf Ebrahimi move_src1 = 0;
2863*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2864*22dc650dSSadaf Ebrahimi if (dst_reg != src1_reg) {
2865*22dc650dSSadaf Ebrahimi if (dst_reg != src3) {
2866*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2867*22dc650dSSadaf Ebrahimi src1_reg = dst_reg;
2868*22dc650dSSadaf Ebrahimi src1w = 0;
2869*22dc650dSSadaf Ebrahimi } else
2870*22dc650dSSadaf Ebrahimi move_src1 = 1;
2871*22dc650dSSadaf Ebrahimi }
2872*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
2873*22dc650dSSadaf Ebrahimi if (dst_reg & SLJIT_MEM) {
2874*22dc650dSSadaf Ebrahimi if (src2_reg != TMP_REG1) {
2875*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2876*22dc650dSSadaf Ebrahimi src1_reg = TMP_REG1;
2877*22dc650dSSadaf Ebrahimi src1w = 0;
2878*22dc650dSSadaf Ebrahimi } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
2879*22dc650dSSadaf Ebrahimi restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
2880*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
2881*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
2882*22dc650dSSadaf Ebrahimi src1_reg = restore_sp4;
2883*22dc650dSSadaf Ebrahimi src1w = 0;
2884*22dc650dSSadaf Ebrahimi } else {
2885*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
2886*22dc650dSSadaf Ebrahimi restore_sp4 = src1_reg;
2887*22dc650dSSadaf Ebrahimi }
2888*22dc650dSSadaf Ebrahimi } else if (dst_reg != src1_reg) {
2889*22dc650dSSadaf Ebrahimi if (dst_reg != src3) {
2890*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2891*22dc650dSSadaf Ebrahimi src1_reg = dst_reg;
2892*22dc650dSSadaf Ebrahimi src1w = 0;
2893*22dc650dSSadaf Ebrahimi } else
2894*22dc650dSSadaf Ebrahimi move_src1 = 1;
2895*22dc650dSSadaf Ebrahimi }
2896*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
2897*22dc650dSSadaf Ebrahimi
2898*22dc650dSSadaf Ebrahimi if (src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
2899*22dc650dSSadaf Ebrahimi if (!restore_ecx) {
2900*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2901*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
2902*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2903*22dc650dSSadaf Ebrahimi compiler->mode32 = op & SLJIT_32;
2904*22dc650dSSadaf Ebrahimi restore_ecx = 1;
2905*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
2906*22dc650dSSadaf Ebrahimi if (src1_reg != TMP_REG1 && src2_reg != TMP_REG1) {
2907*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2908*22dc650dSSadaf Ebrahimi restore_ecx = 1;
2909*22dc650dSSadaf Ebrahimi } else {
2910*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2911*22dc650dSSadaf Ebrahimi restore_ecx = 2;
2912*22dc650dSSadaf Ebrahimi }
2913*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
2914*22dc650dSSadaf Ebrahimi }
2915*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
2916*22dc650dSSadaf Ebrahimi }
2917*22dc650dSSadaf Ebrahimi
2918*22dc650dSSadaf Ebrahimi if (move_src1) {
2919*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2920*22dc650dSSadaf Ebrahimi src1_reg = dst_reg;
2921*22dc650dSSadaf Ebrahimi src1w = 0;
2922*22dc650dSSadaf Ebrahimi }
2923*22dc650dSSadaf Ebrahimi }
2924*22dc650dSSadaf Ebrahimi
2925*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 2, src2_reg, 0, src1_reg, src1w);
2926*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
2927*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
2928*22dc650dSSadaf Ebrahimi
2929*22dc650dSSadaf Ebrahimi if (src3 == SLJIT_IMM) {
2930*22dc650dSSadaf Ebrahimi inst[1] = U8((is_left ? SHLD : SHRD) - 1);
2931*22dc650dSSadaf Ebrahimi
2932*22dc650dSSadaf Ebrahimi /* The immediate shift count is emitted separately as a single byte. */
2933*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, U8(src3w)));
2934*22dc650dSSadaf Ebrahimi } else
2935*22dc650dSSadaf Ebrahimi inst[1] = U8(is_left ? SHLD : SHRD);
2936*22dc650dSSadaf Ebrahimi
2937*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2938*22dc650dSSadaf Ebrahimi if (restore_ecx) {
2939*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
2940*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2941*22dc650dSSadaf Ebrahimi }
2942*22dc650dSSadaf Ebrahimi
2943*22dc650dSSadaf Ebrahimi if (src1_reg != dst_reg) {
2944*22dc650dSSadaf Ebrahimi compiler->mode32 = op & SLJIT_32;
2945*22dc650dSSadaf Ebrahimi return emit_mov(compiler, dst_reg, dstw, src1_reg, 0);
2946*22dc650dSSadaf Ebrahimi }
2947*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
2948*22dc650dSSadaf Ebrahimi if (restore_ecx)
2949*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, restore_ecx == 1 ? TMP_REG1 : SLJIT_MEM1(SLJIT_SP), 0);
2950*22dc650dSSadaf Ebrahimi
2951*22dc650dSSadaf Ebrahimi if (src1_reg != dst_reg)
2952*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, dst_reg, dstw, src1_reg, 0);
2953*22dc650dSSadaf Ebrahimi
2954*22dc650dSSadaf Ebrahimi if (restore_sp4)
2955*22dc650dSSadaf Ebrahimi return emit_mov(compiler, restore_sp4, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32));
2956*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
2957*22dc650dSSadaf Ebrahimi
2958*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2959*22dc650dSSadaf Ebrahimi }
2960*22dc650dSSadaf Ebrahimi
2961*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2962*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
2963*22dc650dSSadaf Ebrahimi {
2964*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2965*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2966*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src, srcw);
2967*22dc650dSSadaf Ebrahimi
2968*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src, srcw, (void)0);
2969*22dc650dSSadaf Ebrahimi
2970*22dc650dSSadaf Ebrahimi switch (op) {
2971*22dc650dSSadaf Ebrahimi case SLJIT_FAST_RETURN:
2972*22dc650dSSadaf Ebrahimi return emit_fast_return(compiler, src, srcw);
2973*22dc650dSSadaf Ebrahimi case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2974*22dc650dSSadaf Ebrahimi /* Don't adjust shadow stack if it isn't enabled. */
2975*22dc650dSSadaf Ebrahimi if (!cpu_has_shadow_stack ())
2976*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2977*22dc650dSSadaf Ebrahimi return adjust_shadow_stack(compiler, src, srcw);
2978*22dc650dSSadaf Ebrahimi case SLJIT_PREFETCH_L1:
2979*22dc650dSSadaf Ebrahimi case SLJIT_PREFETCH_L2:
2980*22dc650dSSadaf Ebrahimi case SLJIT_PREFETCH_L3:
2981*22dc650dSSadaf Ebrahimi case SLJIT_PREFETCH_ONCE:
2982*22dc650dSSadaf Ebrahimi return emit_prefetch(compiler, op, src, srcw);
2983*22dc650dSSadaf Ebrahimi }
2984*22dc650dSSadaf Ebrahimi
2985*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2986*22dc650dSSadaf Ebrahimi }
2987*22dc650dSSadaf Ebrahimi
2988*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
2989*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw)
2990*22dc650dSSadaf Ebrahimi {
2991*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2992*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
2993*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
2994*22dc650dSSadaf Ebrahimi
2995*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(dst, dstw, (void)0);
2996*22dc650dSSadaf Ebrahimi
2997*22dc650dSSadaf Ebrahimi switch (op) {
2998*22dc650dSSadaf Ebrahimi case SLJIT_FAST_ENTER:
2999*22dc650dSSadaf Ebrahimi return emit_fast_enter(compiler, dst, dstw);
3000*22dc650dSSadaf Ebrahimi case SLJIT_GET_RETURN_ADDRESS:
3001*22dc650dSSadaf Ebrahimi return sljit_emit_get_return_address(compiler, dst, dstw);
3002*22dc650dSSadaf Ebrahimi }
3003*22dc650dSSadaf Ebrahimi
3004*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3005*22dc650dSSadaf Ebrahimi }
3006*22dc650dSSadaf Ebrahimi
3007*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
3008*22dc650dSSadaf Ebrahimi {
3009*22dc650dSSadaf Ebrahimi CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
3010*22dc650dSSadaf Ebrahimi
3011*22dc650dSSadaf Ebrahimi if (type == SLJIT_GP_REGISTER) {
3012*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
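/* On x86-32 these scratch registers are virtual (accessed through
   CHECK_EXTRA_REGS), so there is no machine register index to report. */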
3013*22dc650dSSadaf Ebrahimi if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
3014*22dc650dSSadaf Ebrahimi return -1;
3015*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
3016*22dc650dSSadaf Ebrahimi return reg_map[reg];
3017*22dc650dSSadaf Ebrahimi }
3018*22dc650dSSadaf Ebrahimi
3019*22dc650dSSadaf Ebrahimi if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256 && type != SLJIT_SIMD_REG_512)
3020*22dc650dSSadaf Ebrahimi return -1;
3021*22dc650dSSadaf Ebrahimi
3022*22dc650dSSadaf Ebrahimi return freg_map[reg];
3023*22dc650dSSadaf Ebrahimi }
3024*22dc650dSSadaf Ebrahimi
3025*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
3026*22dc650dSSadaf Ebrahimi void *instruction, sljit_u32 size)
3027*22dc650dSSadaf Ebrahimi {
3028*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
3029*22dc650dSSadaf Ebrahimi
3030*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3031*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
3032*22dc650dSSadaf Ebrahimi
3033*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
3034*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
3035*22dc650dSSadaf Ebrahimi INC_SIZE(size);
3036*22dc650dSSadaf Ebrahimi SLJIT_MEMCPY(inst, instruction, size);
3037*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3038*22dc650dSSadaf Ebrahimi }
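/* Usage sketch (illustrative only, not part of the backend itself): a caller
   can inject raw machine code through this hook, for example the two byte
   RDTSC instruction (0F 31) on x86:

       static const sljit_u8 rdtsc_code[] = { 0x0f, 0x31 };
       sljit_emit_op_custom(compiler, (void*)rdtsc_code, sizeof(rdtsc_code));

   The bytes are copied verbatim into the code buffer; no operand checking or
   register bookkeeping is performed on them, so they must already form a
   valid instruction. */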
3039*22dc650dSSadaf Ebrahimi
3040*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
3041*22dc650dSSadaf Ebrahimi /* Floating point operators */
3042*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
3043*22dc650dSSadaf Ebrahimi
3044*22dc650dSSadaf Ebrahimi /* 3 words of padding for 16 byte alignment + 4 constants of 16 bytes each. */
3045*22dc650dSSadaf Ebrahimi static sljit_u32 sse2_data[3 + (4 * 4)];
3046*22dc650dSSadaf Ebrahimi static sljit_u32 *sse2_buffer;
3047*22dc650dSSadaf Ebrahimi
3048*22dc650dSSadaf Ebrahimi static void init_compiler(void)
3049*22dc650dSSadaf Ebrahimi {
3050*22dc650dSSadaf Ebrahimi get_cpu_features();
3051*22dc650dSSadaf Ebrahimi
3052*22dc650dSSadaf Ebrahimi /* Align to 16 bytes. */
3053*22dc650dSSadaf Ebrahimi sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf);
3054*22dc650dSSadaf Ebrahimi
3055*22dc650dSSadaf Ebrahimi /* Single precision constants (each constant is 16 bytes long). */
3056*22dc650dSSadaf Ebrahimi sse2_buffer[0] = 0x80000000;
3057*22dc650dSSadaf Ebrahimi sse2_buffer[4] = 0x7fffffff;
3058*22dc650dSSadaf Ebrahimi /* Double precision constants (each constant is 16 bytes long). */
3059*22dc650dSSadaf Ebrahimi sse2_buffer[8] = 0;
3060*22dc650dSSadaf Ebrahimi sse2_buffer[9] = 0x80000000;
3061*22dc650dSSadaf Ebrahimi sse2_buffer[12] = 0xffffffff;
3062*22dc650dSSadaf Ebrahimi sse2_buffer[13] = 0x7fffffff;
3063*22dc650dSSadaf Ebrahimi }
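/* Layout of the table initialized above (one 16 byte slot per constant; the
   words that are not written stay zero because sse2_data has static storage):

       sse2_buffer[0..3]   : 80000000 0 0 0         float sign mask  (XORPS flips the sign)
       sse2_buffer[4..7]   : 7fffffff 0 0 0         float abs mask   (ANDPS clears the sign)
       sse2_buffer[8..11]  : 00000000 80000000 0 0  double sign mask (XORPD)
       sse2_buffer[12..15] : ffffffff 7fffffff 0 0  double abs mask  (ANDPD)

   The three extra words in sse2_data only provide slack so that sse2_buffer
   can be rounded up to a 16 byte boundary. */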
3064*22dc650dSSadaf Ebrahimi
3065*22dc650dSSadaf Ebrahimi static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
3066*22dc650dSSadaf Ebrahimi sljit_uw op,
3067*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3068*22dc650dSSadaf Ebrahimi {
3069*22dc650dSSadaf Ebrahimi sljit_u8 *inst = emit_x86_instruction(compiler, 2 | (op & ~(sljit_uw)0xff), dst, 0, src, srcw);
3070*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
3071*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
3072*22dc650dSSadaf Ebrahimi inst[1] = op & 0xff;
3073*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3074*22dc650dSSadaf Ebrahimi }
3075*22dc650dSSadaf Ebrahimi
3076*22dc650dSSadaf Ebrahimi static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
3077*22dc650dSSadaf Ebrahimi sljit_uw op,
3078*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3079*22dc650dSSadaf Ebrahimi {
3080*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
3081*22dc650dSSadaf Ebrahimi
3082*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((op & EX86_SSE2) && ((op & VEX_OP_0F38) || (op & VEX_OP_0F3A)));
3083*22dc650dSSadaf Ebrahimi
3084*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 3 | (op & ~((sljit_uw)0xff | VEX_OP_0F38 | VEX_OP_0F3A)), dst, 0, src, srcw);
3085*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
3086*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
3087*22dc650dSSadaf Ebrahimi inst[1] = U8((op & VEX_OP_0F38) ? 0x38 : 0x3A);
3088*22dc650dSSadaf Ebrahimi inst[2] = op & 0xff;
3089*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3090*22dc650dSSadaf Ebrahimi }
3091*22dc650dSSadaf Ebrahimi
3092*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
3093*22dc650dSSadaf Ebrahimi sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3094*22dc650dSSadaf Ebrahimi {
3095*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, MOVSD_x_xm | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, dst, src, srcw);
3096*22dc650dSSadaf Ebrahimi }
3097*22dc650dSSadaf Ebrahimi
3098*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
3099*22dc650dSSadaf Ebrahimi sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
3100*22dc650dSSadaf Ebrahimi {
3101*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, MOVSD_xm_x | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, src, dst, dstw);
3102*22dc650dSSadaf Ebrahimi }
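/* Both helpers emit the same MOVSS/MOVSD opcode pair (0F 10 for the load
   form, 0F 11 for the store form); the mandatory prefix selects the element
   size: F3 means single precision, F2 double precision. As an encoding
   example, loading a float from [esp + 8] into xmm2 is F3 0F 10 54 24 08. */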
3103*22dc650dSSadaf Ebrahimi
3104*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
3105*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
3106*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
3107*22dc650dSSadaf Ebrahimi {
3108*22dc650dSSadaf Ebrahimi sljit_s32 dst_r;
3109*22dc650dSSadaf Ebrahimi
3110*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(dst, dstw, (void)0);
3111*22dc650dSSadaf Ebrahimi dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
3112*22dc650dSSadaf Ebrahimi
3113*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3114*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
3115*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
3116*22dc650dSSadaf Ebrahimi #endif
3117*22dc650dSSadaf Ebrahimi
3118*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, CVTTSD2SI_r_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP2, dst_r, src, srcw));
3119*22dc650dSSadaf Ebrahimi
3120*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM)
3121*22dc650dSSadaf Ebrahimi return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3122*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3123*22dc650dSSadaf Ebrahimi }
3124*22dc650dSSadaf Ebrahimi
3125*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
3126*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
3127*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
3128*22dc650dSSadaf Ebrahimi {
3129*22dc650dSSadaf Ebrahimi sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
3130*22dc650dSSadaf Ebrahimi
3131*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src, srcw, (void)0);
3132*22dc650dSSadaf Ebrahimi
3133*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3134*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
3135*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
3136*22dc650dSSadaf Ebrahimi #endif
3137*22dc650dSSadaf Ebrahimi
3138*22dc650dSSadaf Ebrahimi if (src == SLJIT_IMM) {
3139*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3140*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
3141*22dc650dSSadaf Ebrahimi srcw = (sljit_s32)srcw;
3142*22dc650dSSadaf Ebrahimi #endif
3143*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
3144*22dc650dSSadaf Ebrahimi src = TMP_REG1;
3145*22dc650dSSadaf Ebrahimi srcw = 0;
3146*22dc650dSSadaf Ebrahimi }
3147*22dc650dSSadaf Ebrahimi
3148*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, srcw));
3149*22dc650dSSadaf Ebrahimi
3150*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3151*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
3152*22dc650dSSadaf Ebrahimi #endif
3153*22dc650dSSadaf Ebrahimi if (dst_r == TMP_FREG)
3154*22dc650dSSadaf Ebrahimi return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3155*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3156*22dc650dSSadaf Ebrahimi }
3157*22dc650dSSadaf Ebrahimi
3158*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
3159*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
3160*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
3161*22dc650dSSadaf Ebrahimi {
3162*22dc650dSSadaf Ebrahimi switch (GET_FLAG_TYPE(op)) {
3163*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_EQUAL:
3164*22dc650dSSadaf Ebrahimi /* Also: SLJIT_UNORDERED_OR_NOT_EQUAL */
3165*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3166*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, CMPS_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, TMP_FREG, src2, src2w));
3167*22dc650dSSadaf Ebrahimi
3168*22dc650dSSadaf Ebrahimi /* EQ */
3169*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, 0));
3170*22dc650dSSadaf Ebrahimi
3171*22dc650dSSadaf Ebrahimi src1 = TMP_FREG;
3172*22dc650dSSadaf Ebrahimi src2 = TMP_FREG;
3173*22dc650dSSadaf Ebrahimi src2w = 0;
3174*22dc650dSSadaf Ebrahimi break;
3175*22dc650dSSadaf Ebrahimi
3176*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_LESS:
3177*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED_OR_GREATER:
3178*22dc650dSSadaf Ebrahimi /* Also: SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_ORDERED_LESS_EQUAL */
3179*22dc650dSSadaf Ebrahimi if (!FAST_IS_REG(src2)) {
3180*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
3181*22dc650dSSadaf Ebrahimi src2 = TMP_FREG;
3182*22dc650dSSadaf Ebrahimi }
3183*22dc650dSSadaf Ebrahimi
3184*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src2, src1, src1w);
3185*22dc650dSSadaf Ebrahimi }
3186*22dc650dSSadaf Ebrahimi
3187*22dc650dSSadaf Ebrahimi if (!FAST_IS_REG(src1)) {
3188*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3189*22dc650dSSadaf Ebrahimi src1 = TMP_FREG;
3190*22dc650dSSadaf Ebrahimi }
3191*22dc650dSSadaf Ebrahimi
3192*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src1, src2, src2w);
3193*22dc650dSSadaf Ebrahimi }
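/* For reference, UCOMISS/UCOMISD a, b set the flags as follows:

       unordered (NaN)  ZF=1 PF=1 CF=1
       a < b            ZF=0 PF=0 CF=1
       a > b            ZF=0 PF=0 CF=0
       a == b           ZF=1 PF=0 CF=0

   The second group of conditions above swaps the operand order so that a
   single condition code can test the result. SLJIT_ORDERED_EQUAL would need
   ZF=1 together with PF=0, which no single condition code covers, so it is
   handled first with CMPSS/CMPSD predicate 0 ("EQ"), which produces an
   all-ones mask only when the operands are ordered and equal. */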
3194*22dc650dSSadaf Ebrahimi
3195*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
3196*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
3197*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
3198*22dc650dSSadaf Ebrahimi {
3199*22dc650dSSadaf Ebrahimi sljit_s32 dst_r;
3200*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
3201*22dc650dSSadaf Ebrahimi
3202*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3203*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
3204*22dc650dSSadaf Ebrahimi #endif
3205*22dc650dSSadaf Ebrahimi
3206*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3207*22dc650dSSadaf Ebrahimi SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
3208*22dc650dSSadaf Ebrahimi
3209*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) == SLJIT_MOV_F64) {
3210*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst))
3211*22dc650dSSadaf Ebrahimi return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw);
3212*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(src))
3213*22dc650dSSadaf Ebrahimi return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src);
3214*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3215*22dc650dSSadaf Ebrahimi return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3216*22dc650dSSadaf Ebrahimi }
3217*22dc650dSSadaf Ebrahimi
3218*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
3219*22dc650dSSadaf Ebrahimi dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
3220*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(src)) {
3221*22dc650dSSadaf Ebrahimi /* We overwrite the high bits of the source register. From the SLJIT
3222*22dc650dSSadaf Ebrahimi point of view, this is not an issue.
3223*22dc650dSSadaf Ebrahimi Note: with SSE3, MOVDDUP and MOVSLDUP could also be used here. */
3224*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, UNPCKLPD_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, src, src, 0));
3225*22dc650dSSadaf Ebrahimi } else {
3226*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw));
3227*22dc650dSSadaf Ebrahimi src = TMP_FREG;
3228*22dc650dSSadaf Ebrahimi }
3229*22dc650dSSadaf Ebrahimi
3230*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, CVTPD2PS_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, dst_r, src, 0));
3231*22dc650dSSadaf Ebrahimi if (dst_r == TMP_FREG)
3232*22dc650dSSadaf Ebrahimi return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3233*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3234*22dc650dSSadaf Ebrahimi }
3235*22dc650dSSadaf Ebrahimi
3236*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst)) {
3237*22dc650dSSadaf Ebrahimi dst_r = (dst == src) ? TMP_FREG : dst;
3238*22dc650dSSadaf Ebrahimi
3239*22dc650dSSadaf Ebrahimi if (src & SLJIT_MEM)
3240*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3241*22dc650dSSadaf Ebrahimi
3242*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, PCMPEQD_x_xm | EX86_PREF_66 | EX86_SSE2, dst_r, dst_r, 0));
3243*22dc650dSSadaf Ebrahimi
3244*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP2, 0, 0, dst_r, 0);
3245*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
3246*22dc650dSSadaf Ebrahimi /* PSLLD_x_i8 / PSLLQ_x_i8 share their opcode with PSRLD_x / PSRLQ_x; the /r field set below selects the operation. */
3247*22dc650dSSadaf Ebrahimi inst[1] = (op & SLJIT_32) ? PSLLD_x_i8 : PSLLQ_x_i8;
3248*22dc650dSSadaf Ebrahimi
3249*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) == SLJIT_ABS_F64) {
3250*22dc650dSSadaf Ebrahimi inst[2] |= 2 << 3;
3251*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, 1));
3252*22dc650dSSadaf Ebrahimi } else {
3253*22dc650dSSadaf Ebrahimi inst[2] |= 6 << 3;
3254*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, ((op & SLJIT_32) ? 31 : 63)));
3255*22dc650dSSadaf Ebrahimi }
3256*22dc650dSSadaf Ebrahimi
3257*22dc650dSSadaf Ebrahimi if (dst_r != TMP_FREG)
3258*22dc650dSSadaf Ebrahimi dst_r = (src & SLJIT_MEM) ? TMP_FREG : src;
3259*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_NEG_F64 ? XORPD_x_xm : ANDPD_x_xm) | EX86_SSE2, dst, dst_r, 0);
3260*22dc650dSSadaf Ebrahimi }
3261*22dc650dSSadaf Ebrahimi
3262*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3263*22dc650dSSadaf Ebrahimi
3264*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op)) {
3265*22dc650dSSadaf Ebrahimi case SLJIT_NEG_F64:
3266*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, XORPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3267*22dc650dSSadaf Ebrahimi break;
3268*22dc650dSSadaf Ebrahimi
3269*22dc650dSSadaf Ebrahimi case SLJIT_ABS_F64:
3270*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer + 4 : sse2_buffer + 12)));
3271*22dc650dSSadaf Ebrahimi break;
3272*22dc650dSSadaf Ebrahimi }
3273*22dc650dSSadaf Ebrahimi
3274*22dc650dSSadaf Ebrahimi return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3275*22dc650dSSadaf Ebrahimi }
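/* For register destinations the code above builds the masks in a register
   instead of loading them from sse2_buffer:

       pcmpeqd  x, x       ; x = all ones
       psrlq    x, 1       ; x = 7fffffffffffffff  -> abs mask, used with ANDPD
   or
       psllq    x, 63      ; x = 8000000000000000  -> sign mask, used with XORPD

   (psrld x, 1 / pslld x, 31 in the single precision case). The PSLL and PSRL
   immediate forms share one opcode, so only the /r field of the ModRM byte
   (2 = shift right, 6 = shift left) and the shift amount differ. */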
3276*22dc650dSSadaf Ebrahimi
3277*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
3278*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
3279*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
3280*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
3281*22dc650dSSadaf Ebrahimi {
3282*22dc650dSSadaf Ebrahimi sljit_s32 dst_r;
3283*22dc650dSSadaf Ebrahimi
3284*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3285*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
3286*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
3287*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src1, src1w);
3288*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src2, src2w);
3289*22dc650dSSadaf Ebrahimi
3290*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3291*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
3292*22dc650dSSadaf Ebrahimi #endif
3293*22dc650dSSadaf Ebrahimi
3294*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst)) {
3295*22dc650dSSadaf Ebrahimi dst_r = dst;
3296*22dc650dSSadaf Ebrahimi if (dst == src1)
3297*22dc650dSSadaf Ebrahimi ; /* Do nothing here. */
3298*22dc650dSSadaf Ebrahimi else if (dst == src2 && (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64)) {
3299*22dc650dSSadaf Ebrahimi /* Swap arguments. */
3300*22dc650dSSadaf Ebrahimi src2 = src1;
3301*22dc650dSSadaf Ebrahimi src2w = src1w;
3302*22dc650dSSadaf Ebrahimi } else if (dst != src2)
3303*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w));
3304*22dc650dSSadaf Ebrahimi else {
3305*22dc650dSSadaf Ebrahimi dst_r = TMP_FREG;
3306*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3307*22dc650dSSadaf Ebrahimi }
3308*22dc650dSSadaf Ebrahimi } else {
3309*22dc650dSSadaf Ebrahimi dst_r = TMP_FREG;
3310*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3311*22dc650dSSadaf Ebrahimi }
3312*22dc650dSSadaf Ebrahimi
3313*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op)) {
3314*22dc650dSSadaf Ebrahimi case SLJIT_ADD_F64:
3315*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3316*22dc650dSSadaf Ebrahimi break;
3317*22dc650dSSadaf Ebrahimi
3318*22dc650dSSadaf Ebrahimi case SLJIT_SUB_F64:
3319*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, SUBSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3320*22dc650dSSadaf Ebrahimi break;
3321*22dc650dSSadaf Ebrahimi
3322*22dc650dSSadaf Ebrahimi case SLJIT_MUL_F64:
3323*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, MULSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3324*22dc650dSSadaf Ebrahimi break;
3325*22dc650dSSadaf Ebrahimi
3326*22dc650dSSadaf Ebrahimi case SLJIT_DIV_F64:
3327*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, DIVSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3328*22dc650dSSadaf Ebrahimi break;
3329*22dc650dSSadaf Ebrahimi }
3330*22dc650dSSadaf Ebrahimi
3331*22dc650dSSadaf Ebrahimi if (dst_r != dst)
3332*22dc650dSSadaf Ebrahimi return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3333*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3334*22dc650dSSadaf Ebrahimi }
3335*22dc650dSSadaf Ebrahimi
3336*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
3337*22dc650dSSadaf Ebrahimi sljit_s32 dst_freg,
3338*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
3339*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
3340*22dc650dSSadaf Ebrahimi {
3341*22dc650dSSadaf Ebrahimi sljit_uw pref;
3342*22dc650dSSadaf Ebrahimi
3343*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3344*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
3345*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src1, src1w);
3346*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src2, src2w);
3347*22dc650dSSadaf Ebrahimi
3348*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3349*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
3350*22dc650dSSadaf Ebrahimi #endif
3351*22dc650dSSadaf Ebrahimi
3352*22dc650dSSadaf Ebrahimi if (dst_freg == src1) {
3353*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
3354*22dc650dSSadaf Ebrahimi pref = EX86_SELECT_66(op) | EX86_SSE2;
3355*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, TMP_FREG, src1, src1w));
3356*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3357*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, TMP_FREG, 0);
3358*22dc650dSSadaf Ebrahimi }
3359*22dc650dSSadaf Ebrahimi
3360*22dc650dSSadaf Ebrahimi if (src1 & SLJIT_MEM) {
3361*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3362*22dc650dSSadaf Ebrahimi src1 = TMP_FREG;
3363*22dc650dSSadaf Ebrahimi src1w = 0;
3364*22dc650dSSadaf Ebrahimi }
3365*22dc650dSSadaf Ebrahimi
3366*22dc650dSSadaf Ebrahimi if (dst_freg != src2)
3367*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_freg, src2, src2w));
3368*22dc650dSSadaf Ebrahimi
3369*22dc650dSSadaf Ebrahimi pref = EX86_SELECT_66(op) | EX86_SSE2;
3370*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w));
3371*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, dst_freg, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3372*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w);
3373*22dc650dSSadaf Ebrahimi }
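/* The XORPD/ANDPD/XORPD sequence above is the usual branch-free copy-sign:
   with S denoting the sign-bit mask taken from sse2_buffer,

       dst = src1 ^ ((src1 ^ src2) & S)

   keeps the magnitude of src1 and takes the sign of src2. Worked single
   precision example: src1 = 0x40490fdb (+3.14159...), src2 = 0x80000000
   (-0.0); (src1 ^ src2) & S = 0x80000000, so dst = 0xc0490fdb, which is
   -3.14159... */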
3374*22dc650dSSadaf Ebrahimi
3375*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
3376*22dc650dSSadaf Ebrahimi /* Conditional instructions */
3377*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
3378*22dc650dSSadaf Ebrahimi
3379*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3380*22dc650dSSadaf Ebrahimi {
3381*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
3382*22dc650dSSadaf Ebrahimi struct sljit_label *label;
3383*22dc650dSSadaf Ebrahimi
3384*22dc650dSSadaf Ebrahimi CHECK_ERROR_PTR();
3385*22dc650dSSadaf Ebrahimi CHECK_PTR(check_sljit_emit_label(compiler));
3386*22dc650dSSadaf Ebrahimi
3387*22dc650dSSadaf Ebrahimi if (compiler->last_label && compiler->last_label->size == compiler->size)
3388*22dc650dSSadaf Ebrahimi return compiler->last_label;
3389*22dc650dSSadaf Ebrahimi
3390*22dc650dSSadaf Ebrahimi label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3391*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(!label);
3392*22dc650dSSadaf Ebrahimi set_label(label, compiler);
3393*22dc650dSSadaf Ebrahimi
3394*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1);
3395*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(!inst);
3396*22dc650dSSadaf Ebrahimi inst[0] = SLJIT_INST_LABEL;
3397*22dc650dSSadaf Ebrahimi
3398*22dc650dSSadaf Ebrahimi return label;
3399*22dc650dSSadaf Ebrahimi }
3400*22dc650dSSadaf Ebrahimi
3401*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
3402*22dc650dSSadaf Ebrahimi {
3403*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
3404*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
3405*22dc650dSSadaf Ebrahimi
3406*22dc650dSSadaf Ebrahimi CHECK_ERROR_PTR();
3407*22dc650dSSadaf Ebrahimi CHECK_PTR(check_sljit_emit_jump(compiler, type));
3408*22dc650dSSadaf Ebrahimi
3409*22dc650dSSadaf Ebrahimi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3410*22dc650dSSadaf Ebrahimi PTR_FAIL_IF_NULL(jump);
3411*22dc650dSSadaf Ebrahimi set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT)));
3412*22dc650dSSadaf Ebrahimi type &= 0xff;
3413*22dc650dSSadaf Ebrahimi
3414*22dc650dSSadaf Ebrahimi jump->addr = compiler->size;
3415*22dc650dSSadaf Ebrahimi /* Worst case size. */
3416*22dc650dSSadaf Ebrahimi compiler->size += (type >= SLJIT_JUMP) ? JUMP_MAX_SIZE : CJUMP_MAX_SIZE;
3417*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1);
3418*22dc650dSSadaf Ebrahimi PTR_FAIL_IF_NULL(inst);
3419*22dc650dSSadaf Ebrahimi
3420*22dc650dSSadaf Ebrahimi inst[0] = SLJIT_INST_JUMP;
3421*22dc650dSSadaf Ebrahimi return jump;
3422*22dc650dSSadaf Ebrahimi }
3423*22dc650dSSadaf Ebrahimi
3424*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3425*22dc650dSSadaf Ebrahimi {
3426*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
3427*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
3428*22dc650dSSadaf Ebrahimi
3429*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3430*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3431*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src, srcw);
3432*22dc650dSSadaf Ebrahimi
3433*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src, srcw, (void)0);
3434*22dc650dSSadaf Ebrahimi
3435*22dc650dSSadaf Ebrahimi if (src == SLJIT_IMM) {
3436*22dc650dSSadaf Ebrahimi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3437*22dc650dSSadaf Ebrahimi FAIL_IF_NULL(jump);
3438*22dc650dSSadaf Ebrahimi set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT)));
3439*22dc650dSSadaf Ebrahimi jump->u.target = (sljit_uw)srcw;
3440*22dc650dSSadaf Ebrahimi
3441*22dc650dSSadaf Ebrahimi jump->addr = compiler->size;
3442*22dc650dSSadaf Ebrahimi /* Worst case size. */
3443*22dc650dSSadaf Ebrahimi compiler->size += JUMP_MAX_SIZE;
3444*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1);
3445*22dc650dSSadaf Ebrahimi FAIL_IF_NULL(inst);
3446*22dc650dSSadaf Ebrahimi
3447*22dc650dSSadaf Ebrahimi inst[0] = SLJIT_INST_JUMP;
3448*22dc650dSSadaf Ebrahimi } else {
3449*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3450*22dc650dSSadaf Ebrahimi /* REX_W is not necessary (src is not immediate). */
3451*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
3452*22dc650dSSadaf Ebrahimi #endif
3453*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
3454*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
3455*22dc650dSSadaf Ebrahimi inst[0] = GROUP_FF;
3456*22dc650dSSadaf Ebrahimi inst[1] = U8(inst[1] | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm));
3457*22dc650dSSadaf Ebrahimi }
3458*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3459*22dc650dSSadaf Ebrahimi }
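/* For register or memory targets the indirect forms of the 0xFF opcode group
   are used: FF /2 is an indirect CALL and FF /4 an indirect JMP, so the code
   above only has to OR the /r selector into the ModRM byte produced by
   emit_x86_instruction(). */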
3460*22dc650dSSadaf Ebrahimi
3461*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
3462*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
3463*22dc650dSSadaf Ebrahimi sljit_s32 type)
3464*22dc650dSSadaf Ebrahimi {
3465*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
3466*22dc650dSSadaf Ebrahimi sljit_u8 cond_set;
3467*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3468*22dc650dSSadaf Ebrahimi sljit_s32 reg;
3469*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
3470*22dc650dSSadaf Ebrahimi /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
3471*22dc650dSSadaf Ebrahimi sljit_s32 dst_save = dst;
3472*22dc650dSSadaf Ebrahimi sljit_sw dstw_save = dstw;
3473*22dc650dSSadaf Ebrahimi
3474*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3475*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
3476*22dc650dSSadaf Ebrahimi
3477*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
3478*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(dst, dstw, (void)0);
3479*22dc650dSSadaf Ebrahimi
3480*22dc650dSSadaf Ebrahimi /* setcc = jcc + 0x10: the near jcc (0F 80+cc) and setcc (0F 90+cc) opcodes differ only by 0x10 in their second byte, e.g. JE is 0F 84 and SETE is 0F 94. */
3481*22dc650dSSadaf Ebrahimi cond_set = U8(get_jump_code((sljit_uw)type) + 0x10);
3482*22dc650dSSadaf Ebrahimi
3483*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3484*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
3485*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
3486*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
3487*22dc650dSSadaf Ebrahimi INC_SIZE(4 + 3);
3488*22dc650dSSadaf Ebrahimi /* Set low register to conditional flag. */
3489*22dc650dSSadaf Ebrahimi inst[0] = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
3490*22dc650dSSadaf Ebrahimi inst[1] = GROUP_0F;
3491*22dc650dSSadaf Ebrahimi inst[2] = cond_set;
3492*22dc650dSSadaf Ebrahimi inst[3] = MOD_REG | reg_lmap[TMP_REG1];
3493*22dc650dSSadaf Ebrahimi inst[4] = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B));
3494*22dc650dSSadaf Ebrahimi inst[5] = OR_rm8_r8;
3495*22dc650dSSadaf Ebrahimi inst[6] = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]);
3496*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3497*22dc650dSSadaf Ebrahimi }
3498*22dc650dSSadaf Ebrahimi
3499*22dc650dSSadaf Ebrahimi reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
3500*22dc650dSSadaf Ebrahimi
3501*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
3502*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
3503*22dc650dSSadaf Ebrahimi INC_SIZE(4 + 4);
3504*22dc650dSSadaf Ebrahimi /* Set low register to conditional flag. */
3505*22dc650dSSadaf Ebrahimi inst[0] = (reg_map[reg] <= 7) ? REX : REX_B;
3506*22dc650dSSadaf Ebrahimi inst[1] = GROUP_0F;
3507*22dc650dSSadaf Ebrahimi inst[2] = cond_set;
3508*22dc650dSSadaf Ebrahimi inst[3] = MOD_REG | reg_lmap[reg];
3509*22dc650dSSadaf Ebrahimi inst[4] = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
3510*22dc650dSSadaf Ebrahimi /* The movzx instruction does not affect flags. */
3511*22dc650dSSadaf Ebrahimi inst[5] = GROUP_0F;
3512*22dc650dSSadaf Ebrahimi inst[6] = MOVZX_r_rm8;
3513*22dc650dSSadaf Ebrahimi inst[7] = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]);
3514*22dc650dSSadaf Ebrahimi
3515*22dc650dSSadaf Ebrahimi if (reg != TMP_REG1)
3516*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3517*22dc650dSSadaf Ebrahimi
3518*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) < SLJIT_ADD) {
3519*22dc650dSSadaf Ebrahimi compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
3520*22dc650dSSadaf Ebrahimi return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3521*22dc650dSSadaf Ebrahimi }
3522*22dc650dSSadaf Ebrahimi
3523*22dc650dSSadaf Ebrahimi SLJIT_SKIP_CHECKS(compiler);
3524*22dc650dSSadaf Ebrahimi return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
3525*22dc650dSSadaf Ebrahimi
3526*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
3527*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(reg_map[TMP_REG1] < 4);
3528*22dc650dSSadaf Ebrahimi
3529*22dc650dSSadaf Ebrahimi /* The SLJIT_CONFIG_X86_32 code path starts here. */
3530*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
3531*22dc650dSSadaf Ebrahimi /* Low byte is accessible. */
3532*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
3533*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
3534*22dc650dSSadaf Ebrahimi INC_SIZE(3 + 3);
3535*22dc650dSSadaf Ebrahimi /* Set low byte to conditional flag. */
3536*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
3537*22dc650dSSadaf Ebrahimi inst[1] = cond_set;
3538*22dc650dSSadaf Ebrahimi inst[2] = U8(MOD_REG | reg_map[dst]);
3539*22dc650dSSadaf Ebrahimi
3540*22dc650dSSadaf Ebrahimi inst[3] = GROUP_0F;
3541*22dc650dSSadaf Ebrahimi inst[4] = MOVZX_r_rm8;
3542*22dc650dSSadaf Ebrahimi inst[5] = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]);
3543*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3544*22dc650dSSadaf Ebrahimi }
3545*22dc650dSSadaf Ebrahimi
3546*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
3547*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 2);
3548*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
3549*22dc650dSSadaf Ebrahimi INC_SIZE(3 + 2);
3550*22dc650dSSadaf Ebrahimi
3551*22dc650dSSadaf Ebrahimi /* Set low byte to conditional flag. */
3552*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
3553*22dc650dSSadaf Ebrahimi inst[1] = cond_set;
3554*22dc650dSSadaf Ebrahimi inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);
3555*22dc650dSSadaf Ebrahimi
3556*22dc650dSSadaf Ebrahimi inst[3] = OR_rm8_r8;
3557*22dc650dSSadaf Ebrahimi inst[4] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[dst]);
3558*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3559*22dc650dSSadaf Ebrahimi }
3560*22dc650dSSadaf Ebrahimi
3561*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
3562*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
3563*22dc650dSSadaf Ebrahimi INC_SIZE(3 + 3);
3564*22dc650dSSadaf Ebrahimi /* Set low byte to conditional flag. */
3565*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
3566*22dc650dSSadaf Ebrahimi inst[1] = cond_set;
3567*22dc650dSSadaf Ebrahimi inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);
3568*22dc650dSSadaf Ebrahimi
3569*22dc650dSSadaf Ebrahimi inst[3] = GROUP_0F;
3570*22dc650dSSadaf Ebrahimi inst[4] = MOVZX_r_rm8;
3571*22dc650dSSadaf Ebrahimi inst[5] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[TMP_REG1]);
3572*22dc650dSSadaf Ebrahimi
3573*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) < SLJIT_ADD)
3574*22dc650dSSadaf Ebrahimi return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3575*22dc650dSSadaf Ebrahimi
3576*22dc650dSSadaf Ebrahimi SLJIT_SKIP_CHECKS(compiler);
3577*22dc650dSSadaf Ebrahimi return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
3578*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
3579*22dc650dSSadaf Ebrahimi }
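/* The common sequence produced above for "dst = condition" is:

       setcc  reg8          ; set the low byte from the flags
       movzx  reg32, reg8   ; zero extend; movzx does not modify the flags

   followed either by a move into the destination (for the MOV-type opcodes)
   or by a regular sljit_emit_op2() call (for the accumulating ADD/OR/...
   forms). The SLJIT_OR fast paths above skip the movzx and OR the freshly
   set byte directly into the destination register. */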
3580*22dc650dSSadaf Ebrahimi
3581*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3582*22dc650dSSadaf Ebrahimi sljit_s32 dst_freg,
3583*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
3584*22dc650dSSadaf Ebrahimi sljit_s32 src2_freg)
3585*22dc650dSSadaf Ebrahimi {
3586*22dc650dSSadaf Ebrahimi sljit_u8* inst;
3587*22dc650dSSadaf Ebrahimi sljit_uw size;
3588*22dc650dSSadaf Ebrahimi
3589*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3590*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3591*22dc650dSSadaf Ebrahimi
3592*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src1, src1w);
3593*22dc650dSSadaf Ebrahimi
3594*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3595*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
3596*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
3597*22dc650dSSadaf Ebrahimi
3598*22dc650dSSadaf Ebrahimi if (dst_freg != src2_freg) {
3599*22dc650dSSadaf Ebrahimi if (dst_freg == src1) {
3600*22dc650dSSadaf Ebrahimi src1 = src2_freg;
3601*22dc650dSSadaf Ebrahimi src1w = 0;
3602*22dc650dSSadaf Ebrahimi type ^= 0x1;
3603*22dc650dSSadaf Ebrahimi } else
3604*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src2_freg, 0));
3605*22dc650dSSadaf Ebrahimi }
3606*22dc650dSSadaf Ebrahimi
3607*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
3608*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
3609*22dc650dSSadaf Ebrahimi INC_SIZE(2);
3610*22dc650dSSadaf Ebrahimi inst[0] = U8(get_jump_code((sljit_uw)(type & ~SLJIT_32) ^ 0x1) - 0x10);
3611*22dc650dSSadaf Ebrahimi
3612*22dc650dSSadaf Ebrahimi size = compiler->size;
3613*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src1, src1w));
3614*22dc650dSSadaf Ebrahimi
3615*22dc650dSSadaf Ebrahimi inst[1] = U8(compiler->size - size);
3616*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3617*22dc650dSSadaf Ebrahimi }
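/* The select above is implemented with a short forward branch: the inverted
   condition jumps over a single scalar load, and the rel8 displacement
   (inst[1]) is patched afterwards with the number of bytes the load occupied.
   Roughly, for "dst = cond ? src1 : src2" with dst already holding src2:

       j<!cond>  1f
       movss     dst, src1    ; movsd for 64-bit elements
   1:

   A single SSE load is at most 15 bytes, well within the rel8 range. */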
3618*22dc650dSSadaf Ebrahimi
3619*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3620*22dc650dSSadaf Ebrahimi sljit_s32 freg,
3621*22dc650dSSadaf Ebrahimi sljit_s32 srcdst, sljit_sw srcdstw)
3622*22dc650dSSadaf Ebrahimi {
3623*22dc650dSSadaf Ebrahimi sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3624*22dc650dSSadaf Ebrahimi sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3625*22dc650dSSadaf Ebrahimi sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3626*22dc650dSSadaf Ebrahimi sljit_uw op;
3627*22dc650dSSadaf Ebrahimi
3628*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3629*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3630*22dc650dSSadaf Ebrahimi
3631*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3632*22dc650dSSadaf Ebrahimi
3633*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3634*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
3635*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
3636*22dc650dSSadaf Ebrahimi
3637*22dc650dSSadaf Ebrahimi switch (reg_size) {
3638*22dc650dSSadaf Ebrahimi case 4:
3639*22dc650dSSadaf Ebrahimi op = EX86_SSE2;
3640*22dc650dSSadaf Ebrahimi break;
3641*22dc650dSSadaf Ebrahimi case 5:
3642*22dc650dSSadaf Ebrahimi if (!(cpu_feature_list & CPU_FEATURE_AVX2))
3643*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3644*22dc650dSSadaf Ebrahimi op = EX86_SSE2 | VEX_256;
3645*22dc650dSSadaf Ebrahimi break;
3646*22dc650dSSadaf Ebrahimi default:
3647*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3648*22dc650dSSadaf Ebrahimi }
3649*22dc650dSSadaf Ebrahimi
3650*22dc650dSSadaf Ebrahimi if (!(srcdst & SLJIT_MEM))
3651*22dc650dSSadaf Ebrahimi alignment = reg_size;
3652*22dc650dSSadaf Ebrahimi
3653*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_FLOAT) {
3654*22dc650dSSadaf Ebrahimi if (elem_size == 2 || elem_size == 3) {
3655*22dc650dSSadaf Ebrahimi op |= alignment >= reg_size ? MOVAPS_x_xm : MOVUPS_x_xm;
3656*22dc650dSSadaf Ebrahimi
3657*22dc650dSSadaf Ebrahimi if (elem_size == 3)
3658*22dc650dSSadaf Ebrahimi op |= EX86_PREF_66;
3659*22dc650dSSadaf Ebrahimi
3660*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_STORE)
3661*22dc650dSSadaf Ebrahimi op += 1;
3662*22dc650dSSadaf Ebrahimi } else
3663*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3664*22dc650dSSadaf Ebrahimi } else {
3665*22dc650dSSadaf Ebrahimi op |= ((type & SLJIT_SIMD_STORE) ? MOVDQA_xm_x : MOVDQA_x_xm)
3666*22dc650dSSadaf Ebrahimi | (alignment >= reg_size ? EX86_PREF_66 : EX86_PREF_F3);
3667*22dc650dSSadaf Ebrahimi }
3668*22dc650dSSadaf Ebrahimi
3669*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
3670*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3671*22dc650dSSadaf Ebrahimi
3672*22dc650dSSadaf Ebrahimi if ((op & VEX_256) || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX)))
3673*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, op, freg, 0, srcdst, srcdstw);
3674*22dc650dSSadaf Ebrahimi
3675*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, op, freg, srcdst, srcdstw);
3676*22dc650dSSadaf Ebrahimi }
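/* Instruction selection above, by element type and guaranteed alignment:

       float elements,   aligned  : MOVAPS / MOVAPD  (store form is opcode + 1)
       float elements,   unaligned: MOVUPS / MOVUPD
       integer elements, aligned  : MOVDQA  (66 prefix)
       integer elements, unaligned: MOVDQU  (F3 prefix)

   The same opcodes are VEX encoded when a 256-bit register is requested, or
   when AVX is available and the code was entered with SLJIT_ENTER_USE_VEX. */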
3677*22dc650dSSadaf Ebrahimi
3678*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3679*22dc650dSSadaf Ebrahimi sljit_s32 freg,
3680*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
3681*22dc650dSSadaf Ebrahimi {
3682*22dc650dSSadaf Ebrahimi sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3683*22dc650dSSadaf Ebrahimi sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3684*22dc650dSSadaf Ebrahimi sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
3685*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
3686*22dc650dSSadaf Ebrahimi sljit_u8 opcode = 0;
3687*22dc650dSSadaf Ebrahimi sljit_uw op;
3688*22dc650dSSadaf Ebrahimi
3689*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3690*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
3691*22dc650dSSadaf Ebrahimi
3692*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src, srcw);
3693*22dc650dSSadaf Ebrahimi
3694*22dc650dSSadaf Ebrahimi if (!(type & SLJIT_SIMD_FLOAT)) {
3695*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src, srcw, (void)0);
3696*22dc650dSSadaf Ebrahimi }
3697*22dc650dSSadaf Ebrahimi
3698*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3699*22dc650dSSadaf Ebrahimi if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
3700*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3701*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_32 */
3702*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
3703*22dc650dSSadaf Ebrahimi
3704*22dc650dSSadaf Ebrahimi if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
3705*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3706*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
3707*22dc650dSSadaf Ebrahimi
3708*22dc650dSSadaf Ebrahimi if (reg_size != 4 && (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2)))
3709*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3710*22dc650dSSadaf Ebrahimi
3711*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
3712*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3713*22dc650dSSadaf Ebrahimi
3714*22dc650dSSadaf Ebrahimi if (reg_size == 5)
3715*22dc650dSSadaf Ebrahimi use_vex = 1;
3716*22dc650dSSadaf Ebrahimi
3717*22dc650dSSadaf Ebrahimi if (use_vex && src != SLJIT_IMM) {
3718*22dc650dSSadaf Ebrahimi op = 0;
3719*22dc650dSSadaf Ebrahimi
3720*22dc650dSSadaf Ebrahimi switch (elem_size) {
3721*22dc650dSSadaf Ebrahimi case 0:
3722*22dc650dSSadaf Ebrahimi if (cpu_feature_list & CPU_FEATURE_AVX2)
3723*22dc650dSSadaf Ebrahimi op = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3724*22dc650dSSadaf Ebrahimi break;
3725*22dc650dSSadaf Ebrahimi case 1:
3726*22dc650dSSadaf Ebrahimi if (cpu_feature_list & CPU_FEATURE_AVX2)
3727*22dc650dSSadaf Ebrahimi op = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3728*22dc650dSSadaf Ebrahimi break;
3729*22dc650dSSadaf Ebrahimi case 2:
3730*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_FLOAT) {
3731*22dc650dSSadaf Ebrahimi if ((cpu_feature_list & CPU_FEATURE_AVX2) || ((cpu_feature_list & CPU_FEATURE_AVX) && (src & SLJIT_MEM)))
3732*22dc650dSSadaf Ebrahimi op = VBROADCASTSS_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3733*22dc650dSSadaf Ebrahimi } else if (cpu_feature_list & CPU_FEATURE_AVX2)
3734*22dc650dSSadaf Ebrahimi op = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3735*22dc650dSSadaf Ebrahimi break;
3736*22dc650dSSadaf Ebrahimi default:
3737*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3738*22dc650dSSadaf Ebrahimi if (!(type & SLJIT_SIMD_FLOAT)) {
3739*22dc650dSSadaf Ebrahimi if (cpu_feature_list & CPU_FEATURE_AVX2)
3740*22dc650dSSadaf Ebrahimi op = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3741*22dc650dSSadaf Ebrahimi break;
3742*22dc650dSSadaf Ebrahimi }
3743*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
3744*22dc650dSSadaf Ebrahimi
3745*22dc650dSSadaf Ebrahimi if (reg_size == 5)
3746*22dc650dSSadaf Ebrahimi op = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3747*22dc650dSSadaf Ebrahimi break;
3748*22dc650dSSadaf Ebrahimi }
3749*22dc650dSSadaf Ebrahimi
3750*22dc650dSSadaf Ebrahimi if (op != 0) {
3751*22dc650dSSadaf Ebrahimi if (!(src & SLJIT_MEM) && !(type & SLJIT_SIMD_FLOAT)) {
3752*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3753*22dc650dSSadaf Ebrahimi if (elem_size >= 3)
3754*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
3755*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
3756*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw));
3757*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3758*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
3759*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
3760*22dc650dSSadaf Ebrahimi src = freg;
3761*22dc650dSSadaf Ebrahimi srcw = 0;
3762*22dc650dSSadaf Ebrahimi }
3763*22dc650dSSadaf Ebrahimi
3764*22dc650dSSadaf Ebrahimi if (reg_size == 5)
3765*22dc650dSSadaf Ebrahimi op |= VEX_256;
3766*22dc650dSSadaf Ebrahimi
3767*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, op, freg, 0, src, srcw);
3768*22dc650dSSadaf Ebrahimi }
3769*22dc650dSSadaf Ebrahimi }
3770*22dc650dSSadaf Ebrahimi
3771*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_FLOAT) {
3772*22dc650dSSadaf Ebrahimi if (src == SLJIT_IMM) {
3773*22dc650dSSadaf Ebrahimi if (use_vex)
3774*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, XORPD_x_xm | (reg_size == 5 ? VEX_256 : 0) | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0);
3775*22dc650dSSadaf Ebrahimi
3776*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, freg, freg, 0);
3777*22dc650dSSadaf Ebrahimi }
3778*22dc650dSSadaf Ebrahimi
3779*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(reg_size == 4);
3780*22dc650dSSadaf Ebrahimi
3781*22dc650dSSadaf Ebrahimi if (use_vex) {
3782*22dc650dSSadaf Ebrahimi if (elem_size == 3)
3783*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, src, srcw);
3784*22dc650dSSadaf Ebrahimi
3785*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!(src & SLJIT_MEM));
3786*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0));
3787*22dc650dSSadaf Ebrahimi return emit_byte(compiler, 0);
3788*22dc650dSSadaf Ebrahimi }
3789*22dc650dSSadaf Ebrahimi
3790*22dc650dSSadaf Ebrahimi if (elem_size == 2 && freg != src) {
3791*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, 1, freg, src, srcw));
3792*22dc650dSSadaf Ebrahimi src = freg;
3793*22dc650dSSadaf Ebrahimi srcw = 0;
3794*22dc650dSSadaf Ebrahimi }
3795*22dc650dSSadaf Ebrahimi
3796*22dc650dSSadaf Ebrahimi op = (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2;
3797*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, op, freg, src, srcw));
3798*22dc650dSSadaf Ebrahimi
3799*22dc650dSSadaf Ebrahimi if (elem_size == 2)
3800*22dc650dSSadaf Ebrahimi return emit_byte(compiler, 0);
3801*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3802*22dc650dSSadaf Ebrahimi }
3803*22dc650dSSadaf Ebrahimi
3804*22dc650dSSadaf Ebrahimi if (src == SLJIT_IMM) {
3805*22dc650dSSadaf Ebrahimi if (elem_size == 0) {
3806*22dc650dSSadaf Ebrahimi srcw = (sljit_u8)srcw;
3807*22dc650dSSadaf Ebrahimi srcw |= srcw << 8;
3808*22dc650dSSadaf Ebrahimi srcw |= srcw << 16;
3809*22dc650dSSadaf Ebrahimi elem_size = 2;
3810*22dc650dSSadaf Ebrahimi } else if (elem_size == 1) {
3811*22dc650dSSadaf Ebrahimi srcw = (sljit_u16)srcw;
3812*22dc650dSSadaf Ebrahimi srcw |= srcw << 16;
3813*22dc650dSSadaf Ebrahimi elem_size = 2;
3814*22dc650dSSadaf Ebrahimi }
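		/* Example of the widening above: a byte immediate 0xAB becomes
		   0x0000ABAB and then 0xABABABAB, so a 32-bit broadcast of the
		   widened value still replicates the original byte into every lane. */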
3815*22dc650dSSadaf Ebrahimi
3816*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3817*22dc650dSSadaf Ebrahimi if (elem_size == 2 && (sljit_s32)srcw == -1)
3818*22dc650dSSadaf Ebrahimi srcw = -1;
3819*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
3820*22dc650dSSadaf Ebrahimi
3821*22dc650dSSadaf Ebrahimi if (srcw == 0 || srcw == -1) {
3822*22dc650dSSadaf Ebrahimi if (use_vex)
3823*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0);
3824*22dc650dSSadaf Ebrahimi
3825*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, freg, freg, 0);
3826*22dc650dSSadaf Ebrahimi }
3827*22dc650dSSadaf Ebrahimi
3828*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3829*22dc650dSSadaf Ebrahimi if (elem_size == 3)
3830*22dc650dSSadaf Ebrahimi FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
3831*22dc650dSSadaf Ebrahimi else
3832*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
3833*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
3834*22dc650dSSadaf Ebrahimi
3835*22dc650dSSadaf Ebrahimi src = TMP_REG1;
3836*22dc650dSSadaf Ebrahimi srcw = 0;
3837*22dc650dSSadaf Ebrahimi
3838*22dc650dSSadaf Ebrahimi }
3839*22dc650dSSadaf Ebrahimi
3840*22dc650dSSadaf Ebrahimi op = 2;
3841*22dc650dSSadaf Ebrahimi opcode = MOVD_x_rm;
3842*22dc650dSSadaf Ebrahimi
3843*22dc650dSSadaf Ebrahimi switch (elem_size) {
3844*22dc650dSSadaf Ebrahimi case 0:
3845*22dc650dSSadaf Ebrahimi if (!FAST_IS_REG(src)) {
3846*22dc650dSSadaf Ebrahimi opcode = 0x3a /* Second opcode escape byte of PINSRB_x_rm_i8 (66 0F 3A 20). */;
3847*22dc650dSSadaf Ebrahimi op = 3;
3848*22dc650dSSadaf Ebrahimi }
3849*22dc650dSSadaf Ebrahimi break;
3850*22dc650dSSadaf Ebrahimi case 1:
3851*22dc650dSSadaf Ebrahimi if (!FAST_IS_REG(src))
3852*22dc650dSSadaf Ebrahimi opcode = PINSRW_x_rm_i8;
3853*22dc650dSSadaf Ebrahimi break;
3854*22dc650dSSadaf Ebrahimi case 2:
3855*22dc650dSSadaf Ebrahimi break;
3856*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3857*22dc650dSSadaf Ebrahimi case 3:
3858*22dc650dSSadaf Ebrahimi /* MOVQ */
3859*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
3860*22dc650dSSadaf Ebrahimi break;
3861*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
3862*22dc650dSSadaf Ebrahimi }
3863*22dc650dSSadaf Ebrahimi
3864*22dc650dSSadaf Ebrahimi if (use_vex) {
3865*22dc650dSSadaf Ebrahimi if (opcode != MOVD_x_rm) {
3866*22dc650dSSadaf Ebrahimi op = (opcode == 0x3a) ? (PINSRB_x_rm_i8 | VEX_OP_0F3A) : opcode;
3867*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1 | VEX_SSE2_OPV, freg, freg, src, srcw));
3868*22dc650dSSadaf Ebrahimi } else
3869*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw));
3870*22dc650dSSadaf Ebrahimi } else {
3871*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw);
3872*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
3873*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
3874*22dc650dSSadaf Ebrahimi inst[1] = opcode;
3875*22dc650dSSadaf Ebrahimi
3876*22dc650dSSadaf Ebrahimi if (op == 3) {
3877*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(opcode == 0x3a);
3878*22dc650dSSadaf Ebrahimi inst[2] = PINSRB_x_rm_i8;
3879*22dc650dSSadaf Ebrahimi }
3880*22dc650dSSadaf Ebrahimi }
3881*22dc650dSSadaf Ebrahimi
3882*22dc650dSSadaf Ebrahimi if (use_vex && elem_size >= 2) {
3883*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3884*22dc650dSSadaf Ebrahimi op = VPBROADCASTD_x_xm;
3885*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_32 */
3886*22dc650dSSadaf Ebrahimi op = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm;
3887*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
3888*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, op | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
3889*22dc650dSSadaf Ebrahimi }
3890*22dc650dSSadaf Ebrahimi
3891*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(reg_size == 4);
3892*22dc650dSSadaf Ebrahimi
3893*22dc650dSSadaf Ebrahimi if (opcode != MOVD_x_rm)
3894*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, 0));
3895*22dc650dSSadaf Ebrahimi
3896*22dc650dSSadaf Ebrahimi switch (elem_size) {
3897*22dc650dSSadaf Ebrahimi case 0:
3898*22dc650dSSadaf Ebrahimi if (use_vex) {
3899*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0));
3900*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, TMP_FREG, 0);
3901*22dc650dSSadaf Ebrahimi }
3902*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
3903*22dc650dSSadaf Ebrahimi return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0);
3904*22dc650dSSadaf Ebrahimi case 1:
3905*22dc650dSSadaf Ebrahimi if (use_vex)
3906*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, freg, 0));
3907*22dc650dSSadaf Ebrahimi else
3908*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, freg, 0));
3909*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, 0));
3910*22dc650dSSadaf Ebrahimi /* fallthrough */
3911*22dc650dSSadaf Ebrahimi default:
3912*22dc650dSSadaf Ebrahimi if (use_vex)
3913*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0));
3914*22dc650dSSadaf Ebrahimi else
3915*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0));
3916*22dc650dSSadaf Ebrahimi return emit_byte(compiler, 0);
3917*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3918*22dc650dSSadaf Ebrahimi case 3:
3919*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
3920*22dc650dSSadaf Ebrahimi if (use_vex)
3921*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0));
3922*22dc650dSSadaf Ebrahimi else
3923*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0));
3924*22dc650dSSadaf Ebrahimi return emit_byte(compiler, 0x44);
3925*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
3926*22dc650dSSadaf Ebrahimi }
3927*22dc650dSSadaf Ebrahimi }
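/* The shuffle immediates used in the generic tail above select which source
   element fills each destination lane (two bits per lane for PSHUFD/PSHUFLW):

       pshufb  x, zero      ; all-zero control mask: every byte lane <- byte 0
       pshufd  x, x, 0x00   ; every 32-bit lane <- element 0 (full broadcast)
       pshufd  x, x, 0x44   ; lanes = {0, 1, 0, 1}, duplicating the low 64 bits
       pshuflw x, x, 0x00   ; low four 16-bit lanes <- word 0; the following
                            ; pshufd then spreads that doubleword further

   0x44 is 01 00 01 00 in two-bit fields, read from the lowest field up. */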
3928*22dc650dSSadaf Ebrahimi
3929*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
3930*22dc650dSSadaf Ebrahimi sljit_s32 freg, sljit_s32 lane_index,
3931*22dc650dSSadaf Ebrahimi sljit_s32 srcdst, sljit_sw srcdstw)
3932*22dc650dSSadaf Ebrahimi {
3933*22dc650dSSadaf Ebrahimi sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3934*22dc650dSSadaf Ebrahimi sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3935*22dc650dSSadaf Ebrahimi sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
3936*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
3937*22dc650dSSadaf Ebrahimi sljit_u8 opcode = 0;
3938*22dc650dSSadaf Ebrahimi sljit_uw op;
3939*22dc650dSSadaf Ebrahimi sljit_s32 freg_orig = freg;
3940*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3941*22dc650dSSadaf Ebrahimi sljit_s32 srcdst_is_ereg = 0;
3942*22dc650dSSadaf Ebrahimi sljit_s32 srcdst_orig = 0;
3943*22dc650dSSadaf Ebrahimi sljit_sw srcdstw_orig = 0;
3944*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
3945*22dc650dSSadaf Ebrahimi
3946*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3947*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
3948*22dc650dSSadaf Ebrahimi
3949*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3950*22dc650dSSadaf Ebrahimi
3951*22dc650dSSadaf Ebrahimi if (reg_size == 5) {
3952*22dc650dSSadaf Ebrahimi if (!(cpu_feature_list & CPU_FEATURE_AVX2))
3953*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3954*22dc650dSSadaf Ebrahimi use_vex = 1;
3955*22dc650dSSadaf Ebrahimi } else if (reg_size != 4)
3956*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3957*22dc650dSSadaf Ebrahimi
3958*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3959*22dc650dSSadaf Ebrahimi if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2)
3960*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3961*22dc650dSSadaf Ebrahimi #else /* SLJIT_CONFIG_X86_32 */
3962*22dc650dSSadaf Ebrahimi if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
3963*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3964*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
3965*22dc650dSSadaf Ebrahimi
3966*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
3967*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3968*22dc650dSSadaf Ebrahimi
3969*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3970*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
3971*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
3972*22dc650dSSadaf Ebrahimi if (!(type & SLJIT_SIMD_FLOAT)) {
3973*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(srcdst, srcdstw, srcdst_is_ereg = 1);
3974*22dc650dSSadaf Ebrahimi
3975*22dc650dSSadaf Ebrahimi if ((type & SLJIT_SIMD_STORE) && ((srcdst_is_ereg && elem_size < 2) || (elem_size == 0 && (type & SLJIT_SIMD_LANE_SIGNED) && FAST_IS_REG(srcdst) && reg_map[srcdst] >= 4))) {
3976*22dc650dSSadaf Ebrahimi srcdst_orig = srcdst;
3977*22dc650dSSadaf Ebrahimi srcdstw_orig = srcdstw;
3978*22dc650dSSadaf Ebrahimi srcdst = TMP_REG1;
3979*22dc650dSSadaf Ebrahimi srcdstw = 0;
3980*22dc650dSSadaf Ebrahimi }
3981*22dc650dSSadaf Ebrahimi }
3982*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
3983*22dc650dSSadaf Ebrahimi
3984*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_LANE_ZERO) {
3985*22dc650dSSadaf Ebrahimi if (lane_index == 0) {
3986*22dc650dSSadaf Ebrahimi if (!(type & SLJIT_SIMD_FLOAT)) {
3987*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3988*22dc650dSSadaf Ebrahimi if (elem_size == 3) {
3989*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
3990*22dc650dSSadaf Ebrahimi elem_size = 2;
3991*22dc650dSSadaf Ebrahimi }
3992*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
3993*22dc650dSSadaf Ebrahimi if (srcdst == SLJIT_IMM) {
3994*22dc650dSSadaf Ebrahimi if (elem_size == 0)
3995*22dc650dSSadaf Ebrahimi srcdstw = (sljit_u8)srcdstw;
3996*22dc650dSSadaf Ebrahimi else if (elem_size == 1)
3997*22dc650dSSadaf Ebrahimi srcdstw = (sljit_u16)srcdstw;
3998*22dc650dSSadaf Ebrahimi
3999*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
4000*22dc650dSSadaf Ebrahimi srcdst = TMP_REG1;
4001*22dc650dSSadaf Ebrahimi srcdstw = 0;
4002*22dc650dSSadaf Ebrahimi elem_size = 2;
4003*22dc650dSSadaf Ebrahimi }
4004*22dc650dSSadaf Ebrahimi
4005*22dc650dSSadaf Ebrahimi if (elem_size == 2) {
4006*22dc650dSSadaf Ebrahimi if (use_vex)
4007*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw);
4008*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, srcdst, srcdstw);
4009*22dc650dSSadaf Ebrahimi }
4010*22dc650dSSadaf Ebrahimi } else if (srcdst & SLJIT_MEM) {
4011*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(elem_size == 2 || elem_size == 3);
4012*22dc650dSSadaf Ebrahimi
4013*22dc650dSSadaf Ebrahimi if (use_vex)
4014*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, 0, srcdst, srcdstw);
4015*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, srcdst, srcdstw);
4016*22dc650dSSadaf Ebrahimi } else if (elem_size == 3) {
4017*22dc650dSSadaf Ebrahimi if (use_vex)
4018*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, 0, srcdst, 0);
4019*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, srcdst, 0);
4020*22dc650dSSadaf Ebrahimi } else if (use_vex) {
4021*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, XORPD_x_xm | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0));
4022*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F3 | EX86_SSE2 | VEX_SSE2_OPV, freg, TMP_FREG, srcdst, 0);
4023*22dc650dSSadaf Ebrahimi }
4024*22dc650dSSadaf Ebrahimi }
4025*22dc650dSSadaf Ebrahimi
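/* For 256-bit registers a lane that lives in the upper 128-bit half is built in
   the low half of TMP_FREG and merged back with VPERMPD/VPERMQ or
   VINSERTF128/VINSERTI128 at the end. */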
4026*22dc650dSSadaf Ebrahimi if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
4027*22dc650dSSadaf Ebrahimi freg = TMP_FREG;
4028*22dc650dSSadaf Ebrahimi lane_index -= (1 << (4 - elem_size));
4029*22dc650dSSadaf Ebrahimi } else if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
4030*22dc650dSSadaf Ebrahimi if (use_vex)
4031*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, srcdst, srcdstw));
4032*22dc650dSSadaf Ebrahimi else
4033*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw));
4034*22dc650dSSadaf Ebrahimi srcdst = TMP_FREG;
4035*22dc650dSSadaf Ebrahimi srcdstw = 0;
4036*22dc650dSSadaf Ebrahimi }
4037*22dc650dSSadaf Ebrahimi
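/* Clear the whole destination register (PXOR for integers, XORPS/XORPD for
   floats) before the lane is inserted by the common code below. */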
4038*22dc650dSSadaf Ebrahimi op = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0)
4039*22dc650dSSadaf Ebrahimi | ((type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm) | EX86_SSE2;
4040*22dc650dSSadaf Ebrahimi
4041*22dc650dSSadaf Ebrahimi if (use_vex)
4042*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, op | (reg_size == 5 ? VEX_256 : 0) | VEX_SSE2_OPV, freg, freg, freg, 0));
4043*22dc650dSSadaf Ebrahimi else
4044*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, op, freg, freg, 0));
4045*22dc650dSSadaf Ebrahimi } else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
4046*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0));
4047*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, 1));
4048*22dc650dSSadaf Ebrahimi
4049*22dc650dSSadaf Ebrahimi freg = TMP_FREG;
4050*22dc650dSSadaf Ebrahimi lane_index -= (1 << (4 - elem_size));
4051*22dc650dSSadaf Ebrahimi }
4052*22dc650dSSadaf Ebrahimi
4053*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_FLOAT) {
4054*22dc650dSSadaf Ebrahimi if (elem_size == 3) {
4055*22dc650dSSadaf Ebrahimi if (srcdst & SLJIT_MEM) {
4056*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_STORE)
4057*22dc650dSSadaf Ebrahimi op = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x;
4058*22dc650dSSadaf Ebrahimi else
4059*22dc650dSSadaf Ebrahimi op = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m;
4060*22dc650dSSadaf Ebrahimi
4061*22dc650dSSadaf Ebrahimi /* A VEX-encoded MOVLPD/MOVHPD clears the upper bits of the target register, so it is only used when those bits need not be preserved. */
4062*22dc650dSSadaf Ebrahimi if (use_vex && ((type & SLJIT_SIMD_STORE) || reg_size == 4 || freg == TMP_FREG))
4063*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2
4064*22dc650dSSadaf Ebrahimi | ((type & SLJIT_SIMD_STORE) ? 0 : VEX_SSE2_OPV), freg, (type & SLJIT_SIMD_STORE) ? 0 : freg, srcdst, srcdstw));
4065*22dc650dSSadaf Ebrahimi else
4066*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, op | EX86_PREF_66 | EX86_SSE2, freg, srcdst, srcdstw));
4067*22dc650dSSadaf Ebrahimi
4068*22dc650dSSadaf Ebrahimi /* In case of a store, freg is never TMP_FREG. */
4069*22dc650dSSadaf Ebrahimi } else if (type & SLJIT_SIMD_STORE) {
4070*22dc650dSSadaf Ebrahimi if (lane_index == 1) {
4071*22dc650dSSadaf Ebrahimi if (use_vex)
4072*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, MOVHLPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, freg, 0);
4073*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, freg, 0);
4074*22dc650dSSadaf Ebrahimi }
4075*22dc650dSSadaf Ebrahimi if (use_vex)
4076*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, freg, 0);
4077*22dc650dSSadaf Ebrahimi return emit_sse2_load(compiler, 0, srcdst, freg, 0);
4078*22dc650dSSadaf Ebrahimi } else if (use_vex && (reg_size == 4 || freg == TMP_FREG)) {
4079*22dc650dSSadaf Ebrahimi if (lane_index == 1)
4080*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, MOVLHPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, srcdst, 0));
4081*22dc650dSSadaf Ebrahimi else
4082*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, srcdst, 0));
4083*22dc650dSSadaf Ebrahimi } else {
4084*22dc650dSSadaf Ebrahimi if (lane_index == 1)
4085*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, freg, srcdst, 0));
4086*22dc650dSSadaf Ebrahimi else
4087*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_load(compiler, 0, freg, srcdst, 0));
4088*22dc650dSSadaf Ebrahimi }
4089*22dc650dSSadaf Ebrahimi } else if (type & SLJIT_SIMD_STORE) {
4090*22dc650dSSadaf Ebrahimi if (lane_index == 0) {
4091*22dc650dSSadaf Ebrahimi if (use_vex)
4092*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, ((srcdst & SLJIT_MEM) ? MOVSD_xm_x : MOVSD_x_xm) | EX86_PREF_F3 | EX86_SSE2
4093*22dc650dSSadaf Ebrahimi | ((srcdst & SLJIT_MEM) ? 0 : VEX_SSE2_OPV), freg, ((srcdst & SLJIT_MEM) ? 0 : freg), srcdst, srcdstw);
4094*22dc650dSSadaf Ebrahimi return emit_sse2_store(compiler, 1, srcdst, srcdstw, freg);
4095*22dc650dSSadaf Ebrahimi }
4096*22dc650dSSadaf Ebrahimi
4097*22dc650dSSadaf Ebrahimi if (srcdst & SLJIT_MEM) {
4098*22dc650dSSadaf Ebrahimi if (use_vex)
4099*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, srcdst, srcdstw));
4100*22dc650dSSadaf Ebrahimi else
4101*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw));
4102*22dc650dSSadaf Ebrahimi return emit_byte(compiler, U8(lane_index));
4103*22dc650dSSadaf Ebrahimi }
4104*22dc650dSSadaf Ebrahimi
4105*22dc650dSSadaf Ebrahimi if (use_vex) {
4106*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, freg, freg, 0));
4107*22dc650dSSadaf Ebrahimi return emit_byte(compiler, U8(lane_index));
4108*22dc650dSSadaf Ebrahimi }
4109*22dc650dSSadaf Ebrahimi
4110*22dc650dSSadaf Ebrahimi if (srcdst == freg)
4111*22dc650dSSadaf Ebrahimi op = SHUFPS_x_xm | EX86_SSE2;
4112*22dc650dSSadaf Ebrahimi else {
4113*22dc650dSSadaf Ebrahimi switch (lane_index) {
4114*22dc650dSSadaf Ebrahimi case 1:
4115*22dc650dSSadaf Ebrahimi op = MOVSHDUP_x_xm | EX86_PREF_F3 | EX86_SSE2;
4116*22dc650dSSadaf Ebrahimi break;
4117*22dc650dSSadaf Ebrahimi case 2:
4118*22dc650dSSadaf Ebrahimi op = MOVHLPS_x_x | EX86_SSE2;
4119*22dc650dSSadaf Ebrahimi break;
4120*22dc650dSSadaf Ebrahimi default:
4121*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(lane_index == 3);
4122*22dc650dSSadaf Ebrahimi op = PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2;
4123*22dc650dSSadaf Ebrahimi break;
4124*22dc650dSSadaf Ebrahimi }
4125*22dc650dSSadaf Ebrahimi }
4126*22dc650dSSadaf Ebrahimi
4127*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, op, srcdst, freg, 0));
4128*22dc650dSSadaf Ebrahimi
4129*22dc650dSSadaf Ebrahimi op &= 0xff;
4130*22dc650dSSadaf Ebrahimi if (op == SHUFPS_x_xm || op == PSHUFD_x_xm)
4131*22dc650dSSadaf Ebrahimi return emit_byte(compiler, U8(lane_index));
4132*22dc650dSSadaf Ebrahimi
4133*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4134*22dc650dSSadaf Ebrahimi } else {
4135*22dc650dSSadaf Ebrahimi if (lane_index != 0 || (srcdst & SLJIT_MEM)) {
4136*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw));
4137*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, U8(lane_index << 4)));
4138*22dc650dSSadaf Ebrahimi } else
4139*22dc650dSSadaf Ebrahimi FAIL_IF(emit_sse2_store(compiler, 1, freg, 0, srcdst));
4140*22dc650dSSadaf Ebrahimi }
4141*22dc650dSSadaf Ebrahimi
4142*22dc650dSSadaf Ebrahimi if (freg != TMP_FREG || (type & SLJIT_SIMD_STORE))
4143*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4144*22dc650dSSadaf Ebrahimi
4145*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(reg_size == 5);
4146*22dc650dSSadaf Ebrahimi
4147*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_LANE_ZERO) {
4148*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0));
4149*22dc650dSSadaf Ebrahimi return emit_byte(compiler, 0x4e);
4150*22dc650dSSadaf Ebrahimi }
4151*22dc650dSSadaf Ebrahimi
4152*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0));
4153*22dc650dSSadaf Ebrahimi return emit_byte(compiler, 1);
4154*22dc650dSSadaf Ebrahimi }
4155*22dc650dSSadaf Ebrahimi
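/* Integer lane moves: immediates are first materialized in TMP_REG1, then the
   PINSRx/PEXTRx instruction selected below is used. */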
4156*22dc650dSSadaf Ebrahimi if (srcdst == SLJIT_IMM) {
4157*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
4158*22dc650dSSadaf Ebrahimi srcdst = TMP_REG1;
4159*22dc650dSSadaf Ebrahimi srcdstw = 0;
4160*22dc650dSSadaf Ebrahimi }
4161*22dc650dSSadaf Ebrahimi
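/* op is the opcode length: 3 selects the three-byte 0F 3A opcode map, 2 the
   two-byte 0F map used by PINSRW. */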
4162*22dc650dSSadaf Ebrahimi op = 3;
4163*22dc650dSSadaf Ebrahimi
4164*22dc650dSSadaf Ebrahimi switch (elem_size) {
4165*22dc650dSSadaf Ebrahimi case 0:
4166*22dc650dSSadaf Ebrahimi opcode = (type & SLJIT_SIMD_STORE) ? PEXTRB_rm_x_i8 : PINSRB_x_rm_i8;
4167*22dc650dSSadaf Ebrahimi break;
4168*22dc650dSSadaf Ebrahimi case 1:
4169*22dc650dSSadaf Ebrahimi if (!(type & SLJIT_SIMD_STORE)) {
4170*22dc650dSSadaf Ebrahimi op = 2;
4171*22dc650dSSadaf Ebrahimi opcode = PINSRW_x_rm_i8;
4172*22dc650dSSadaf Ebrahimi } else
4173*22dc650dSSadaf Ebrahimi opcode = PEXTRW_rm_x_i8;
4174*22dc650dSSadaf Ebrahimi break;
4175*22dc650dSSadaf Ebrahimi case 2:
4176*22dc650dSSadaf Ebrahimi opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
4177*22dc650dSSadaf Ebrahimi break;
4178*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4179*22dc650dSSadaf Ebrahimi case 3:
4180*22dc650dSSadaf Ebrahimi /* With REX.W set (mode32 == 0) these encode as PINSRQ / PEXTRQ. */
4181*22dc650dSSadaf Ebrahimi opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
4182*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
4183*22dc650dSSadaf Ebrahimi break;
4184*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
4185*22dc650dSSadaf Ebrahimi }
4186*22dc650dSSadaf Ebrahimi
4187*22dc650dSSadaf Ebrahimi if (use_vex && (type & SLJIT_SIMD_STORE)) {
4188*22dc650dSSadaf Ebrahimi op = opcode | ((op == 3) ? VEX_OP_0F3A : 0);
4189*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | VEX_AUTO_W | EX86_SSE2_OP1 | VEX_SSE2_OPV, freg, 0, srcdst, srcdstw));
4190*22dc650dSSadaf Ebrahimi } else {
4191*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw);
4192*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
4193*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
4194*22dc650dSSadaf Ebrahimi
4195*22dc650dSSadaf Ebrahimi if (op == 3) {
4196*22dc650dSSadaf Ebrahimi inst[1] = 0x3a;
4197*22dc650dSSadaf Ebrahimi inst[2] = opcode;
4198*22dc650dSSadaf Ebrahimi } else
4199*22dc650dSSadaf Ebrahimi inst[1] = opcode;
4200*22dc650dSSadaf Ebrahimi }
4201*22dc650dSSadaf Ebrahimi
4202*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, U8(lane_index)));
4203*22dc650dSSadaf Ebrahimi
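/* PEXTRB/PEXTRW zero-extend into a general register, so a signed extract of an
   8/16-bit (or, on x86-64, a 32-bit) lane needs the extra MOVSX/MOVSXD below. */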
4204*22dc650dSSadaf Ebrahimi if (!(type & SLJIT_SIMD_LANE_SIGNED) || (srcdst & SLJIT_MEM)) {
4205*22dc650dSSadaf Ebrahimi if (freg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) {
4206*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(reg_size == 5);
4207*22dc650dSSadaf Ebrahimi
4208*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_LANE_ZERO) {
4209*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0));
4210*22dc650dSSadaf Ebrahimi return emit_byte(compiler, 0x4e);
4211*22dc650dSSadaf Ebrahimi }
4212*22dc650dSSadaf Ebrahimi
4213*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0));
4214*22dc650dSSadaf Ebrahimi return emit_byte(compiler, 1);
4215*22dc650dSSadaf Ebrahimi }
4216*22dc650dSSadaf Ebrahimi
4217*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4218*22dc650dSSadaf Ebrahimi if (srcdst_orig & SLJIT_MEM)
4219*22dc650dSSadaf Ebrahimi return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
4220*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
4221*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4222*22dc650dSSadaf Ebrahimi }
4223*22dc650dSSadaf Ebrahimi
4224*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4225*22dc650dSSadaf Ebrahimi if (elem_size >= 3)
4226*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4227*22dc650dSSadaf Ebrahimi
4228*22dc650dSSadaf Ebrahimi compiler->mode32 = (type & SLJIT_32);
4229*22dc650dSSadaf Ebrahimi
4230*22dc650dSSadaf Ebrahimi op = 2;
4231*22dc650dSSadaf Ebrahimi
4232*22dc650dSSadaf Ebrahimi if (elem_size == 0)
4233*22dc650dSSadaf Ebrahimi op |= EX86_REX;
4234*22dc650dSSadaf Ebrahimi
4235*22dc650dSSadaf Ebrahimi if (elem_size == 2) {
4236*22dc650dSSadaf Ebrahimi if (type & SLJIT_32)
4237*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4238*22dc650dSSadaf Ebrahimi
4239*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!(compiler->mode32));
4240*22dc650dSSadaf Ebrahimi op = 1;
4241*22dc650dSSadaf Ebrahimi }
4242*22dc650dSSadaf Ebrahimi
4243*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, op, srcdst, 0, srcdst, 0);
4244*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
4245*22dc650dSSadaf Ebrahimi
4246*22dc650dSSadaf Ebrahimi if (op != 1) {
4247*22dc650dSSadaf Ebrahimi inst[0] = GROUP_0F;
4248*22dc650dSSadaf Ebrahimi inst[1] = U8((elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16);
4249*22dc650dSSadaf Ebrahimi } else
4250*22dc650dSSadaf Ebrahimi inst[0] = MOVSXD_r_rm;
4251*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
4252*22dc650dSSadaf Ebrahimi if (elem_size >= 2)
4253*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4254*22dc650dSSadaf Ebrahimi
4255*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, (elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16,
4256*22dc650dSSadaf Ebrahimi (srcdst_orig != 0 && FAST_IS_REG(srcdst_orig)) ? srcdst_orig : srcdst, srcdst, 0));
4257*22dc650dSSadaf Ebrahimi
4258*22dc650dSSadaf Ebrahimi if (srcdst_orig & SLJIT_MEM)
4259*22dc650dSSadaf Ebrahimi return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
4260*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
4261*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4262*22dc650dSSadaf Ebrahimi }
4263*22dc650dSSadaf Ebrahimi
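/* sljit_emit_simd_lane_replicate broadcasts lane src_lane_index of src into every
   lane of freg. */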
4264*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
4265*22dc650dSSadaf Ebrahimi sljit_s32 freg,
4266*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_s32 src_lane_index)
4267*22dc650dSSadaf Ebrahimi {
4268*22dc650dSSadaf Ebrahimi sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4269*22dc650dSSadaf Ebrahimi sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4270*22dc650dSSadaf Ebrahimi sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4271*22dc650dSSadaf Ebrahimi sljit_uw pref;
4272*22dc650dSSadaf Ebrahimi sljit_u8 byte;
4273*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4274*22dc650dSSadaf Ebrahimi sljit_s32 opcode3 = TMP_REG1;
4275*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_32 */
4276*22dc650dSSadaf Ebrahimi sljit_s32 opcode3 = SLJIT_S0;
4277*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
4278*22dc650dSSadaf Ebrahimi
4279*22dc650dSSadaf Ebrahimi CHECK_ERROR();
4280*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
4281*22dc650dSSadaf Ebrahimi
4282*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4283*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
4284*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
4285*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(reg_map[opcode3] == 3);
4286*22dc650dSSadaf Ebrahimi
4287*22dc650dSSadaf Ebrahimi if (reg_size == 5) {
4288*22dc650dSSadaf Ebrahimi if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4289*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4290*22dc650dSSadaf Ebrahimi use_vex = 1;
4291*22dc650dSSadaf Ebrahimi } else if (reg_size != 4)
4292*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4293*22dc650dSSadaf Ebrahimi
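/* Float lanes are replicated with MOVDDUP/SHUFPS/SHUFPD, or with
   VBROADCASTSS/VBROADCASTSD and VPERMPD when a 256-bit register is used. */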
4294*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_FLOAT) {
4295*22dc650dSSadaf Ebrahimi pref = 0;
4296*22dc650dSSadaf Ebrahimi byte = U8(src_lane_index);
4297*22dc650dSSadaf Ebrahimi
4298*22dc650dSSadaf Ebrahimi if (elem_size == 3) {
4299*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
4300*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4301*22dc650dSSadaf Ebrahimi
4302*22dc650dSSadaf Ebrahimi if (reg_size == 5) {
4303*22dc650dSSadaf Ebrahimi if (src_lane_index == 0)
4304*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
4305*22dc650dSSadaf Ebrahimi
4306*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4307*22dc650dSSadaf Ebrahimi
4308*22dc650dSSadaf Ebrahimi byte = U8(byte | (byte << 2));
4309*22dc650dSSadaf Ebrahimi return emit_byte(compiler, U8(byte | (byte << 4)));
4310*22dc650dSSadaf Ebrahimi }
4311*22dc650dSSadaf Ebrahimi
4312*22dc650dSSadaf Ebrahimi if (src_lane_index == 0) {
4313*22dc650dSSadaf Ebrahimi if (use_vex)
4314*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, src, 0);
4315*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, src, 0);
4316*22dc650dSSadaf Ebrahimi }
4317*22dc650dSSadaf Ebrahimi
4318*22dc650dSSadaf Ebrahimi /* The 66 prefix changes SHUFPS_x_xm below into SHUFPD_x_xm. */
4319*22dc650dSSadaf Ebrahimi pref = EX86_PREF_66;
4320*22dc650dSSadaf Ebrahimi } else if (elem_size != 2)
4321*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4322*22dc650dSSadaf Ebrahimi else if (type & SLJIT_SIMD_TEST)
4323*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4324*22dc650dSSadaf Ebrahimi
4325*22dc650dSSadaf Ebrahimi if (reg_size == 5) {
4326*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(elem_size == 2);
4327*22dc650dSSadaf Ebrahimi
4328*22dc650dSSadaf Ebrahimi if (src_lane_index == 0)
4329*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
4330*22dc650dSSadaf Ebrahimi
4331*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4332*22dc650dSSadaf Ebrahimi
4333*22dc650dSSadaf Ebrahimi byte = 0x44;
4334*22dc650dSSadaf Ebrahimi if (src_lane_index >= 4) {
4335*22dc650dSSadaf Ebrahimi byte = 0xee;
4336*22dc650dSSadaf Ebrahimi src_lane_index -= 4;
4337*22dc650dSSadaf Ebrahimi }
4338*22dc650dSSadaf Ebrahimi
4339*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, byte));
4340*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0));
4341*22dc650dSSadaf Ebrahimi byte = U8(src_lane_index);
4342*22dc650dSSadaf Ebrahimi } else if (use_vex) {
4343*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0));
4344*22dc650dSSadaf Ebrahimi } else {
4345*22dc650dSSadaf Ebrahimi if (freg != src)
4346*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | pref | EX86_SSE2, freg, src, 0));
4347*22dc650dSSadaf Ebrahimi
4348*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm | pref | EX86_SSE2, freg, freg, 0));
4349*22dc650dSSadaf Ebrahimi }
4350*22dc650dSSadaf Ebrahimi
4351*22dc650dSSadaf Ebrahimi if (elem_size == 2) {
4352*22dc650dSSadaf Ebrahimi byte = U8(byte | (byte << 2));
4353*22dc650dSSadaf Ebrahimi byte = U8(byte | (byte << 4));
4354*22dc650dSSadaf Ebrahimi } else
4355*22dc650dSSadaf Ebrahimi byte = U8(byte | (byte << 1));
4356*22dc650dSSadaf Ebrahimi
4357*22dc650dSSadaf Ebrahimi return emit_byte(compiler, U8(byte));
4358*22dc650dSSadaf Ebrahimi }
4359*22dc650dSSadaf Ebrahimi
4360*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
4361*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4362*22dc650dSSadaf Ebrahimi
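/* Byte replication: the selected byte is first moved towards byte 0 (PSHUFD or
   PSHUFLW when the index is suitably aligned, otherwise a PSRLDQ byte shift),
   then broadcast with VPBROADCASTB on AVX2 or with PSHUFB and an all-zero
   shuffle mask. */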
4363*22dc650dSSadaf Ebrahimi if (elem_size == 0) {
4364*22dc650dSSadaf Ebrahimi if (reg_size == 5 && src_lane_index >= 16) {
4365*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4366*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, src_lane_index >= 24 ? 0xff : 0xaa));
4367*22dc650dSSadaf Ebrahimi src_lane_index &= 0x7;
4368*22dc650dSSadaf Ebrahimi src = freg;
4369*22dc650dSSadaf Ebrahimi }
4370*22dc650dSSadaf Ebrahimi
4371*22dc650dSSadaf Ebrahimi if (src_lane_index != 0 || (freg != src && (!(cpu_feature_list & CPU_FEATURE_AVX2) || !use_vex))) {
4372*22dc650dSSadaf Ebrahimi pref = 0;
4373*22dc650dSSadaf Ebrahimi
4374*22dc650dSSadaf Ebrahimi if ((src_lane_index & 0x3) == 0) {
4375*22dc650dSSadaf Ebrahimi pref = EX86_PREF_66;
4376*22dc650dSSadaf Ebrahimi byte = U8(src_lane_index >> 2);
4377*22dc650dSSadaf Ebrahimi } else if (src_lane_index < 8 && (src_lane_index & 0x1) == 0) {
4378*22dc650dSSadaf Ebrahimi pref = EX86_PREF_F2;
4379*22dc650dSSadaf Ebrahimi byte = U8(src_lane_index >> 1);
4380*22dc650dSSadaf Ebrahimi } else {
4381*22dc650dSSadaf Ebrahimi if (!use_vex) {
4382*22dc650dSSadaf Ebrahimi if (freg != src)
4383*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0));
4384*22dc650dSSadaf Ebrahimi
4385*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, freg, 0));
4386*22dc650dSSadaf Ebrahimi } else
4387*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, freg, src, 0));
4388*22dc650dSSadaf Ebrahimi
4389*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, U8(src_lane_index)));
4390*22dc650dSSadaf Ebrahimi }
4391*22dc650dSSadaf Ebrahimi
4392*22dc650dSSadaf Ebrahimi if (pref != 0) {
4393*22dc650dSSadaf Ebrahimi if (use_vex)
4394*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, 0, src, 0));
4395*22dc650dSSadaf Ebrahimi else
4396*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0));
4397*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, byte));
4398*22dc650dSSadaf Ebrahimi }
4399*22dc650dSSadaf Ebrahimi
4400*22dc650dSSadaf Ebrahimi src = freg;
4401*22dc650dSSadaf Ebrahimi }
4402*22dc650dSSadaf Ebrahimi
4403*22dc650dSSadaf Ebrahimi if (use_vex && (cpu_feature_list & CPU_FEATURE_AVX2))
4404*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
4405*22dc650dSSadaf Ebrahimi
4406*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(reg_size == 4);
4407*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
4408*22dc650dSSadaf Ebrahimi return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0);
4409*22dc650dSSadaf Ebrahimi }
4410*22dc650dSSadaf Ebrahimi
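/* With AVX2, lane 0 of 16/32/64-bit elements can be broadcast directly by
   VPBROADCASTW/VPBROADCASTD/VPBROADCASTQ. */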
4411*22dc650dSSadaf Ebrahimi if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && src_lane_index == 0 && elem_size <= 3) {
4412*22dc650dSSadaf Ebrahimi switch (elem_size) {
4413*22dc650dSSadaf Ebrahimi case 1:
4414*22dc650dSSadaf Ebrahimi pref = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4415*22dc650dSSadaf Ebrahimi break;
4416*22dc650dSSadaf Ebrahimi case 2:
4417*22dc650dSSadaf Ebrahimi pref = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4418*22dc650dSSadaf Ebrahimi break;
4419*22dc650dSSadaf Ebrahimi default:
4420*22dc650dSSadaf Ebrahimi pref = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4421*22dc650dSSadaf Ebrahimi break;
4422*22dc650dSSadaf Ebrahimi }
4423*22dc650dSSadaf Ebrahimi
4424*22dc650dSSadaf Ebrahimi if (reg_size == 5)
4425*22dc650dSSadaf Ebrahimi pref |= VEX_256;
4426*22dc650dSSadaf Ebrahimi
4427*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, pref, freg, 0, src, 0);
4428*22dc650dSSadaf Ebrahimi }
4429*22dc650dSSadaf Ebrahimi
4430*22dc650dSSadaf Ebrahimi if (reg_size == 5) {
4431*22dc650dSSadaf Ebrahimi switch (elem_size) {
4432*22dc650dSSadaf Ebrahimi case 1:
4433*22dc650dSSadaf Ebrahimi byte = U8(src_lane_index & 0x3);
4434*22dc650dSSadaf Ebrahimi src_lane_index >>= 2;
4435*22dc650dSSadaf Ebrahimi pref = PSHUFLW_x_xm | VEX_256 | ((src_lane_index & 1) == 0 ? EX86_PREF_F2 : EX86_PREF_F3) | EX86_SSE2;
4436*22dc650dSSadaf Ebrahimi break;
4437*22dc650dSSadaf Ebrahimi case 2:
4438*22dc650dSSadaf Ebrahimi byte = U8(src_lane_index & 0x3);
4439*22dc650dSSadaf Ebrahimi src_lane_index >>= 1;
4440*22dc650dSSadaf Ebrahimi pref = PSHUFD_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2;
4441*22dc650dSSadaf Ebrahimi break;
4442*22dc650dSSadaf Ebrahimi case 3:
4443*22dc650dSSadaf Ebrahimi pref = 0;
4444*22dc650dSSadaf Ebrahimi break;
4445*22dc650dSSadaf Ebrahimi default:
4446*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4447*22dc650dSSadaf Ebrahimi return emit_byte(compiler, U8(src_lane_index == 0 ? 0x44 : 0xee));
4448*22dc650dSSadaf Ebrahimi }
4449*22dc650dSSadaf Ebrahimi
4450*22dc650dSSadaf Ebrahimi if (pref != 0) {
4451*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, pref, freg, 0, src, 0));
4452*22dc650dSSadaf Ebrahimi byte = U8(byte | (byte << 2));
4453*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
4454*22dc650dSSadaf Ebrahimi
4455*22dc650dSSadaf Ebrahimi if (src_lane_index == 0)
4456*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, VPBROADCASTQ_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
4457*22dc650dSSadaf Ebrahimi
4458*22dc650dSSadaf Ebrahimi src = freg;
4459*22dc650dSSadaf Ebrahimi }
4460*22dc650dSSadaf Ebrahimi
4461*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4462*22dc650dSSadaf Ebrahimi byte = U8(src_lane_index);
4463*22dc650dSSadaf Ebrahimi byte = U8(byte | (byte << 2));
4464*22dc650dSSadaf Ebrahimi return emit_byte(compiler, U8(byte | (byte << 4)));
4465*22dc650dSSadaf Ebrahimi }
4466*22dc650dSSadaf Ebrahimi
4467*22dc650dSSadaf Ebrahimi switch (elem_size) {
4468*22dc650dSSadaf Ebrahimi case 1:
4469*22dc650dSSadaf Ebrahimi byte = U8(src_lane_index & 0x3);
4470*22dc650dSSadaf Ebrahimi src_lane_index >>= 1;
4471*22dc650dSSadaf Ebrahimi pref = (src_lane_index & 2) == 0 ? EX86_PREF_F2 : EX86_PREF_F3;
4472*22dc650dSSadaf Ebrahimi
4473*22dc650dSSadaf Ebrahimi if (use_vex)
4474*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, 0, src, 0));
4475*22dc650dSSadaf Ebrahimi else
4476*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0));
4477*22dc650dSSadaf Ebrahimi byte = U8(byte | (byte << 2));
4478*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
4479*22dc650dSSadaf Ebrahimi
4480*22dc650dSSadaf Ebrahimi if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && pref == EX86_PREF_F2)
4481*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
4482*22dc650dSSadaf Ebrahimi
4483*22dc650dSSadaf Ebrahimi src = freg;
4484*22dc650dSSadaf Ebrahimi /* fallthrough */
4485*22dc650dSSadaf Ebrahimi case 2:
4486*22dc650dSSadaf Ebrahimi byte = U8(src_lane_index);
4487*22dc650dSSadaf Ebrahimi byte = U8(byte | (byte << 2));
4488*22dc650dSSadaf Ebrahimi break;
4489*22dc650dSSadaf Ebrahimi default:
4490*22dc650dSSadaf Ebrahimi byte = U8(src_lane_index << 1);
4491*22dc650dSSadaf Ebrahimi byte = U8(byte | (byte << 2) | 0x4);
4492*22dc650dSSadaf Ebrahimi break;
4493*22dc650dSSadaf Ebrahimi }
4494*22dc650dSSadaf Ebrahimi
4495*22dc650dSSadaf Ebrahimi if (use_vex)
4496*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, src, 0));
4497*22dc650dSSadaf Ebrahimi else
4498*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0));
4499*22dc650dSSadaf Ebrahimi return emit_byte(compiler, U8(byte | (byte << 4)));
4500*22dc650dSSadaf Ebrahimi }
4501*22dc650dSSadaf Ebrahimi
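/* sljit_emit_simd_extend widens the packed elements in the low part of src from
   elem_size to elem2_size (signed or unsigned), or converts packed single
   precision floats to doubles. */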
4502*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
4503*22dc650dSSadaf Ebrahimi sljit_s32 freg,
4504*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
4505*22dc650dSSadaf Ebrahimi {
4506*22dc650dSSadaf Ebrahimi sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4507*22dc650dSSadaf Ebrahimi sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4508*22dc650dSSadaf Ebrahimi sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
4509*22dc650dSSadaf Ebrahimi sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4510*22dc650dSSadaf Ebrahimi sljit_u8 opcode;
4511*22dc650dSSadaf Ebrahimi
4512*22dc650dSSadaf Ebrahimi CHECK_ERROR();
4513*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
4514*22dc650dSSadaf Ebrahimi
4515*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src, srcw);
4516*22dc650dSSadaf Ebrahimi
4517*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4518*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
4519*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
4520*22dc650dSSadaf Ebrahimi
4521*22dc650dSSadaf Ebrahimi if (reg_size == 5) {
4522*22dc650dSSadaf Ebrahimi if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4523*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4524*22dc650dSSadaf Ebrahimi use_vex = 1;
4525*22dc650dSSadaf Ebrahimi } else if (reg_size != 4)
4526*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4527*22dc650dSSadaf Ebrahimi
4528*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_FLOAT) {
4529*22dc650dSSadaf Ebrahimi if (elem_size != 2 || elem2_size != 3)
4530*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4531*22dc650dSSadaf Ebrahimi
4532*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
4533*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4534*22dc650dSSadaf Ebrahimi
4535*22dc650dSSadaf Ebrahimi if (use_vex)
4536*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, CVTPS2PD_x_xm | ((reg_size == 5) ? VEX_256 : 0) | EX86_SSE2, freg, 0, src, srcw);
4537*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, freg, src, srcw);
4538*22dc650dSSadaf Ebrahimi }
4539*22dc650dSSadaf Ebrahimi
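/* Select the PMOVSX/PMOVZX form that matches the source and destination element
   widths; unsupported combinations are rejected. */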
4540*22dc650dSSadaf Ebrahimi switch (elem_size) {
4541*22dc650dSSadaf Ebrahimi case 0:
4542*22dc650dSSadaf Ebrahimi if (elem2_size == 1)
4543*22dc650dSSadaf Ebrahimi opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBW_x_xm : PMOVZXBW_x_xm;
4544*22dc650dSSadaf Ebrahimi else if (elem2_size == 2)
4545*22dc650dSSadaf Ebrahimi opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBD_x_xm : PMOVZXBD_x_xm;
4546*22dc650dSSadaf Ebrahimi else if (elem2_size == 3)
4547*22dc650dSSadaf Ebrahimi opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBQ_x_xm : PMOVZXBQ_x_xm;
4548*22dc650dSSadaf Ebrahimi else
4549*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4550*22dc650dSSadaf Ebrahimi break;
4551*22dc650dSSadaf Ebrahimi case 1:
4552*22dc650dSSadaf Ebrahimi if (elem2_size == 2)
4553*22dc650dSSadaf Ebrahimi opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWD_x_xm : PMOVZXWD_x_xm;
4554*22dc650dSSadaf Ebrahimi else if (elem2_size == 3)
4555*22dc650dSSadaf Ebrahimi opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWQ_x_xm : PMOVZXWQ_x_xm;
4556*22dc650dSSadaf Ebrahimi else
4557*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4558*22dc650dSSadaf Ebrahimi break;
4559*22dc650dSSadaf Ebrahimi case 2:
4560*22dc650dSSadaf Ebrahimi if (elem2_size == 3)
4561*22dc650dSSadaf Ebrahimi opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXDQ_x_xm : PMOVZXDQ_x_xm;
4562*22dc650dSSadaf Ebrahimi else
4563*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4564*22dc650dSSadaf Ebrahimi break;
4565*22dc650dSSadaf Ebrahimi default:
4566*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4567*22dc650dSSadaf Ebrahimi }
4568*22dc650dSSadaf Ebrahimi
4569*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
4570*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4571*22dc650dSSadaf Ebrahimi
4572*22dc650dSSadaf Ebrahimi if (use_vex)
4573*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, opcode | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, srcw);
4574*22dc650dSSadaf Ebrahimi return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, src, srcw);
4575*22dc650dSSadaf Ebrahimi }
4576*22dc650dSSadaf Ebrahimi
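/* sljit_emit_simd_sign gathers the sign bit of every element of freg into a bit
   mask stored in dst (PMOVMSKB/MOVMSKPS/MOVMSKPD); 16-bit elements are first
   narrowed with PACKSSWB and the resulting byte mask is shifted right by 8. */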
4577*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
4578*22dc650dSSadaf Ebrahimi sljit_s32 freg,
4579*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw)
4580*22dc650dSSadaf Ebrahimi {
4581*22dc650dSSadaf Ebrahimi sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4582*22dc650dSSadaf Ebrahimi sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4583*22dc650dSSadaf Ebrahimi sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4584*22dc650dSSadaf Ebrahimi sljit_s32 dst_r;
4585*22dc650dSSadaf Ebrahimi sljit_uw op;
4586*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
4587*22dc650dSSadaf Ebrahimi
4588*22dc650dSSadaf Ebrahimi CHECK_ERROR();
4589*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
4590*22dc650dSSadaf Ebrahimi
4591*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
4592*22dc650dSSadaf Ebrahimi
4593*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(dst, dstw, (void)0);
4594*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4595*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
4596*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
4597*22dc650dSSadaf Ebrahimi
4598*22dc650dSSadaf Ebrahimi if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
4599*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4600*22dc650dSSadaf Ebrahimi
4601*22dc650dSSadaf Ebrahimi if (reg_size == 4) {
4602*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
4603*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4604*22dc650dSSadaf Ebrahimi
4605*22dc650dSSadaf Ebrahimi op = EX86_PREF_66 | EX86_SSE2_OP2;
4606*22dc650dSSadaf Ebrahimi
4607*22dc650dSSadaf Ebrahimi switch (elem_size) {
4608*22dc650dSSadaf Ebrahimi case 1:
4609*22dc650dSSadaf Ebrahimi if (use_vex)
4610*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, freg, 0));
4611*22dc650dSSadaf Ebrahimi else
4612*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, freg, 0));
4613*22dc650dSSadaf Ebrahimi freg = TMP_FREG;
4614*22dc650dSSadaf Ebrahimi break;
4615*22dc650dSSadaf Ebrahimi case 2:
4616*22dc650dSSadaf Ebrahimi op = EX86_SSE2_OP2;
4617*22dc650dSSadaf Ebrahimi break;
4618*22dc650dSSadaf Ebrahimi }
4619*22dc650dSSadaf Ebrahimi
4620*22dc650dSSadaf Ebrahimi dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4621*22dc650dSSadaf Ebrahimi op |= (elem_size < 2) ? PMOVMSKB_r_x : MOVMSKPS_r_x;
4622*22dc650dSSadaf Ebrahimi
4623*22dc650dSSadaf Ebrahimi if (use_vex)
4624*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, freg, 0));
4625*22dc650dSSadaf Ebrahimi else
4626*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, op, dst_r, freg, 0));
4627*22dc650dSSadaf Ebrahimi
4628*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4629*22dc650dSSadaf Ebrahimi compiler->mode32 = type & SLJIT_32;
4630*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
4631*22dc650dSSadaf Ebrahimi
4632*22dc650dSSadaf Ebrahimi if (elem_size == 1) {
4633*22dc650dSSadaf Ebrahimi inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 8, dst_r, 0);
4634*22dc650dSSadaf Ebrahimi FAIL_IF(!inst);
4635*22dc650dSSadaf Ebrahimi inst[1] |= SHR;
4636*22dc650dSSadaf Ebrahimi }
4637*22dc650dSSadaf Ebrahimi
4638*22dc650dSSadaf Ebrahimi if (dst_r == TMP_REG1)
4639*22dc650dSSadaf Ebrahimi return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
4640*22dc650dSSadaf Ebrahimi
4641*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4642*22dc650dSSadaf Ebrahimi }
4643*22dc650dSSadaf Ebrahimi
4644*22dc650dSSadaf Ebrahimi if (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2))
4645*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4646*22dc650dSSadaf Ebrahimi
4647*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
4648*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4649*22dc650dSSadaf Ebrahimi
4650*22dc650dSSadaf Ebrahimi dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4651*22dc650dSSadaf Ebrahimi
4652*22dc650dSSadaf Ebrahimi if (elem_size == 1) {
4653*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0));
4654*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, 1));
4655*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, TMP_FREG, 0));
4656*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x | EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0));
4657*22dc650dSSadaf Ebrahimi } else {
4658*22dc650dSSadaf Ebrahimi op = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2;
4659*22dc650dSSadaf Ebrahimi
4660*22dc650dSSadaf Ebrahimi if (elem_size == 0)
4661*22dc650dSSadaf Ebrahimi op = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2;
4662*22dc650dSSadaf Ebrahimi else if (elem_size == 3)
4663*22dc650dSSadaf Ebrahimi op |= EX86_PREF_66;
4664*22dc650dSSadaf Ebrahimi
4665*22dc650dSSadaf Ebrahimi FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, freg, 0));
4666*22dc650dSSadaf Ebrahimi }
4667*22dc650dSSadaf Ebrahimi
4668*22dc650dSSadaf Ebrahimi if (dst_r == TMP_REG1) {
4669*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4670*22dc650dSSadaf Ebrahimi compiler->mode32 = type & SLJIT_32;
4671*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
4672*22dc650dSSadaf Ebrahimi return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
4673*22dc650dSSadaf Ebrahimi }
4674*22dc650dSSadaf Ebrahimi
4675*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4676*22dc650dSSadaf Ebrahimi }
4677*22dc650dSSadaf Ebrahimi
4678*22dc650dSSadaf Ebrahimi static sljit_s32 emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
4679*22dc650dSSadaf Ebrahimi sljit_s32 dst_freg, sljit_s32 src_freg)
4680*22dc650dSSadaf Ebrahimi {
4681*22dc650dSSadaf Ebrahimi sljit_uw op = ((type & SLJIT_SIMD_FLOAT) ? MOVAPS_x_xm : MOVDQA_x_xm) | EX86_SSE2;
4682*22dc650dSSadaf Ebrahimi
4683*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(SLJIT_SIMD_GET_REG_SIZE(type) == 4);
4684*22dc650dSSadaf Ebrahimi
4685*22dc650dSSadaf Ebrahimi if (!(type & SLJIT_SIMD_FLOAT) || SLJIT_SIMD_GET_ELEM_SIZE(type) == 3)
4686*22dc650dSSadaf Ebrahimi op |= EX86_PREF_66;
4687*22dc650dSSadaf Ebrahimi
4688*22dc650dSSadaf Ebrahimi return emit_groupf(compiler, op, dst_freg, src_freg, 0);
4689*22dc650dSSadaf Ebrahimi }
4690*22dc650dSSadaf Ebrahimi
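/* sljit_emit_simd_op2 performs a bitwise AND/OR/XOR of two vector registers,
   using PAND/POR/PXOR or the ANDPS/ORPS/XORPS (and PD) forms for floats. */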
4691*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4692*22dc650dSSadaf Ebrahimi sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4693*22dc650dSSadaf Ebrahimi {
4694*22dc650dSSadaf Ebrahimi sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4695*22dc650dSSadaf Ebrahimi sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4696*22dc650dSSadaf Ebrahimi sljit_uw op = 0;
4697*22dc650dSSadaf Ebrahimi
4698*22dc650dSSadaf Ebrahimi CHECK_ERROR();
4699*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4700*22dc650dSSadaf Ebrahimi
4701*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4702*22dc650dSSadaf Ebrahimi compiler->mode32 = 1;
4703*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
4704*22dc650dSSadaf Ebrahimi
4705*22dc650dSSadaf Ebrahimi if (reg_size == 5) {
4706*22dc650dSSadaf Ebrahimi if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4707*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4708*22dc650dSSadaf Ebrahimi } else if (reg_size != 4)
4709*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4710*22dc650dSSadaf Ebrahimi
4711*22dc650dSSadaf Ebrahimi if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4712*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
4713*22dc650dSSadaf Ebrahimi
4714*22dc650dSSadaf Ebrahimi switch (SLJIT_SIMD_GET_OPCODE(type)) {
4715*22dc650dSSadaf Ebrahimi case SLJIT_SIMD_OP2_AND:
4716*22dc650dSSadaf Ebrahimi op = (type & SLJIT_SIMD_FLOAT) ? ANDPD_x_xm : PAND_x_xm;
4717*22dc650dSSadaf Ebrahimi
4718*22dc650dSSadaf Ebrahimi if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4719*22dc650dSSadaf Ebrahimi op |= EX86_PREF_66;
4720*22dc650dSSadaf Ebrahimi break;
4721*22dc650dSSadaf Ebrahimi case SLJIT_SIMD_OP2_OR:
4722*22dc650dSSadaf Ebrahimi op = (type & SLJIT_SIMD_FLOAT) ? ORPD_x_xm : POR_x_xm;
4723*22dc650dSSadaf Ebrahimi
4724*22dc650dSSadaf Ebrahimi if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4725*22dc650dSSadaf Ebrahimi op |= EX86_PREF_66;
4726*22dc650dSSadaf Ebrahimi break;
4727*22dc650dSSadaf Ebrahimi case SLJIT_SIMD_OP2_XOR:
4728*22dc650dSSadaf Ebrahimi op = (type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm;
4729*22dc650dSSadaf Ebrahimi
4730*22dc650dSSadaf Ebrahimi if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4731*22dc650dSSadaf Ebrahimi op |= EX86_PREF_66;
4732*22dc650dSSadaf Ebrahimi break;
4733*22dc650dSSadaf Ebrahimi }
4734*22dc650dSSadaf Ebrahimi
4735*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
4736*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4737*22dc650dSSadaf Ebrahimi
4738*22dc650dSSadaf Ebrahimi if (reg_size == 5 || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX))) {
4739*22dc650dSSadaf Ebrahimi if (reg_size == 5)
4740*22dc650dSSadaf Ebrahimi op |= VEX_256;
4741*22dc650dSSadaf Ebrahimi
4742*22dc650dSSadaf Ebrahimi return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_freg, src1_freg, src2_freg, 0);
4743*22dc650dSSadaf Ebrahimi }
4744*22dc650dSSadaf Ebrahimi
4745*22dc650dSSadaf Ebrahimi if (dst_freg != src1_freg) {
4746*22dc650dSSadaf Ebrahimi if (dst_freg == src2_freg)
4747*22dc650dSSadaf Ebrahimi src2_freg = src1_freg;
4748*22dc650dSSadaf Ebrahimi else
4749*22dc650dSSadaf Ebrahimi FAIL_IF(emit_simd_mov(compiler, type, dst_freg, src1_freg));
4750*22dc650dSSadaf Ebrahimi }
4751*22dc650dSSadaf Ebrahimi
4752*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, op | EX86_SSE2, dst_freg, src2_freg, 0));
4753*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4754*22dc650dSSadaf Ebrahimi }
4755*22dc650dSSadaf Ebrahimi
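/* A naturally aligned load is already atomic on x86, so atomic_load is emitted
   as an ordinary MOV through sljit_emit_op1. */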
4756*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4757*22dc650dSSadaf Ebrahimi sljit_s32 dst_reg,
4758*22dc650dSSadaf Ebrahimi sljit_s32 mem_reg)
4759*22dc650dSSadaf Ebrahimi {
4760*22dc650dSSadaf Ebrahimi CHECK_ERROR();
4761*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4762*22dc650dSSadaf Ebrahimi
4763*22dc650dSSadaf Ebrahimi SLJIT_SKIP_CHECKS(compiler);
4764*22dc650dSSadaf Ebrahimi return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
4765*22dc650dSSadaf Ebrahimi }
4766*22dc650dSSadaf Ebrahimi
4767*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4768*22dc650dSSadaf Ebrahimi sljit_s32 src_reg,
4769*22dc650dSSadaf Ebrahimi sljit_s32 mem_reg,
4770*22dc650dSSadaf Ebrahimi sljit_s32 temp_reg)
4771*22dc650dSSadaf Ebrahimi {
4772*22dc650dSSadaf Ebrahimi sljit_uw pref;
4773*22dc650dSSadaf Ebrahimi sljit_s32 free_reg = TMP_REG1;
4774*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4775*22dc650dSSadaf Ebrahimi sljit_sw srcw = 0;
4776*22dc650dSSadaf Ebrahimi sljit_sw tempw = 0;
4777*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
4778*22dc650dSSadaf Ebrahimi
4779*22dc650dSSadaf Ebrahimi CHECK_ERROR();
4780*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4781*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(src_reg, srcw, (void)0);
4782*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(temp_reg, tempw, (void)0);
4783*22dc650dSSadaf Ebrahimi
4784*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(FAST_IS_REG(src_reg) || src_reg == SLJIT_MEM1(SLJIT_SP));
4785*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(FAST_IS_REG(temp_reg) || temp_reg == SLJIT_MEM1(SLJIT_SP));
4786*22dc650dSSadaf Ebrahimi
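/* The store is implemented with LOCK CMPXCHG, which expects the comparand in
   EAX/RAX; the register shuffling below moves temp_reg into R0 and restores the
   original value of R0 afterwards. */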
4787*22dc650dSSadaf Ebrahimi op = GET_OPCODE(op);
4788*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4789*22dc650dSSadaf Ebrahimi if ((src_reg & SLJIT_MEM) || (op == SLJIT_MOV_U8 && reg_map[src_reg] >= 4)) {
4790*22dc650dSSadaf Ebrahimi /* Src is a virtual register or its low byte is not accessible. */
4791*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src_reg != SLJIT_R1);
4792*22dc650dSSadaf Ebrahimi free_reg = src_reg;
4793*22dc650dSSadaf Ebrahimi
4794*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, TMP_REG1, 0, src_reg, srcw);
4795*22dc650dSSadaf Ebrahimi src_reg = TMP_REG1;
4796*22dc650dSSadaf Ebrahimi
4797*22dc650dSSadaf Ebrahimi if (mem_reg == src_reg)
4798*22dc650dSSadaf Ebrahimi mem_reg = TMP_REG1;
4799*22dc650dSSadaf Ebrahimi }
4800*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_32 */
4801*22dc650dSSadaf Ebrahimi
4802*22dc650dSSadaf Ebrahimi if (temp_reg != SLJIT_R0) {
4803*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4804*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
4805*22dc650dSSadaf Ebrahimi
4806*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0);
4807*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, 0);
4808*22dc650dSSadaf Ebrahimi
4809*22dc650dSSadaf Ebrahimi if (src_reg == SLJIT_R0)
4810*22dc650dSSadaf Ebrahimi src_reg = free_reg;
4811*22dc650dSSadaf Ebrahimi if (mem_reg == SLJIT_R0)
4812*22dc650dSSadaf Ebrahimi mem_reg = free_reg;
4813*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
4814*22dc650dSSadaf Ebrahimi if (src_reg == TMP_REG1 && mem_reg == SLJIT_R0 && (free_reg & SLJIT_MEM)) {
4815*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1, 0);
4816*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_R0, 0);
4817*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
4818*22dc650dSSadaf Ebrahimi
4819*22dc650dSSadaf Ebrahimi mem_reg = SLJIT_R1;
4820*22dc650dSSadaf Ebrahimi free_reg = SLJIT_R1;
4821*22dc650dSSadaf Ebrahimi } else {
4822*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0);
4823*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
4824*22dc650dSSadaf Ebrahimi
4825*22dc650dSSadaf Ebrahimi if (src_reg == SLJIT_R0)
4826*22dc650dSSadaf Ebrahimi src_reg = free_reg;
4827*22dc650dSSadaf Ebrahimi if (mem_reg == SLJIT_R0)
4828*22dc650dSSadaf Ebrahimi mem_reg = free_reg;
4829*22dc650dSSadaf Ebrahimi }
4830*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
4831*22dc650dSSadaf Ebrahimi }
4832*22dc650dSSadaf Ebrahimi
4833*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4834*22dc650dSSadaf Ebrahimi compiler->mode32 = op != SLJIT_MOV && op != SLJIT_MOV_P;
4835*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
4836*22dc650dSSadaf Ebrahimi
4837*22dc650dSSadaf Ebrahimi /* Lock prefix. */
4838*22dc650dSSadaf Ebrahimi FAIL_IF(emit_byte(compiler, GROUP_LOCK));
4839*22dc650dSSadaf Ebrahimi
4840*22dc650dSSadaf Ebrahimi pref = 0;
4841*22dc650dSSadaf Ebrahimi if (op == SLJIT_MOV_U16)
4842*22dc650dSSadaf Ebrahimi pref = EX86_HALF_ARG | EX86_PREF_66;
4843*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4844*22dc650dSSadaf Ebrahimi if (op == SLJIT_MOV_U8)
4845*22dc650dSSadaf Ebrahimi pref = EX86_REX;
4846*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
4847*22dc650dSSadaf Ebrahimi
4848*22dc650dSSadaf Ebrahimi FAIL_IF(emit_groupf(compiler, (op == SLJIT_MOV_U8 ? CMPXCHG_rm8_r : CMPXCHG_rm_r) | pref, src_reg, SLJIT_MEM1(mem_reg), 0));
4849*22dc650dSSadaf Ebrahimi
4850*22dc650dSSadaf Ebrahimi if (temp_reg != SLJIT_R0) {
4851*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4852*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
4853*22dc650dSSadaf Ebrahimi return emit_mov(compiler, SLJIT_R0, 0, TMP_REG1, 0);
4854*22dc650dSSadaf Ebrahimi #else /* !SLJIT_CONFIG_X86_64 */
4855*22dc650dSSadaf Ebrahimi EMIT_MOV(compiler, SLJIT_R0, 0, free_reg, 0);
4856*22dc650dSSadaf Ebrahimi if (free_reg != TMP_REG1)
4857*22dc650dSSadaf Ebrahimi return emit_mov(compiler, free_reg, 0, (free_reg == SLJIT_R1) ? SLJIT_MEM1(SLJIT_SP) : TMP_REG1, 0);
4858*22dc650dSSadaf Ebrahimi #endif /* SLJIT_CONFIG_X86_64 */
4859*22dc650dSSadaf Ebrahimi }
4860*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
4861*22dc650dSSadaf Ebrahimi }
4862*22dc650dSSadaf Ebrahimi
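/* sljit_get_local_base computes dst = SP + offset with LEA; a 64-bit offset that
   does not fit into 32 bits is first loaded into TMP_REG1. */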
4863*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
4864*22dc650dSSadaf Ebrahimi {
4865*22dc650dSSadaf Ebrahimi CHECK_ERROR();
4866*22dc650dSSadaf Ebrahimi CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
4867*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
4868*22dc650dSSadaf Ebrahimi
4869*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(dst, dstw, (void)0);
4870*22dc650dSSadaf Ebrahimi
4871*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4872*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
4873*22dc650dSSadaf Ebrahimi #endif
4874*22dc650dSSadaf Ebrahimi
4875*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
4876*22dc650dSSadaf Ebrahimi
4877*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4878*22dc650dSSadaf Ebrahimi if (NOT_HALFWORD(offset)) {
4879*22dc650dSSadaf Ebrahimi FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
4880*22dc650dSSadaf Ebrahimi #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
4881*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
4882*22dc650dSSadaf Ebrahimi return compiler->error;
4883*22dc650dSSadaf Ebrahimi #else
4884*22dc650dSSadaf Ebrahimi return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
4885*22dc650dSSadaf Ebrahimi #endif
4886*22dc650dSSadaf Ebrahimi }
4887*22dc650dSSadaf Ebrahimi #endif
4888*22dc650dSSadaf Ebrahimi
4889*22dc650dSSadaf Ebrahimi if (offset != 0)
4890*22dc650dSSadaf Ebrahimi return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
4891*22dc650dSSadaf Ebrahimi return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
4892*22dc650dSSadaf Ebrahimi }
4893*22dc650dSSadaf Ebrahimi
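/* sljit_emit_const moves init_value into dst and records the location of the
   immediate so it can be patched later with sljit_set_const. */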
4894*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
4895*22dc650dSSadaf Ebrahimi {
4896*22dc650dSSadaf Ebrahimi sljit_u8 *inst;
4897*22dc650dSSadaf Ebrahimi struct sljit_const *const_;
4898*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4899*22dc650dSSadaf Ebrahimi sljit_s32 reg;
4900*22dc650dSSadaf Ebrahimi #endif
4901*22dc650dSSadaf Ebrahimi
4902*22dc650dSSadaf Ebrahimi CHECK_ERROR_PTR();
4903*22dc650dSSadaf Ebrahimi CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
4904*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
4905*22dc650dSSadaf Ebrahimi
4906*22dc650dSSadaf Ebrahimi CHECK_EXTRA_REGS(dst, dstw, (void)0);
4907*22dc650dSSadaf Ebrahimi
4908*22dc650dSSadaf Ebrahimi const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
4909*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(!const_);
4910*22dc650dSSadaf Ebrahimi set_const(const_, compiler);
4911*22dc650dSSadaf Ebrahimi
4912*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4913*22dc650dSSadaf Ebrahimi compiler->mode32 = 0;
4914*22dc650dSSadaf Ebrahimi reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
4915*22dc650dSSadaf Ebrahimi
4916*22dc650dSSadaf Ebrahimi if (emit_load_imm64(compiler, reg, init_value))
4917*22dc650dSSadaf Ebrahimi return NULL;
4918*22dc650dSSadaf Ebrahimi #else
4919*22dc650dSSadaf Ebrahimi if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
4920*22dc650dSSadaf Ebrahimi return NULL;
4921*22dc650dSSadaf Ebrahimi #endif
4922*22dc650dSSadaf Ebrahimi
4923*22dc650dSSadaf Ebrahimi inst = (sljit_u8*)ensure_buf(compiler, 1);
4924*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(!inst);
4925*22dc650dSSadaf Ebrahimi
4926*22dc650dSSadaf Ebrahimi inst[0] = SLJIT_INST_CONST;
4927*22dc650dSSadaf Ebrahimi
4928*22dc650dSSadaf Ebrahimi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4929*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM)
4930*22dc650dSSadaf Ebrahimi if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
4931*22dc650dSSadaf Ebrahimi return NULL;
4932*22dc650dSSadaf Ebrahimi #endif
4933*22dc650dSSadaf Ebrahimi
4934*22dc650dSSadaf Ebrahimi return const_;
4935*22dc650dSSadaf Ebrahimi }
4936*22dc650dSSadaf Ebrahimi
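/* sljit_emit_mov_addr emits a move of a not-yet-known address into dst; the
   returned jump record is used to fill in the immediate once the target address
   is known. */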
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_jump *jump;
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif /* SLJIT_CONFIG_X86_64 */

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_mov_addr(jump, compiler, 0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	PTR_FAIL_IF(emit_load_imm64(compiler, reg, 0));
	jump->addr = compiler->size;

	if (reg_map[reg] >= 8)
		jump->flags |= MOV_ADDR_HI;
#else /* !SLJIT_CONFIG_X86_64 */
	PTR_FAIL_IF(emit_mov(compiler, dst, dstw, SLJIT_IMM, 0));
#endif /* SLJIT_CONFIG_X86_64 */

	inst = (sljit_u8*)ensure_buf(compiler, 1);
	PTR_FAIL_IF(!inst);

	inst[0] = SLJIT_INST_MOV_ADDR;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_mov(compiler, dst, dstw, TMP_REG1, 0));
#endif /* SLJIT_CONFIG_X86_64 */

	return jump;
}

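/* Rewrites the target of a previously emitted jump or mov_addr. On x86-32 the
   slot holds a displacement relative to the end of the 4-byte operand (with
   executable_offset translating the writable address into its executable
   counterpart); on x86-64 the absolute 64-bit target is stored. The write is
   bracketed by SLJIT_UPDATE_WX_FLAGS for W^X-protected allocators. */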
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset));
#else
	sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target);
#endif
	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1);
}

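/* Rewrites the immediate of a constant created by sljit_emit_const(); the
   address is typically obtained with sljit_get_const_addr(). */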
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 0);
	sljit_unaligned_store_sw((void*)addr, new_constant);
	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 1);
}
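
/* Minimal usage sketch of the patchable-constant API above (illustrative only,
   not part of this file; it assumes the sljitLir.h helpers
   sljit_get_const_addr() and sljit_get_executable_offset()):

     struct sljit_const *c = sljit_emit_const(compiler, SLJIT_R0, 0, 42);
     ...emit the rest of the function and generate the code...
     sljit_set_const(sljit_get_const_addr(c), 100,
         sljit_get_executable_offset(compiler));

   After the call, the generated load places 100 instead of 42 in R0. */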