xref: /aosp_15_r20/external/pcre/src/sljit/sljitNativeLOONGARCH_64.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg ([email protected]). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
26*22dc650dSSadaf Ebrahimi 
sljit_get_platform_name(void)27*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28*22dc650dSSadaf Ebrahimi {
29*22dc650dSSadaf Ebrahimi 	return "LOONGARCH" SLJIT_CPUINFO;
30*22dc650dSSadaf Ebrahimi }
31*22dc650dSSadaf Ebrahimi 
32*22dc650dSSadaf Ebrahimi typedef sljit_u32 sljit_ins;
33*22dc650dSSadaf Ebrahimi 
34*22dc650dSSadaf Ebrahimi #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
35*22dc650dSSadaf Ebrahimi #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
36*22dc650dSSadaf Ebrahimi #define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
37*22dc650dSSadaf Ebrahimi #define TMP_ZERO	0
38*22dc650dSSadaf Ebrahimi 
39*22dc650dSSadaf Ebrahimi /* Flags are kept in volatile registers. */
40*22dc650dSSadaf Ebrahimi #define EQUAL_FLAG	(SLJIT_NUMBER_OF_REGISTERS + 5)
41*22dc650dSSadaf Ebrahimi #define RETURN_ADDR_REG	TMP_REG2
42*22dc650dSSadaf Ebrahimi #define OTHER_FLAG	(SLJIT_NUMBER_OF_REGISTERS + 6)
43*22dc650dSSadaf Ebrahimi 
44*22dc650dSSadaf Ebrahimi #define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
45*22dc650dSSadaf Ebrahimi #define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
46*22dc650dSSadaf Ebrahimi 
47*22dc650dSSadaf Ebrahimi static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
48*22dc650dSSadaf Ebrahimi 	0, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 22, 31, 30, 29, 28, 27, 26, 25, 24, 23, 3, 13, 1, 14, 12, 15
49*22dc650dSSadaf Ebrahimi };
50*22dc650dSSadaf Ebrahimi 
51*22dc650dSSadaf Ebrahimi static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
52*22dc650dSSadaf Ebrahimi 	0, 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 31, 30, 29, 28, 27, 26, 25, 24, 8, 9
53*22dc650dSSadaf Ebrahimi };
54*22dc650dSSadaf Ebrahimi 
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */
58*22dc650dSSadaf Ebrahimi 
/*
LoongArch instructions are 32 bits wide, belonging to 9 basic instruction formats (and variants of them):

| Format name  | Composition                 |
| 2R           | Opcode + Rj + Rd            |
| 3R           | Opcode + Rk + Rj + Rd       |
| 4R           | Opcode + Ra + Rk + Rj + Rd  |
| 2RI8         | Opcode + I8 + Rj + Rd       |
| 2RI12        | Opcode + I12 + Rj + Rd      |
| 2RI14        | Opcode + I14 + Rj + Rd      |
| 2RI16        | Opcode + I16 + Rj + Rd      |
| 1RI21        | Opcode + I21L + Rj + I21H   |
| I26          | Opcode + I26L + I26H        |

Rd is the destination register operand, while Rj, Rk and Ra (“a” stands for “additional”) are the source register operands.
I8/I12/I14/I16/I21/I26 are immediate operands of respective width. The longer I21 and I26 are stored in separate higher and
lower parts in the instruction word, denoted by the “L” and “H” suffixes. */
76*22dc650dSSadaf Ebrahimi 
/* General purpose register operand fields. Each macro looks the register
   index up in reg_map and shifts the result to the field position:
   Rd at bit 0, Rj at bit 5, Rk at bit 10, Ra at bit 15. */
#define RD(rd) ((sljit_ins)reg_map[rd])
#define RJ(rj) ((sljit_ins)reg_map[rj] << 5)
#define RK(rk) ((sljit_ins)reg_map[rk] << 10)
#define RA(ra) ((sljit_ins)reg_map[ra] << 15)

/* NOTE(review): FD indexes reg_map, not freg_map, unlike the FR* macros
   below - confirm that this is intended by its users. */
#define FD(fd) ((sljit_ins)reg_map[fd])

/* Floating point register operand fields; same bit positions as above,
   but the index is looked up in freg_map. */
#define FRD(fd) ((sljit_ins)freg_map[fd])
#define FRJ(fj) ((sljit_ins)freg_map[fj] << 5)
#define FRK(fk) ((sljit_ins)freg_map[fk] << 10)
#define FRA(fa) ((sljit_ins)freg_map[fa] << 15)

/* Immediate operand fields. IMM_V places the value at bit 10 without masking.
   IMM_I8/I12/I14/I16 keep the low 8/12/14/16 bits and place them at bit 10. */
#define IMM_V(imm) ((sljit_ins)(imm) << 10)
#define IMM_I8(imm) (((sljit_ins)(imm)&0xff) << 10)
#define IMM_I12(imm) (((sljit_ins)(imm)&0xfff) << 10)
/* The mask must be exactly 14 bits wide (0x3fff). A wider or sparse mask
   would drop immediate bits and let bits 14-15 spill above the field. */
#define IMM_I14(imm) (((sljit_ins)(imm)&0x3fff) << 10)
#define IMM_I16(imm) (((sljit_ins)(imm)&0xffff) << 10)
/* Bits 31..12 of imm, placed at bit 5. */
#define IMM_I20(imm) (((sljit_ins)(imm)&0xffffffff) >> 12 << 5)
/* Low 16 bits at bit 10, bits 20..16 (I21) or 25..16 (I26) at bit 0. */
#define IMM_I21(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x1f))
#define IMM_I26(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x3ff))

/* Opcode position for each instruction format. */
#define OPC_I26(opc) ((sljit_ins)(opc) << 26)
#define OPC_1RI21(opc) ((sljit_ins)(opc) << 26)
#define OPC_2RI16(opc) ((sljit_ins)(opc) << 26)
#define OPC_2RI14(opc) ((sljit_ins)(opc) << 24)
#define OPC_2RI12(opc) ((sljit_ins)(opc) << 22)
#define OPC_2RI8(opc) ((sljit_ins)(opc) << 18)
#define OPC_4R(opc) ((sljit_ins)(opc) << 20)
#define OPC_3R(opc) ((sljit_ins)(opc) << 15)
#define OPC_2R(opc) ((sljit_ins)(opc) << 10)
#define OPC_1RI20(opc) ((sljit_ins)(opc) << 25)
107*22dc650dSSadaf Ebrahimi 
/* Arithmetic operation instructions.
   Each macro is an opcode template; register and immediate fields are
   OR-ed in separately. */

/* Addition / subtraction */
#define ADD_W		OPC_3R(0x20)
#define ADD_D		OPC_3R(0x21)
#define SUB_W		OPC_3R(0x22)
#define SUB_D		OPC_3R(0x23)
#define ADDI_W		OPC_2RI12(0xa)
#define ADDI_D		OPC_2RI12(0xb)

/* Logic with immediate */
#define ANDI		OPC_2RI12(0xd)
#define ORI		OPC_2RI12(0xe)
#define XORI		OPC_2RI12(0xf)

/* Constant construction */
#define ADDU16I_D	OPC_2RI16(0x4)
#define LU12I_W		OPC_1RI20(0xa)
#define LU32I_D		OPC_1RI20(0xb)
#define LU52I_D		OPC_2RI12(0xc)

/* Comparison */
#define SLT		OPC_3R(0x24)
#define SLTU		OPC_3R(0x25)
#define SLTI		OPC_2RI12(0x8)
#define SLTUI		OPC_2RI12(0x9)

/* PC relative address computation */
#define PCADDI		OPC_1RI20(0xc)
#define PCALAU12I	OPC_1RI20(0xd)
#define PCADDU12I	OPC_1RI20(0xe)
#define PCADDU18I	OPC_1RI20(0xf)

/* Logic with registers */
#define NOR		OPC_3R(0x28)
#define AND		OPC_3R(0x29)
#define OR		OPC_3R(0x2a)
#define XOR		OPC_3R(0x2b)
#define ORN		OPC_3R(0x2c)
#define ANDN		OPC_3R(0x2d)

/* Multiplication */
#define MUL_W		OPC_3R(0x38)
#define MULH_W		OPC_3R(0x39)
#define MULH_WU		OPC_3R(0x3a)
#define MUL_D		OPC_3R(0x3b)
#define MULH_D		OPC_3R(0x3c)
#define MULH_DU		OPC_3R(0x3d)
#define MULW_D_W	OPC_3R(0x3e)
#define MULW_D_WU	OPC_3R(0x3f)

/* Division / remainder */
#define DIV_W		OPC_3R(0x40)
#define MOD_W		OPC_3R(0x41)
#define DIV_WU		OPC_3R(0x42)
#define MOD_WU		OPC_3R(0x43)
#define DIV_D		OPC_3R(0x44)
#define MOD_D		OPC_3R(0x45)
#define DIV_DU		OPC_3R(0x46)
#define MOD_DU		OPC_3R(0x47)

/* Bit-shift instructions */

/* Shift amount in register */
#define SLL_W		OPC_3R(0x2e)
#define SRL_W		OPC_3R(0x2f)
#define SRA_W		OPC_3R(0x30)
#define SLL_D		OPC_3R(0x31)
#define SRL_D		OPC_3R(0x32)
#define SRA_D		OPC_3R(0x33)
#define ROTR_W		OPC_3R(0x36)
#define ROTR_D		OPC_3R(0x37)

/* Shift amount as immediate */
#define SLLI_W		OPC_3R(0x81)
#define SLLI_D		((sljit_ins)(0x41) << 16)
#define SRLI_W		OPC_3R(0x89)
#define SRLI_D		((sljit_ins)(0x45) << 16)
#define SRAI_W		OPC_3R(0x91)
#define SRAI_D		((sljit_ins)(0x49) << 16)
#define ROTRI_W		OPC_3R(0x99)
#define ROTRI_D		((sljit_ins)(0x4d) << 16)
170*22dc650dSSadaf Ebrahimi 
/* Bit-manipulation instructions */

/* Leading / trailing bit counts */
#define CLO_W		OPC_2R(0x4)
#define CLZ_W		OPC_2R(0x5)
#define CTO_W		OPC_2R(0x6)
#define CTZ_W		OPC_2R(0x7)
#define CLO_D		OPC_2R(0x8)
#define CLZ_D		OPC_2R(0x9)
#define CTO_D		OPC_2R(0xa)
#define CTZ_D		OPC_2R(0xb)

/* Byte / halfword / bit reversal */
#define REVB_2H		OPC_2R(0xc)
#define REVB_4H		OPC_2R(0xd)
#define REVB_2W		OPC_2R(0xe)
#define REVB_D		OPC_2R(0xf)
#define REVH_2W		OPC_2R(0x10)
#define REVH_D		OPC_2R(0x11)
#define BITREV_4B	OPC_2R(0x12)
#define BITREV_8B	OPC_2R(0x13)
#define BITREV_W	OPC_2R(0x14)
#define BITREV_D	OPC_2R(0x15)

/* Sign extension */
#define EXT_W_H		OPC_2R(0x16)
#define EXT_W_B		OPC_2R(0x17)

/* Bit string insert / pick */
#define BSTRINS_W	(0x1 << 22 | 1 << 21)
#define BSTRPICK_W	(0x1 << 22 | 1 << 21 | 1 << 15)
#define BSTRINS_D	(0x2 << 22)
#define BSTRPICK_D	(0x3 << 22)

/* Branch instructions */
#define BEQZ		OPC_1RI21(0x10)
#define BNEZ		OPC_1RI21(0x11)
#define JIRL		OPC_2RI16(0x13)
#define B		OPC_I26(0x14)
#define BL		OPC_I26(0x15)
#define BEQ		OPC_2RI16(0x16)
#define BNE		OPC_2RI16(0x17)
#define BLT		OPC_2RI16(0x18)
#define BGE		OPC_2RI16(0x19)
#define BLTU		OPC_2RI16(0x1a)
#define BGEU		OPC_2RI16(0x1b)

/* Memory access instructions */

/* Base register + 12 bit immediate */
#define LD_B		OPC_2RI12(0xa0)
#define LD_H		OPC_2RI12(0xa1)
#define LD_W		OPC_2RI12(0xa2)
#define LD_D		OPC_2RI12(0xa3)

#define ST_B		OPC_2RI12(0xa4)
#define ST_H		OPC_2RI12(0xa5)
#define ST_W		OPC_2RI12(0xa6)
#define ST_D		OPC_2RI12(0xa7)

#define LD_BU		OPC_2RI12(0xa8)
#define LD_HU		OPC_2RI12(0xa9)
#define LD_WU		OPC_2RI12(0xaa)

/* Base register + index register */
#define LDX_B		OPC_3R(0x7000)
#define LDX_H		OPC_3R(0x7008)
#define LDX_W		OPC_3R(0x7010)
#define LDX_D		OPC_3R(0x7018)

#define STX_B		OPC_3R(0x7020)
#define STX_H		OPC_3R(0x7028)
#define STX_W		OPC_3R(0x7030)
#define STX_D		OPC_3R(0x7038)

#define LDX_BU		OPC_3R(0x7040)
#define LDX_HU		OPC_3R(0x7048)
#define LDX_WU		OPC_3R(0x7050)

#define PRELD		OPC_2RI12(0xab)

/* Atomic memory access instructions */
#define LL_W		OPC_2RI14(0x20)
#define SC_W		OPC_2RI14(0x21)
#define LL_D		OPC_2RI14(0x22)
#define SC_D		OPC_2RI14(0x23)

/* LoongArch V1.10 Instructions */
#define AMCAS_B		OPC_3R(0x70B0)
#define AMCAS_H		OPC_3R(0x70B1)
#define AMCAS_W		OPC_3R(0x70B2)
#define AMCAS_D		OPC_3R(0x70B3)

/* Other instructions */
#define BREAK		OPC_3R(0x54)
#define DBGCALL		OPC_3R(0x55)
#define SYSCALL		OPC_3R(0x56)
257*22dc650dSSadaf Ebrahimi 
/* Basic Floating-Point Instructions */

/* Floating-Point Arithmetic Operation Instructions */
#define FADD_S		OPC_3R(0x201)
#define FADD_D		OPC_3R(0x202)
#define FSUB_S		OPC_3R(0x205)
#define FSUB_D		OPC_3R(0x206)
#define FMUL_S		OPC_3R(0x209)
#define FMUL_D		OPC_3R(0x20a)
#define FDIV_S		OPC_3R(0x20d)
#define FDIV_D		OPC_3R(0x20e)
#define FCMP_COND_S	OPC_4R(0xc1)
#define FCMP_COND_D	OPC_4R(0xc2)
#define FCOPYSIGN_S	OPC_3R(0x225)
#define FCOPYSIGN_D	OPC_3R(0x226)
#define FSEL		OPC_4R(0xd0)
#define FABS_S		OPC_2R(0x4501)
#define FABS_D		OPC_2R(0x4502)
#define FNEG_S		OPC_2R(0x4505)
#define FNEG_D		OPC_2R(0x4506)
#define FMOV_S		OPC_2R(0x4525)
#define FMOV_D		OPC_2R(0x4526)

/* Floating-Point Conversion Instructions */
#define FCVT_S_D	OPC_2R(0x4646)
#define FCVT_D_S	OPC_2R(0x4649)
#define FTINTRZ_W_S	OPC_2R(0x46a1)
#define FTINTRZ_W_D	OPC_2R(0x46a2)
#define FTINTRZ_L_S	OPC_2R(0x46a9)
#define FTINTRZ_L_D	OPC_2R(0x46aa)
#define FFINT_S_W	OPC_2R(0x4744)
#define FFINT_S_L	OPC_2R(0x4746)
#define FFINT_D_W	OPC_2R(0x4748)
#define FFINT_D_L	OPC_2R(0x474a)

/* Floating-Point Move Instructions
   (FMOV_S and FMOV_D are defined in the arithmetic group above.) */
#define MOVGR2FR_W	OPC_2R(0x4529)
#define MOVGR2FR_D	OPC_2R(0x452a)
#define MOVGR2FRH_W	OPC_2R(0x452b)
#define MOVFR2GR_S	OPC_2R(0x452d)
#define MOVFR2GR_D	OPC_2R(0x452e)
#define MOVFRH2GR_S	OPC_2R(0x452f)
#define MOVGR2FCSR	OPC_2R(0x4530)
#define MOVFCSR2GR	OPC_2R(0x4532)
#define MOVFR2CF	OPC_2R(0x4534)
#define MOVCF2FR	OPC_2R(0x4535)
#define MOVGR2CF	OPC_2R(0x4536)
#define MOVCF2GR	OPC_2R(0x4537)

/* Floating-Point Branch Instructions
   NOTE(review): BCEQZ and BCNEZ expand to the same value here. Presumably
   the users OR in the bit that tells them apart - confirm at the call sites. */
#define BCEQZ		OPC_I26(0x12)
#define BCNEZ		OPC_I26(0x12)

/* Floating-Point Common Memory Access Instructions */

/* Base register + 12 bit immediate */
#define FLD_S		OPC_2RI12(0xac)
#define FLD_D		OPC_2RI12(0xae)
#define FST_S		OPC_2RI12(0xad)
#define FST_D		OPC_2RI12(0xaf)

/* Base register + index register */
#define FLDX_S		OPC_3R(0x7060)
#define FLDX_D		OPC_3R(0x7068)
#define FSTX_S		OPC_3R(0x7070)
#define FSTX_D		OPC_3R(0x7078)
322*22dc650dSSadaf Ebrahimi 
/* Vector Instructions */

/* Vector Arithmetic Instructions */
#define VOR_V		OPC_3R(0xe24d)
#define VXOR_V		OPC_3R(0xe24e)
#define VAND_V		OPC_3R(0xe24c)
#define VMSKLTZ		OPC_2R(0x1ca710)

/* Vector Memory Access Instructions */
#define VLD		OPC_2RI12(0xb0)
#define VST		OPC_2RI12(0xb1)
#define XVLD		OPC_2RI12(0xb2)
#define XVST		OPC_2RI12(0xb3)
#define VSTELM		OPC_2RI8(0xc40)

/* Vector Float Conversion Instructions */
#define VFCVTL_D_S	OPC_2R(0x1ca77c)

/* Vector Bit Manipulate Instructions */
#define VSLLWIL		OPC_2R(0x1cc200)

/* Vector Move And Shuffle Instructions */
#define VLDREPL		OPC_2R(0xc0000)
#define VINSGR2VR	OPC_2R(0x1cbac0)
#define VPICKVE2GR_U	OPC_2R(0x1cbce0)
#define VREPLGR2VR	OPC_2R(0x1ca7c0)
#define VREPLVE		OPC_3R(0xe244)
#define VREPLVEI	OPC_2R(0x1cbde0)
#define XVPERMI		OPC_2RI8(0x1dfa)
352*22dc650dSSadaf Ebrahimi 
/* Range of a signed 12 bit immediate. */
#define I12_MIN		(-0x800)
#define I12_MAX		(0x7ff)

/* Byte ranges of the branch / jump offsets: a signed 16, 21 or 26 bit
   value shifted left by two. */
#define BRANCH16_MIN	(-(0x8000 << 2))
#define BRANCH16_MAX	(0x7fff << 2)
#define BRANCH21_MIN	(-(0x100000 << 2))
#define BRANCH21_MAX	(0xfffff << 2)
#define JUMP_MIN	(-(0x2000000 << 2))
#define JUMP_MAX	(0x1ffffff << 2)
#define JIRL_MIN	(-(0x8000 << 2))
#define JIRL_MAX	(0x7fff << 2)

/* Limits used when choosing how an address is materialized. */
#define S32_MIN		(-0x80000000l)
#define S32_MAX		(0x7fffffffl)
#define S52_MAX		(0x7ffffffffffffl)

/* Picks the _W variant of an opcode when SLJIT_32 is set in type,
   the _D variant otherwise. */
#define INST(inst, type) ((sljit_ins)((type & SLJIT_32) ? inst##_W : inst##_D))
369*22dc650dSSadaf Ebrahimi 
370*22dc650dSSadaf Ebrahimi /* LoongArch CPUCFG register for feature detection */
371*22dc650dSSadaf Ebrahimi #define LOONGARCH_CFG2			0x02
372*22dc650dSSadaf Ebrahimi #define LOONGARCH_CFG2_LAMCAS	(1 << 28)
373*22dc650dSSadaf Ebrahimi 
374*22dc650dSSadaf Ebrahimi static sljit_u32 cfg2_feature_list = 0;
375*22dc650dSSadaf Ebrahimi 
376*22dc650dSSadaf Ebrahimi /* According to Software Development and Build Convention for LoongArch Architectures,
377*22dc650dSSadaf Ebrahimi +   the status of LSX and LASX extension must be checked through HWCAP */
378*22dc650dSSadaf Ebrahimi #include <sys/auxv.h>
379*22dc650dSSadaf Ebrahimi 
380*22dc650dSSadaf Ebrahimi #define LOONGARCH_HWCAP_LSX		(1 << 4)
381*22dc650dSSadaf Ebrahimi #define LOONGARCH_HWCAP_LASX	(1 << 5)
382*22dc650dSSadaf Ebrahimi 
383*22dc650dSSadaf Ebrahimi static sljit_u32 hwcap_feature_list = 0;
384*22dc650dSSadaf Ebrahimi 
385*22dc650dSSadaf Ebrahimi /* Feature type */
386*22dc650dSSadaf Ebrahimi #define GET_CFG2 	0
387*22dc650dSSadaf Ebrahimi #define GET_HWCAP	1
388*22dc650dSSadaf Ebrahimi 
get_cpu_features(sljit_u32 feature_type)389*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_u32 get_cpu_features(sljit_u32 feature_type)
390*22dc650dSSadaf Ebrahimi  {
391*22dc650dSSadaf Ebrahimi  	if (cfg2_feature_list == 0)
392*22dc650dSSadaf Ebrahimi  		__asm__ ("cpucfg %0, %1" : "+&r"(cfg2_feature_list) : "r"(LOONGARCH_CFG2));
393*22dc650dSSadaf Ebrahimi 	if (hwcap_feature_list == 0)
394*22dc650dSSadaf Ebrahimi 		hwcap_feature_list = (sljit_u32)getauxval(AT_HWCAP);
395*22dc650dSSadaf Ebrahimi 
396*22dc650dSSadaf Ebrahimi 	return feature_type ? hwcap_feature_list : cfg2_feature_list;
397*22dc650dSSadaf Ebrahimi  }
398*22dc650dSSadaf Ebrahimi 
push_inst(struct sljit_compiler * compiler,sljit_ins ins)399*22dc650dSSadaf Ebrahimi static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
400*22dc650dSSadaf Ebrahimi {
401*22dc650dSSadaf Ebrahimi 	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
402*22dc650dSSadaf Ebrahimi 	FAIL_IF(!ptr);
403*22dc650dSSadaf Ebrahimi 	*ptr = ins;
404*22dc650dSSadaf Ebrahimi 	compiler->size++;
405*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
406*22dc650dSSadaf Ebrahimi }
407*22dc650dSSadaf Ebrahimi 
detect_jump_type(struct sljit_jump * jump,sljit_ins * code,sljit_sw executable_offset)408*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset)
409*22dc650dSSadaf Ebrahimi {
410*22dc650dSSadaf Ebrahimi 	sljit_sw diff;
411*22dc650dSSadaf Ebrahimi 	sljit_uw target_addr;
412*22dc650dSSadaf Ebrahimi 	sljit_ins *inst;
413*22dc650dSSadaf Ebrahimi 
414*22dc650dSSadaf Ebrahimi 	inst = (sljit_ins *)jump->addr;
415*22dc650dSSadaf Ebrahimi 
416*22dc650dSSadaf Ebrahimi 	if (jump->flags & SLJIT_REWRITABLE_JUMP)
417*22dc650dSSadaf Ebrahimi 		goto exit;
418*22dc650dSSadaf Ebrahimi 
419*22dc650dSSadaf Ebrahimi 	if (jump->flags & JUMP_ADDR)
420*22dc650dSSadaf Ebrahimi 		target_addr = jump->u.target;
421*22dc650dSSadaf Ebrahimi 	else {
422*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(jump->u.label != NULL);
423*22dc650dSSadaf Ebrahimi 		target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
424*22dc650dSSadaf Ebrahimi 	}
425*22dc650dSSadaf Ebrahimi 
426*22dc650dSSadaf Ebrahimi 	diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset;
427*22dc650dSSadaf Ebrahimi 
428*22dc650dSSadaf Ebrahimi 	if (jump->flags & IS_COND) {
429*22dc650dSSadaf Ebrahimi 		diff += SSIZE_OF(ins);
430*22dc650dSSadaf Ebrahimi 
431*22dc650dSSadaf Ebrahimi 		if (diff >= BRANCH16_MIN && diff <= BRANCH16_MAX) {
432*22dc650dSSadaf Ebrahimi 			inst--;
433*22dc650dSSadaf Ebrahimi 			inst[0] = (inst[0] & 0xfc0003ff) ^ 0x4000000;
434*22dc650dSSadaf Ebrahimi 			jump->flags |= PATCH_B;
435*22dc650dSSadaf Ebrahimi 			jump->addr = (sljit_uw)inst;
436*22dc650dSSadaf Ebrahimi 			return inst;
437*22dc650dSSadaf Ebrahimi 		}
438*22dc650dSSadaf Ebrahimi 
439*22dc650dSSadaf Ebrahimi 		diff -= SSIZE_OF(ins);
440*22dc650dSSadaf Ebrahimi 	}
441*22dc650dSSadaf Ebrahimi 
442*22dc650dSSadaf Ebrahimi 	if (diff >= JUMP_MIN && diff <= JUMP_MAX) {
443*22dc650dSSadaf Ebrahimi 		if (jump->flags & IS_COND) {
444*22dc650dSSadaf Ebrahimi 			inst[-1] |= (sljit_ins)IMM_I16(2);
445*22dc650dSSadaf Ebrahimi 		}
446*22dc650dSSadaf Ebrahimi 
447*22dc650dSSadaf Ebrahimi 		jump->flags |= PATCH_J;
448*22dc650dSSadaf Ebrahimi 		return inst;
449*22dc650dSSadaf Ebrahimi 	}
450*22dc650dSSadaf Ebrahimi 
451*22dc650dSSadaf Ebrahimi 	if (diff >= S32_MIN && diff <= S32_MAX) {
452*22dc650dSSadaf Ebrahimi 		if (jump->flags & IS_COND)
453*22dc650dSSadaf Ebrahimi 			inst[-1] |= (sljit_ins)IMM_I16(3);
454*22dc650dSSadaf Ebrahimi 
455*22dc650dSSadaf Ebrahimi 		jump->flags |= PATCH_REL32;
456*22dc650dSSadaf Ebrahimi 		inst[1] = inst[0];
457*22dc650dSSadaf Ebrahimi 		return inst + 1;
458*22dc650dSSadaf Ebrahimi 	}
459*22dc650dSSadaf Ebrahimi 
460*22dc650dSSadaf Ebrahimi 	if (target_addr <= (sljit_uw)S32_MAX) {
461*22dc650dSSadaf Ebrahimi 		if (jump->flags & IS_COND)
462*22dc650dSSadaf Ebrahimi 			inst[-1] |= (sljit_ins)IMM_I16(3);
463*22dc650dSSadaf Ebrahimi 
464*22dc650dSSadaf Ebrahimi 		jump->flags |= PATCH_ABS32;
465*22dc650dSSadaf Ebrahimi 		inst[1] = inst[0];
466*22dc650dSSadaf Ebrahimi 		return inst + 1;
467*22dc650dSSadaf Ebrahimi 	}
468*22dc650dSSadaf Ebrahimi 
469*22dc650dSSadaf Ebrahimi 	if (target_addr <= S52_MAX) {
470*22dc650dSSadaf Ebrahimi 		if (jump->flags & IS_COND)
471*22dc650dSSadaf Ebrahimi 			inst[-1] |= (sljit_ins)IMM_I16(4);
472*22dc650dSSadaf Ebrahimi 
473*22dc650dSSadaf Ebrahimi 		jump->flags |= PATCH_ABS52;
474*22dc650dSSadaf Ebrahimi 		inst[2] = inst[0];
475*22dc650dSSadaf Ebrahimi 		return inst + 2;
476*22dc650dSSadaf Ebrahimi 	}
477*22dc650dSSadaf Ebrahimi 
478*22dc650dSSadaf Ebrahimi exit:
479*22dc650dSSadaf Ebrahimi 	if (jump->flags & IS_COND)
480*22dc650dSSadaf Ebrahimi 		inst[-1] |= (sljit_ins)IMM_I16(5);
481*22dc650dSSadaf Ebrahimi 	inst[3] = inst[0];
482*22dc650dSSadaf Ebrahimi 	return inst + 3;
483*22dc650dSSadaf Ebrahimi }
484*22dc650dSSadaf Ebrahimi 
mov_addr_get_length(struct sljit_jump * jump,sljit_ins * code_ptr,sljit_ins * code,sljit_sw executable_offset)485*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
486*22dc650dSSadaf Ebrahimi {
487*22dc650dSSadaf Ebrahimi 	sljit_uw addr;
488*22dc650dSSadaf Ebrahimi 	sljit_sw diff;
489*22dc650dSSadaf Ebrahimi 	SLJIT_UNUSED_ARG(executable_offset);
490*22dc650dSSadaf Ebrahimi 
491*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT));
492*22dc650dSSadaf Ebrahimi 	if (jump->flags & JUMP_ADDR)
493*22dc650dSSadaf Ebrahimi 		addr = jump->u.target;
494*22dc650dSSadaf Ebrahimi 	else
495*22dc650dSSadaf Ebrahimi 		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);
496*22dc650dSSadaf Ebrahimi 
497*22dc650dSSadaf Ebrahimi 	diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
498*22dc650dSSadaf Ebrahimi 
499*22dc650dSSadaf Ebrahimi 	if (diff >= S32_MIN && diff <= S32_MAX) {
500*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
501*22dc650dSSadaf Ebrahimi 		jump->flags |= PATCH_REL32;
502*22dc650dSSadaf Ebrahimi 		return 1;
503*22dc650dSSadaf Ebrahimi 	}
504*22dc650dSSadaf Ebrahimi 
505*22dc650dSSadaf Ebrahimi 	if (addr <= S32_MAX) {
506*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
507*22dc650dSSadaf Ebrahimi 		jump->flags |= PATCH_ABS32;
508*22dc650dSSadaf Ebrahimi 		return 1;
509*22dc650dSSadaf Ebrahimi 	}
510*22dc650dSSadaf Ebrahimi 
511*22dc650dSSadaf Ebrahimi 	if (addr <= S52_MAX) {
512*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(jump->flags >= ((sljit_uw)2 << JUMP_SIZE_SHIFT));
513*22dc650dSSadaf Ebrahimi 		jump->flags |= PATCH_ABS52;
514*22dc650dSSadaf Ebrahimi 		return 2;
515*22dc650dSSadaf Ebrahimi 	}
516*22dc650dSSadaf Ebrahimi 
517*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT));
518*22dc650dSSadaf Ebrahimi 	return 3;
519*22dc650dSSadaf Ebrahimi }
520*22dc650dSSadaf Ebrahimi 
load_addr_to_reg(struct sljit_jump * jump,sljit_sw executable_offset)521*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump, sljit_sw executable_offset)
522*22dc650dSSadaf Ebrahimi {
523*22dc650dSSadaf Ebrahimi 	sljit_uw flags = jump->flags;
524*22dc650dSSadaf Ebrahimi 	sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
525*22dc650dSSadaf Ebrahimi 	sljit_ins *ins = (sljit_ins*)jump->addr;
526*22dc650dSSadaf Ebrahimi 	sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : TMP_REG1;
527*22dc650dSSadaf Ebrahimi 	SLJIT_UNUSED_ARG(executable_offset);
528*22dc650dSSadaf Ebrahimi 
529*22dc650dSSadaf Ebrahimi 	if (flags & PATCH_REL32) {
530*22dc650dSSadaf Ebrahimi 		addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset);
531*22dc650dSSadaf Ebrahimi 
532*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX);
533*22dc650dSSadaf Ebrahimi 
534*22dc650dSSadaf Ebrahimi 		if ((addr & 0x800) != 0)
535*22dc650dSSadaf Ebrahimi 			addr += 0x1000;
536*22dc650dSSadaf Ebrahimi 
537*22dc650dSSadaf Ebrahimi 		ins[0] = PCADDU12I | RD(reg) | IMM_I20(addr);
538*22dc650dSSadaf Ebrahimi 
539*22dc650dSSadaf Ebrahimi 		if (!(flags & JUMP_MOV_ADDR)) {
540*22dc650dSSadaf Ebrahimi 			SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL);
541*22dc650dSSadaf Ebrahimi 			ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);
542*22dc650dSSadaf Ebrahimi 		} else
543*22dc650dSSadaf Ebrahimi 			ins[1] = ADDI_D | RD(reg) | RJ(reg) | IMM_I12(addr);
544*22dc650dSSadaf Ebrahimi 		return;
545*22dc650dSSadaf Ebrahimi 	}
546*22dc650dSSadaf Ebrahimi 
547*22dc650dSSadaf Ebrahimi 	if (flags & PATCH_ABS32) {
548*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(addr <= S32_MAX);
549*22dc650dSSadaf Ebrahimi 		ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
550*22dc650dSSadaf Ebrahimi 	} else if (flags & PATCH_ABS52) {
551*22dc650dSSadaf Ebrahimi 		ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
552*22dc650dSSadaf Ebrahimi 		ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);
553*22dc650dSSadaf Ebrahimi 		ins += 1;
554*22dc650dSSadaf Ebrahimi 	} else {
555*22dc650dSSadaf Ebrahimi 		ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
556*22dc650dSSadaf Ebrahimi 		ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);
557*22dc650dSSadaf Ebrahimi 		ins[2] = LU52I_D | RD(reg) | RJ(reg) | IMM_I12(addr >> 52);
558*22dc650dSSadaf Ebrahimi 		ins += 2;
559*22dc650dSSadaf Ebrahimi 	}
560*22dc650dSSadaf Ebrahimi 
561*22dc650dSSadaf Ebrahimi 	if (!(flags & JUMP_MOV_ADDR)) {
562*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL);
563*22dc650dSSadaf Ebrahimi 		ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);
564*22dc650dSSadaf Ebrahimi 	} else
565*22dc650dSSadaf Ebrahimi 		ins[1] = ORI | RD(reg) | RJ(reg) | IMM_I12(addr);
566*22dc650dSSadaf Ebrahimi }
567*22dc650dSSadaf Ebrahimi 
reduce_code_size(struct sljit_compiler * compiler)568*22dc650dSSadaf Ebrahimi static void reduce_code_size(struct sljit_compiler *compiler)
569*22dc650dSSadaf Ebrahimi {
570*22dc650dSSadaf Ebrahimi 	struct sljit_label *label;
571*22dc650dSSadaf Ebrahimi 	struct sljit_jump *jump;
572*22dc650dSSadaf Ebrahimi 	struct sljit_const *const_;
573*22dc650dSSadaf Ebrahimi 	SLJIT_NEXT_DEFINE_TYPES;
574*22dc650dSSadaf Ebrahimi 	sljit_uw total_size;
575*22dc650dSSadaf Ebrahimi 	sljit_uw size_reduce = 0;
576*22dc650dSSadaf Ebrahimi 	sljit_sw diff;
577*22dc650dSSadaf Ebrahimi 
578*22dc650dSSadaf Ebrahimi 	label = compiler->labels;
579*22dc650dSSadaf Ebrahimi 	jump = compiler->jumps;
580*22dc650dSSadaf Ebrahimi 	const_ = compiler->consts;
581*22dc650dSSadaf Ebrahimi 
582*22dc650dSSadaf Ebrahimi 	SLJIT_NEXT_INIT_TYPES();
583*22dc650dSSadaf Ebrahimi 
584*22dc650dSSadaf Ebrahimi 	while (1) {
585*22dc650dSSadaf Ebrahimi 		SLJIT_GET_NEXT_MIN();
586*22dc650dSSadaf Ebrahimi 
587*22dc650dSSadaf Ebrahimi 		if (next_min_addr == SLJIT_MAX_ADDRESS)
588*22dc650dSSadaf Ebrahimi 			break;
589*22dc650dSSadaf Ebrahimi 
590*22dc650dSSadaf Ebrahimi 		if (next_min_addr == next_label_size) {
591*22dc650dSSadaf Ebrahimi 			label->size -= size_reduce;
592*22dc650dSSadaf Ebrahimi 
593*22dc650dSSadaf Ebrahimi 			label = label->next;
594*22dc650dSSadaf Ebrahimi 			next_label_size = SLJIT_GET_NEXT_SIZE(label);
595*22dc650dSSadaf Ebrahimi 		}
596*22dc650dSSadaf Ebrahimi 
597*22dc650dSSadaf Ebrahimi 		if (next_min_addr == next_const_addr) {
598*22dc650dSSadaf Ebrahimi 			const_->addr -= size_reduce;
599*22dc650dSSadaf Ebrahimi 			const_ = const_->next;
600*22dc650dSSadaf Ebrahimi 			next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
601*22dc650dSSadaf Ebrahimi 			continue;
602*22dc650dSSadaf Ebrahimi 		}
603*22dc650dSSadaf Ebrahimi 
604*22dc650dSSadaf Ebrahimi 		if (next_min_addr != next_jump_addr)
605*22dc650dSSadaf Ebrahimi 			continue;
606*22dc650dSSadaf Ebrahimi 
607*22dc650dSSadaf Ebrahimi 		jump->addr -= size_reduce;
608*22dc650dSSadaf Ebrahimi 		if (!(jump->flags & JUMP_MOV_ADDR)) {
609*22dc650dSSadaf Ebrahimi 			total_size = JUMP_MAX_SIZE;
610*22dc650dSSadaf Ebrahimi 
611*22dc650dSSadaf Ebrahimi 			if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
612*22dc650dSSadaf Ebrahimi 				if (jump->flags & JUMP_ADDR) {
613*22dc650dSSadaf Ebrahimi 					if (jump->u.target <= S32_MAX)
614*22dc650dSSadaf Ebrahimi 							total_size = 2;
615*22dc650dSSadaf Ebrahimi 					else if (jump->u.target <= S52_MAX)
616*22dc650dSSadaf Ebrahimi 							total_size = 3;
617*22dc650dSSadaf Ebrahimi 				} else {
618*22dc650dSSadaf Ebrahimi 					/* Unit size: instruction. */
619*22dc650dSSadaf Ebrahimi 					diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
620*22dc650dSSadaf Ebrahimi 
621*22dc650dSSadaf Ebrahimi 					if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH16_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH16_MIN / SSIZE_OF(ins)))
622*22dc650dSSadaf Ebrahimi 						total_size = 0;
623*22dc650dSSadaf Ebrahimi 					else if (diff >= (JUMP_MIN / SSIZE_OF(ins)) && diff <= (JUMP_MAX / SSIZE_OF(ins)))
624*22dc650dSSadaf Ebrahimi 						total_size = 1;
625*22dc650dSSadaf Ebrahimi 					else if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
626*22dc650dSSadaf Ebrahimi 						total_size = 2;
627*22dc650dSSadaf Ebrahimi 				}
628*22dc650dSSadaf Ebrahimi 			}
629*22dc650dSSadaf Ebrahimi 
630*22dc650dSSadaf Ebrahimi 			size_reduce += JUMP_MAX_SIZE - total_size;
631*22dc650dSSadaf Ebrahimi 			jump->flags |= total_size << JUMP_SIZE_SHIFT;
632*22dc650dSSadaf Ebrahimi 		} else {
633*22dc650dSSadaf Ebrahimi 			total_size = 3;
634*22dc650dSSadaf Ebrahimi 
635*22dc650dSSadaf Ebrahimi 			if (!(jump->flags & JUMP_ADDR)) {
636*22dc650dSSadaf Ebrahimi 				/* Real size minus 1. Unit size: instruction. */
637*22dc650dSSadaf Ebrahimi 				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
638*22dc650dSSadaf Ebrahimi 
639*22dc650dSSadaf Ebrahimi 				if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
640*22dc650dSSadaf Ebrahimi 					total_size = 1;
641*22dc650dSSadaf Ebrahimi 			} else if (jump->u.target < S32_MAX)
642*22dc650dSSadaf Ebrahimi 				total_size = 1;
643*22dc650dSSadaf Ebrahimi 			else if (jump->u.target <= S52_MAX)
644*22dc650dSSadaf Ebrahimi 				total_size = 2;
645*22dc650dSSadaf Ebrahimi 
646*22dc650dSSadaf Ebrahimi 			size_reduce += 3 - total_size;
647*22dc650dSSadaf Ebrahimi 			jump->flags |= total_size << JUMP_SIZE_SHIFT;
648*22dc650dSSadaf Ebrahimi 		}
649*22dc650dSSadaf Ebrahimi 
650*22dc650dSSadaf Ebrahimi 		jump = jump->next;
651*22dc650dSSadaf Ebrahimi 		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
652*22dc650dSSadaf Ebrahimi 	}
653*22dc650dSSadaf Ebrahimi 
654*22dc650dSSadaf Ebrahimi 	compiler->size -= size_reduce;
655*22dc650dSSadaf Ebrahimi }
656*22dc650dSSadaf Ebrahimi 
sljit_generate_code(struct sljit_compiler * compiler,sljit_s32 options,void * exec_allocator_data)657*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
658*22dc650dSSadaf Ebrahimi {
659*22dc650dSSadaf Ebrahimi 	struct sljit_memory_fragment *buf;
660*22dc650dSSadaf Ebrahimi 	sljit_ins *code;
661*22dc650dSSadaf Ebrahimi 	sljit_ins *code_ptr;
662*22dc650dSSadaf Ebrahimi 	sljit_ins *buf_ptr;
663*22dc650dSSadaf Ebrahimi 	sljit_ins *buf_end;
664*22dc650dSSadaf Ebrahimi 	sljit_uw word_count;
665*22dc650dSSadaf Ebrahimi 	SLJIT_NEXT_DEFINE_TYPES;
666*22dc650dSSadaf Ebrahimi 	sljit_sw executable_offset;
667*22dc650dSSadaf Ebrahimi 	sljit_uw addr;
668*22dc650dSSadaf Ebrahimi 
669*22dc650dSSadaf Ebrahimi 	struct sljit_label *label;
670*22dc650dSSadaf Ebrahimi 	struct sljit_jump *jump;
671*22dc650dSSadaf Ebrahimi 	struct sljit_const *const_;
672*22dc650dSSadaf Ebrahimi 
673*22dc650dSSadaf Ebrahimi 	CHECK_ERROR_PTR();
674*22dc650dSSadaf Ebrahimi 	CHECK_PTR(check_sljit_generate_code(compiler));
675*22dc650dSSadaf Ebrahimi 
676*22dc650dSSadaf Ebrahimi 	reduce_code_size(compiler);
677*22dc650dSSadaf Ebrahimi 
678*22dc650dSSadaf Ebrahimi 	code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset);
679*22dc650dSSadaf Ebrahimi 	PTR_FAIL_WITH_EXEC_IF(code);
680*22dc650dSSadaf Ebrahimi 
681*22dc650dSSadaf Ebrahimi 	reverse_buf(compiler);
682*22dc650dSSadaf Ebrahimi 	buf = compiler->buf;
683*22dc650dSSadaf Ebrahimi 
684*22dc650dSSadaf Ebrahimi 	code_ptr = code;
685*22dc650dSSadaf Ebrahimi 	word_count = 0;
686*22dc650dSSadaf Ebrahimi 	label = compiler->labels;
687*22dc650dSSadaf Ebrahimi 	jump = compiler->jumps;
688*22dc650dSSadaf Ebrahimi 	const_ = compiler->consts;
689*22dc650dSSadaf Ebrahimi 	SLJIT_NEXT_INIT_TYPES();
690*22dc650dSSadaf Ebrahimi 	SLJIT_GET_NEXT_MIN();
691*22dc650dSSadaf Ebrahimi 
692*22dc650dSSadaf Ebrahimi 	do {
693*22dc650dSSadaf Ebrahimi 		buf_ptr = (sljit_ins*)buf->memory;
694*22dc650dSSadaf Ebrahimi 		buf_end = buf_ptr + (buf->used_size >> 2);
695*22dc650dSSadaf Ebrahimi 		do {
696*22dc650dSSadaf Ebrahimi 			*code_ptr = *buf_ptr++;
697*22dc650dSSadaf Ebrahimi 			if (next_min_addr == word_count) {
698*22dc650dSSadaf Ebrahimi 				SLJIT_ASSERT(!label || label->size >= word_count);
699*22dc650dSSadaf Ebrahimi 				SLJIT_ASSERT(!jump || jump->addr >= word_count);
700*22dc650dSSadaf Ebrahimi 				SLJIT_ASSERT(!const_ || const_->addr >= word_count);
701*22dc650dSSadaf Ebrahimi 
702*22dc650dSSadaf Ebrahimi 				/* These structures are ordered by their address. */
703*22dc650dSSadaf Ebrahimi 				if (next_min_addr == next_label_size) {
704*22dc650dSSadaf Ebrahimi 					label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
705*22dc650dSSadaf Ebrahimi 					label->size = (sljit_uw)(code_ptr - code);
706*22dc650dSSadaf Ebrahimi 					label = label->next;
707*22dc650dSSadaf Ebrahimi 					next_label_size = SLJIT_GET_NEXT_SIZE(label);
708*22dc650dSSadaf Ebrahimi 				}
709*22dc650dSSadaf Ebrahimi 
710*22dc650dSSadaf Ebrahimi 				if (next_min_addr == next_jump_addr) {
711*22dc650dSSadaf Ebrahimi 					if (!(jump->flags & JUMP_MOV_ADDR)) {
712*22dc650dSSadaf Ebrahimi 						word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);
713*22dc650dSSadaf Ebrahimi 						jump->addr = (sljit_uw)code_ptr;
714*22dc650dSSadaf Ebrahimi 						code_ptr = detect_jump_type(jump, code, executable_offset);
715*22dc650dSSadaf Ebrahimi 						SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins)));
716*22dc650dSSadaf Ebrahimi 					} else {
717*22dc650dSSadaf Ebrahimi 						word_count += jump->flags >> JUMP_SIZE_SHIFT;
718*22dc650dSSadaf Ebrahimi 						addr = (sljit_uw)code_ptr;
719*22dc650dSSadaf Ebrahimi 						code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
720*22dc650dSSadaf Ebrahimi 						jump->addr = addr;
721*22dc650dSSadaf Ebrahimi 					}
722*22dc650dSSadaf Ebrahimi 					jump = jump->next;
723*22dc650dSSadaf Ebrahimi 					next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
724*22dc650dSSadaf Ebrahimi 				} else if (next_min_addr == next_const_addr) {
725*22dc650dSSadaf Ebrahimi 					const_->addr = (sljit_uw)code_ptr;
726*22dc650dSSadaf Ebrahimi 					const_ = const_->next;
727*22dc650dSSadaf Ebrahimi 					next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
728*22dc650dSSadaf Ebrahimi 				}
729*22dc650dSSadaf Ebrahimi 
730*22dc650dSSadaf Ebrahimi 				SLJIT_GET_NEXT_MIN();
731*22dc650dSSadaf Ebrahimi 			}
732*22dc650dSSadaf Ebrahimi 			code_ptr++;
733*22dc650dSSadaf Ebrahimi 			word_count++;
734*22dc650dSSadaf Ebrahimi 		} while (buf_ptr < buf_end);
735*22dc650dSSadaf Ebrahimi 
736*22dc650dSSadaf Ebrahimi 		buf = buf->next;
737*22dc650dSSadaf Ebrahimi 	} while (buf);
738*22dc650dSSadaf Ebrahimi 
739*22dc650dSSadaf Ebrahimi 	if (label && label->size == word_count) {
740*22dc650dSSadaf Ebrahimi 		label->u.addr = (sljit_uw)code_ptr;
741*22dc650dSSadaf Ebrahimi 		label->size = (sljit_uw)(code_ptr - code);
742*22dc650dSSadaf Ebrahimi 		label = label->next;
743*22dc650dSSadaf Ebrahimi 	}
744*22dc650dSSadaf Ebrahimi 
745*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(!label);
746*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(!jump);
747*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(!const_);
748*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
749*22dc650dSSadaf Ebrahimi 
750*22dc650dSSadaf Ebrahimi 	jump = compiler->jumps;
751*22dc650dSSadaf Ebrahimi 	while (jump) {
752*22dc650dSSadaf Ebrahimi 		do {
753*22dc650dSSadaf Ebrahimi 			if (!(jump->flags & (PATCH_B | PATCH_J)) || (jump->flags & JUMP_MOV_ADDR)) {
754*22dc650dSSadaf Ebrahimi 				load_addr_to_reg(jump, executable_offset);
755*22dc650dSSadaf Ebrahimi 				break;
756*22dc650dSSadaf Ebrahimi 			}
757*22dc650dSSadaf Ebrahimi 
758*22dc650dSSadaf Ebrahimi 			addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
759*22dc650dSSadaf Ebrahimi 			buf_ptr = (sljit_ins *)jump->addr;
760*22dc650dSSadaf Ebrahimi 			addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
761*22dc650dSSadaf Ebrahimi 
762*22dc650dSSadaf Ebrahimi 			if (jump->flags & PATCH_B) {
763*22dc650dSSadaf Ebrahimi 				SLJIT_ASSERT((sljit_sw)addr >= BRANCH16_MIN && (sljit_sw)addr <= BRANCH16_MAX);
764*22dc650dSSadaf Ebrahimi 				buf_ptr[0] |= (sljit_ins)IMM_I16(addr >> 2);
765*22dc650dSSadaf Ebrahimi 				break;
766*22dc650dSSadaf Ebrahimi 			}
767*22dc650dSSadaf Ebrahimi 
768*22dc650dSSadaf Ebrahimi 			SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX);
769*22dc650dSSadaf Ebrahimi 			if (jump->flags & IS_CALL)
770*22dc650dSSadaf Ebrahimi 				buf_ptr[0] = BL | (sljit_ins)IMM_I26(addr >> 2);
771*22dc650dSSadaf Ebrahimi 			else
772*22dc650dSSadaf Ebrahimi 				buf_ptr[0] = B | (sljit_ins)IMM_I26(addr >> 2);
773*22dc650dSSadaf Ebrahimi 		} while (0);
774*22dc650dSSadaf Ebrahimi 		jump = jump->next;
775*22dc650dSSadaf Ebrahimi 	}
776*22dc650dSSadaf Ebrahimi 
777*22dc650dSSadaf Ebrahimi 	compiler->error = SLJIT_ERR_COMPILED;
778*22dc650dSSadaf Ebrahimi 	compiler->executable_offset = executable_offset;
779*22dc650dSSadaf Ebrahimi 	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
780*22dc650dSSadaf Ebrahimi 
781*22dc650dSSadaf Ebrahimi 	code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
782*22dc650dSSadaf Ebrahimi 	code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
783*22dc650dSSadaf Ebrahimi 
784*22dc650dSSadaf Ebrahimi 	SLJIT_CACHE_FLUSH(code, code_ptr);
785*22dc650dSSadaf Ebrahimi 	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
786*22dc650dSSadaf Ebrahimi 	return code;
787*22dc650dSSadaf Ebrahimi }
788*22dc650dSSadaf Ebrahimi 
sljit_has_cpu_feature(sljit_s32 feature_type)789*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
790*22dc650dSSadaf Ebrahimi {
791*22dc650dSSadaf Ebrahimi 	switch (feature_type)
792*22dc650dSSadaf Ebrahimi 	{
793*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_FPU:
794*22dc650dSSadaf Ebrahimi #ifdef SLJIT_IS_FPU_AVAILABLE
795*22dc650dSSadaf Ebrahimi 		return (SLJIT_IS_FPU_AVAILABLE) != 0;
796*22dc650dSSadaf Ebrahimi #else
797*22dc650dSSadaf Ebrahimi 		/* Available by default. */
798*22dc650dSSadaf Ebrahimi 		return 1;
799*22dc650dSSadaf Ebrahimi #endif
800*22dc650dSSadaf Ebrahimi 
801*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_LASX:
802*22dc650dSSadaf Ebrahimi 		return (LOONGARCH_HWCAP_LASX & get_cpu_features(GET_HWCAP));
803*22dc650dSSadaf Ebrahimi 
804*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_SIMD:
805*22dc650dSSadaf Ebrahimi 		return (LOONGARCH_HWCAP_LSX & get_cpu_features(GET_HWCAP));
806*22dc650dSSadaf Ebrahimi 
807*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_ATOMIC:
808*22dc650dSSadaf Ebrahimi 		return (LOONGARCH_CFG2_LAMCAS & get_cpu_features(GET_CFG2));
809*22dc650dSSadaf Ebrahimi 
810*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_CLZ:
811*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_CTZ:
812*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_REV:
813*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_ROT:
814*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_PREFETCH:
815*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_COPY_F32:
816*22dc650dSSadaf Ebrahimi 	case SLJIT_HAS_COPY_F64:
817*22dc650dSSadaf Ebrahimi 		return 1;
818*22dc650dSSadaf Ebrahimi 
819*22dc650dSSadaf Ebrahimi 	default:
820*22dc650dSSadaf Ebrahimi 		return 0;
821*22dc650dSSadaf Ebrahimi 	}
822*22dc650dSSadaf Ebrahimi }
823*22dc650dSSadaf Ebrahimi 
sljit_cmp_info(sljit_s32 type)824*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
825*22dc650dSSadaf Ebrahimi {
826*22dc650dSSadaf Ebrahimi 	SLJIT_UNUSED_ARG(type);
827*22dc650dSSadaf Ebrahimi 
828*22dc650dSSadaf Ebrahimi 	return 0;
829*22dc650dSSadaf Ebrahimi }
830*22dc650dSSadaf Ebrahimi 
831*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
832*22dc650dSSadaf Ebrahimi /*  Entry, exit                                                          */
833*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
834*22dc650dSSadaf Ebrahimi 
/*
 * Memory access descriptors. OR-ing one width, the optional SIGNED_DATA and
 * the optional LOAD_DATA bit gives an index into the data_transfer_insts
 * array (even index: store, odd index: load).
 */
#define LOAD_DATA	0x01

/* Access width. */
#define WORD_DATA	0x00
#define BYTE_DATA	0x02
#define HALF_DATA	0x04
#define INT_DATA	0x06

/* Selects the signed half of the integer entries. */
#define SIGNED_DATA	0x08

/* Separates integer and floating point registers: indexes up to GPR_REG
   are integer accesses, the entries above it are floating point ones. */
#define GPR_REG		0x0f
#define DOUBLE_DATA	0x10
#define SINGLE_DATA	0x12

/* Mask covering every index bit defined above. */
#define MEM_MASK	0x1f

/* Operation flags; they occupy the bits above MEM_MASK. */
#define ARG_TEST	0x00020
#define ALT_KEEP_CACHE	0x00040
#define CUMULATIVE_OP	0x00080
#define IMM_OP		0x00100
#define MOVE_OP		0x00200
#define SRC2_IMM	0x00400

/* Operand placement flags. */
#define UNUSED_DEST	0x00800
#define REG_DEST	0x01000
#define REG1_SOURCE	0x02000
#define REG2_SOURCE	0x04000
#define SLOW_SRC1	0x08000
#define SLOW_SRC2	0x10000
#define SLOW_DEST	0x20000
#define MEM_USE_TMP2	0x40000

/* Word sized store / load used for stack frame slots. */
#define STACK_STORE	ST_D
#define STACK_LOAD	LD_D
867*22dc650dSSadaf Ebrahimi 
/*
 * Emits the shortest of four fixed instruction sequences that loads the
 * constant imm into dst_r:
 *   - 12 bit signed value:  ADDI_D from TMP_ZERO
 *   - 32 bit signed value:  LU12I_W, ORI
 *   - 52 bit signed value:  LU12I_W, ORI, LU32I_D
 *   - anything else:        LU12I_W, ORI, LU32I_D, LU52I_D
 * Returns the result of the last push_inst(), or the compiler error as soon
 * as an earlier push fails.
 */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm)
{
	if (imm >= I12_MIN && imm <= I12_MAX)
		return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(TMP_ZERO) | IMM_I12(imm));

	/* Every longer sequence starts with bits 12..31 ... */
	FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));

	/* ... followed by bits 0..11. */
	if (imm >= -0x80000000l && imm <= 0x7fffffffl)
		return push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm));

	FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm)));

	/* Bits 32..51. */
	if (imm >= -0x8000000000000l && imm <= 0x7ffffffffffffl)
		return push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5));

	FAIL_IF(push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5)));

	/* Bits 52..63. */
	return push_inst(compiler, LU52I_D | RD(dst_r) | RJ(dst_r) | IMM_I12(imm >> 52));
}
886*22dc650dSSadaf Ebrahimi 
887*22dc650dSSadaf Ebrahimi #define STACK_MAX_DISTANCE (-I12_MIN)
888*22dc650dSSadaf Ebrahimi 
889*22dc650dSSadaf Ebrahimi static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw);
890*22dc650dSSadaf Ebrahimi 
sljit_emit_enter(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)891*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
892*22dc650dSSadaf Ebrahimi 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
893*22dc650dSSadaf Ebrahimi 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
894*22dc650dSSadaf Ebrahimi {
895*22dc650dSSadaf Ebrahimi 	sljit_s32 i, tmp, offset;
896*22dc650dSSadaf Ebrahimi 	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
897*22dc650dSSadaf Ebrahimi 
898*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
899*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
900*22dc650dSSadaf Ebrahimi 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
901*22dc650dSSadaf Ebrahimi 
902*22dc650dSSadaf Ebrahimi 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
903*22dc650dSSadaf Ebrahimi 	local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
904*22dc650dSSadaf Ebrahimi 
905*22dc650dSSadaf Ebrahimi 	local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
906*22dc650dSSadaf Ebrahimi 	compiler->local_size = local_size;
907*22dc650dSSadaf Ebrahimi 
908*22dc650dSSadaf Ebrahimi 	if (local_size <= STACK_MAX_DISTANCE) {
909*22dc650dSSadaf Ebrahimi 		/* Frequent case. */
910*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));
911*22dc650dSSadaf Ebrahimi 		offset = local_size - SSIZE_OF(sw);
912*22dc650dSSadaf Ebrahimi 		local_size = 0;
913*22dc650dSSadaf Ebrahimi 	} else {
914*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(STACK_MAX_DISTANCE)));
915*22dc650dSSadaf Ebrahimi 		local_size -= STACK_MAX_DISTANCE;
916*22dc650dSSadaf Ebrahimi 
917*22dc650dSSadaf Ebrahimi 		if (local_size > STACK_MAX_DISTANCE)
918*22dc650dSSadaf Ebrahimi 			FAIL_IF(load_immediate(compiler, TMP_REG1, local_size));
919*22dc650dSSadaf Ebrahimi 		offset = STACK_MAX_DISTANCE - SSIZE_OF(sw);
920*22dc650dSSadaf Ebrahimi 	}
921*22dc650dSSadaf Ebrahimi 
922*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, STACK_STORE | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));
923*22dc650dSSadaf Ebrahimi 
924*22dc650dSSadaf Ebrahimi 	tmp = SLJIT_S0 - saveds;
925*22dc650dSSadaf Ebrahimi 	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
926*22dc650dSSadaf Ebrahimi 		offset -= SSIZE_OF(sw);
927*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
928*22dc650dSSadaf Ebrahimi 	}
929*22dc650dSSadaf Ebrahimi 
930*22dc650dSSadaf Ebrahimi 	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
931*22dc650dSSadaf Ebrahimi 		offset -= SSIZE_OF(sw);
932*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
933*22dc650dSSadaf Ebrahimi 	}
934*22dc650dSSadaf Ebrahimi 
935*22dc650dSSadaf Ebrahimi 	tmp = SLJIT_FS0 - fsaveds;
936*22dc650dSSadaf Ebrahimi 	for (i = SLJIT_FS0; i > tmp; i--) {
937*22dc650dSSadaf Ebrahimi 		offset -= SSIZE_OF(f64);
938*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
939*22dc650dSSadaf Ebrahimi 	}
940*22dc650dSSadaf Ebrahimi 
941*22dc650dSSadaf Ebrahimi 	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
942*22dc650dSSadaf Ebrahimi 		offset -= SSIZE_OF(f64);
943*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
944*22dc650dSSadaf Ebrahimi 	}
945*22dc650dSSadaf Ebrahimi 
946*22dc650dSSadaf Ebrahimi 	if (local_size > STACK_MAX_DISTANCE)
947*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, SUB_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG1)));
948*22dc650dSSadaf Ebrahimi 	else if (local_size > 0)
949*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));
950*22dc650dSSadaf Ebrahimi 
951*22dc650dSSadaf Ebrahimi 	if (options & SLJIT_ENTER_REG_ARG)
952*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
953*22dc650dSSadaf Ebrahimi 
954*22dc650dSSadaf Ebrahimi 	arg_types >>= SLJIT_ARG_SHIFT;
955*22dc650dSSadaf Ebrahimi 	saved_arg_count = 0;
956*22dc650dSSadaf Ebrahimi 	tmp = SLJIT_R0;
957*22dc650dSSadaf Ebrahimi 
958*22dc650dSSadaf Ebrahimi 	while (arg_types > 0) {
959*22dc650dSSadaf Ebrahimi 		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
960*22dc650dSSadaf Ebrahimi 			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
961*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_S0 - saved_arg_count) | RJ(tmp) | IMM_I12(0)));
962*22dc650dSSadaf Ebrahimi 				saved_arg_count++;
963*22dc650dSSadaf Ebrahimi 			}
964*22dc650dSSadaf Ebrahimi 			tmp++;
965*22dc650dSSadaf Ebrahimi 		}
966*22dc650dSSadaf Ebrahimi 
967*22dc650dSSadaf Ebrahimi 		arg_types >>= SLJIT_ARG_SHIFT;
968*22dc650dSSadaf Ebrahimi 	}
969*22dc650dSSadaf Ebrahimi 
970*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
971*22dc650dSSadaf Ebrahimi }
972*22dc650dSSadaf Ebrahimi 
973*22dc650dSSadaf Ebrahimi #undef STACK_MAX_DISTANCE
974*22dc650dSSadaf Ebrahimi 
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)975*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
976*22dc650dSSadaf Ebrahimi 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
977*22dc650dSSadaf Ebrahimi 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
978*22dc650dSSadaf Ebrahimi {
979*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
980*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
981*22dc650dSSadaf Ebrahimi 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
982*22dc650dSSadaf Ebrahimi 
983*22dc650dSSadaf Ebrahimi 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
984*22dc650dSSadaf Ebrahimi 	local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
985*22dc650dSSadaf Ebrahimi 
986*22dc650dSSadaf Ebrahimi 	compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
987*22dc650dSSadaf Ebrahimi 
988*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
989*22dc650dSSadaf Ebrahimi }
990*22dc650dSSadaf Ebrahimi 
991*22dc650dSSadaf Ebrahimi #define STACK_MAX_DISTANCE (-I12_MIN - 16)
992*22dc650dSSadaf Ebrahimi 
emit_stack_frame_release(struct sljit_compiler * compiler,sljit_s32 is_return_to)993*22dc650dSSadaf Ebrahimi static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
994*22dc650dSSadaf Ebrahimi {
995*22dc650dSSadaf Ebrahimi 	sljit_s32 i, tmp, offset;
996*22dc650dSSadaf Ebrahimi 	sljit_s32 local_size = compiler->local_size;
997*22dc650dSSadaf Ebrahimi 
998*22dc650dSSadaf Ebrahimi 	if (local_size > STACK_MAX_DISTANCE) {
999*22dc650dSSadaf Ebrahimi 		local_size -= STACK_MAX_DISTANCE;
1000*22dc650dSSadaf Ebrahimi 
1001*22dc650dSSadaf Ebrahimi 		if (local_size > STACK_MAX_DISTANCE) {
1002*22dc650dSSadaf Ebrahimi 			FAIL_IF(load_immediate(compiler, TMP_REG2, local_size));
1003*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, ADD_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG2)));
1004*22dc650dSSadaf Ebrahimi 		} else
1005*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size)));
1006*22dc650dSSadaf Ebrahimi 
1007*22dc650dSSadaf Ebrahimi 		local_size = STACK_MAX_DISTANCE;
1008*22dc650dSSadaf Ebrahimi 	}
1009*22dc650dSSadaf Ebrahimi 
1010*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(local_size > 0);
1011*22dc650dSSadaf Ebrahimi 
1012*22dc650dSSadaf Ebrahimi 	offset = local_size - SSIZE_OF(sw);
1013*22dc650dSSadaf Ebrahimi 	if (!is_return_to)
1014*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));
1015*22dc650dSSadaf Ebrahimi 
1016*22dc650dSSadaf Ebrahimi 	tmp = SLJIT_S0 - compiler->saveds;
1017*22dc650dSSadaf Ebrahimi 	for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
1018*22dc650dSSadaf Ebrahimi 		offset -= SSIZE_OF(sw);
1019*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1020*22dc650dSSadaf Ebrahimi 	}
1021*22dc650dSSadaf Ebrahimi 
1022*22dc650dSSadaf Ebrahimi 	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
1023*22dc650dSSadaf Ebrahimi 		offset -= SSIZE_OF(sw);
1024*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1025*22dc650dSSadaf Ebrahimi 	}
1026*22dc650dSSadaf Ebrahimi 
1027*22dc650dSSadaf Ebrahimi 	tmp = SLJIT_FS0 - compiler->fsaveds;
1028*22dc650dSSadaf Ebrahimi 	for (i = SLJIT_FS0; i > tmp; i--) {
1029*22dc650dSSadaf Ebrahimi 		offset -= SSIZE_OF(f64);
1030*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1031*22dc650dSSadaf Ebrahimi 	}
1032*22dc650dSSadaf Ebrahimi 
1033*22dc650dSSadaf Ebrahimi 	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1034*22dc650dSSadaf Ebrahimi 		offset -= SSIZE_OF(f64);
1035*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1036*22dc650dSSadaf Ebrahimi 	}
1037*22dc650dSSadaf Ebrahimi 
1038*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size));
1039*22dc650dSSadaf Ebrahimi }
1040*22dc650dSSadaf Ebrahimi 
1041*22dc650dSSadaf Ebrahimi #undef STACK_MAX_DISTANCE
1042*22dc650dSSadaf Ebrahimi 
sljit_emit_return_void(struct sljit_compiler * compiler)1043*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1044*22dc650dSSadaf Ebrahimi {
1045*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
1046*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_return_void(compiler));
1047*22dc650dSSadaf Ebrahimi 
1048*22dc650dSSadaf Ebrahimi 	FAIL_IF(emit_stack_frame_release(compiler, 0));
1049*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
1050*22dc650dSSadaf Ebrahimi }
1051*22dc650dSSadaf Ebrahimi 
sljit_emit_return_to(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)1052*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1053*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
1054*22dc650dSSadaf Ebrahimi {
1055*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
1056*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1057*22dc650dSSadaf Ebrahimi 
1058*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
1059*22dc650dSSadaf Ebrahimi 		ADJUST_LOCAL_OFFSET(src, srcw);
1060*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
1061*22dc650dSSadaf Ebrahimi 		src = TMP_REG1;
1062*22dc650dSSadaf Ebrahimi 		srcw = 0;
1063*22dc650dSSadaf Ebrahimi 	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1064*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
1065*22dc650dSSadaf Ebrahimi 		src = TMP_REG1;
1066*22dc650dSSadaf Ebrahimi 		srcw = 0;
1067*22dc650dSSadaf Ebrahimi 	}
1068*22dc650dSSadaf Ebrahimi 
1069*22dc650dSSadaf Ebrahimi 	FAIL_IF(emit_stack_frame_release(compiler, 1));
1070*22dc650dSSadaf Ebrahimi 
1071*22dc650dSSadaf Ebrahimi 	SLJIT_SKIP_CHECKS(compiler);
1072*22dc650dSSadaf Ebrahimi 	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1073*22dc650dSSadaf Ebrahimi }
1074*22dc650dSSadaf Ebrahimi 
1075*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
1076*22dc650dSSadaf Ebrahimi /*  Operators                                                            */
1077*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
1078*22dc650dSSadaf Ebrahimi 
1079*22dc650dSSadaf Ebrahimi static const sljit_ins data_transfer_insts[16 + 4] = {
1080*22dc650dSSadaf Ebrahimi /* u w s */ ST_D /* st.d */,
1081*22dc650dSSadaf Ebrahimi /* u w l */ LD_D /* ld.d */,
1082*22dc650dSSadaf Ebrahimi /* u b s */ ST_B /* st.b */,
1083*22dc650dSSadaf Ebrahimi /* u b l */ LD_BU /* ld.bu */,
1084*22dc650dSSadaf Ebrahimi /* u h s */ ST_H /* st.h */,
1085*22dc650dSSadaf Ebrahimi /* u h l */ LD_HU /* ld.hu */,
1086*22dc650dSSadaf Ebrahimi /* u i s */ ST_W /* st.w */,
1087*22dc650dSSadaf Ebrahimi /* u i l */ LD_WU /* ld.wu */,
1088*22dc650dSSadaf Ebrahimi 
1089*22dc650dSSadaf Ebrahimi /* s w s */ ST_D /* st.d */,
1090*22dc650dSSadaf Ebrahimi /* s w l */ LD_D /* ld.d */,
1091*22dc650dSSadaf Ebrahimi /* s b s */ ST_B /* st.b */,
1092*22dc650dSSadaf Ebrahimi /* s b l */ LD_B /* ld.b */,
1093*22dc650dSSadaf Ebrahimi /* s h s */ ST_H /* st.h */,
1094*22dc650dSSadaf Ebrahimi /* s h l */ LD_H /* ld.h */,
1095*22dc650dSSadaf Ebrahimi /* s i s */ ST_W /* st.w */,
1096*22dc650dSSadaf Ebrahimi /* s i l */ LD_W /* ld.w */,
1097*22dc650dSSadaf Ebrahimi 
1098*22dc650dSSadaf Ebrahimi /* d   s */ FST_D /* fst.d */,
1099*22dc650dSSadaf Ebrahimi /* d   l */ FLD_D /* fld.d */,
1100*22dc650dSSadaf Ebrahimi /* s   s */ FST_S /* fst.s */,
1101*22dc650dSSadaf Ebrahimi /* s   l */ FLD_S /* fld.s */,
1102*22dc650dSSadaf Ebrahimi };
1103*22dc650dSSadaf Ebrahimi 
1104*22dc650dSSadaf Ebrahimi static const sljit_ins data_transfer_insts_x[16 + 4] = {
1105*22dc650dSSadaf Ebrahimi /* u w s */ STX_D /* stx.d */,
1106*22dc650dSSadaf Ebrahimi /* u w l */ LDX_D /* ldx.d */,
1107*22dc650dSSadaf Ebrahimi /* u b s */ STX_B /* stx.b */,
1108*22dc650dSSadaf Ebrahimi /* u b l */ LDX_BU /* ldx.bu */,
1109*22dc650dSSadaf Ebrahimi /* u h s */ STX_H /* stx.h */,
1110*22dc650dSSadaf Ebrahimi /* u h l */ LDX_HU /* ldx.hu */,
1111*22dc650dSSadaf Ebrahimi /* u i s */ STX_W /* stx.w */,
1112*22dc650dSSadaf Ebrahimi /* u i l */ LDX_WU /* ldx.wu */,
1113*22dc650dSSadaf Ebrahimi 
1114*22dc650dSSadaf Ebrahimi /* s w s */ STX_D /* stx.d */,
1115*22dc650dSSadaf Ebrahimi /* s w l */ LDX_D /* ldx.d */,
1116*22dc650dSSadaf Ebrahimi /* s b s */ STX_B /* stx.b */,
1117*22dc650dSSadaf Ebrahimi /* s b l */ LDX_B /* ldx.b */,
1118*22dc650dSSadaf Ebrahimi /* s h s */ STX_H /* stx.h */,
1119*22dc650dSSadaf Ebrahimi /* s h l */ LDX_H /* ldx.h */,
1120*22dc650dSSadaf Ebrahimi /* s i s */ STX_W /* stx.w */,
1121*22dc650dSSadaf Ebrahimi /* s i l */ LDX_W /* ldx.w */,
1122*22dc650dSSadaf Ebrahimi 
1123*22dc650dSSadaf Ebrahimi /* d   s */ FSTX_D /* fstx.d */,
1124*22dc650dSSadaf Ebrahimi /* d   l */ FLDX_D /* fldx.d */,
1125*22dc650dSSadaf Ebrahimi /* s   s */ FSTX_S /* fstx.s */,
1126*22dc650dSSadaf Ebrahimi /* s   l */ FLDX_S /* fldx.s */,
1127*22dc650dSSadaf Ebrahimi };
1128*22dc650dSSadaf Ebrahimi 
/* Encodes and emits exactly one load/store instruction.

   flags  selects the opcode through (flags & MEM_MASK); values up to
          GPR_REG address a general register, larger ones a float register.
   reg    the register that is loaded or stored.
   arg    memory operand: base register, plus an optional index register.
   argw   immediate offset; must be zero when an index register is used.

   Returns the result of push_inst. */
static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_s32 kind = flags & MEM_MASK;
	sljit_ins data_field;

	SLJIT_ASSERT(arg & SLJIT_MEM);

	data_field = (kind <= GPR_REG) ? RD(reg) : FRD(reg);

	if (!(arg & OFFS_REG_MASK)) {
		/* getput_arg passes (argw & 0xfff) as the offset, so values up to
		   0xfff are accepted here, not only up to I12_MAX.
		   NOTE(review): presumably IMM_I12 keeps just the low 12 bits - confirm. */
		SLJIT_ASSERT(argw <= 0xfff && argw >= I12_MIN);

		return push_inst(compiler, data_transfer_insts[kind] | data_field |
			RJ(arg & REG_MASK) | IMM_I12(argw));
	}

	/* Indexed form: base register + index register, no immediate. */
	SLJIT_ASSERT(!argw);

	return push_inst(compiler, data_transfer_insts_x[kind] | data_field |
		RJ(arg & REG_MASK) | RK(OFFS_REG(arg)));
}
1152*22dc650dSSadaf Ebrahimi 
1153*22dc650dSSadaf Ebrahimi /* Can perform an operation using at most 1 instruction. */
getput_arg_fast(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw)1154*22dc650dSSadaf Ebrahimi static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1155*22dc650dSSadaf Ebrahimi {
1156*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(arg & SLJIT_MEM);
1157*22dc650dSSadaf Ebrahimi 
1158*22dc650dSSadaf Ebrahimi 	/* argw == 0 (ldx/stx rd, rj, rk) can be used.
1159*22dc650dSSadaf Ebrahimi 	 * argw in [-2048, 2047] (ld/st rd, rj, imm) can be used. */
1160*22dc650dSSadaf Ebrahimi 	if (!argw || (!(arg & OFFS_REG_MASK) && (argw <= I12_MAX && argw >= I12_MIN))) {
1161*22dc650dSSadaf Ebrahimi 		/* Works for both absolute and relative addresses. */
1162*22dc650dSSadaf Ebrahimi 		if (SLJIT_UNLIKELY(flags & ARG_TEST))
1163*22dc650dSSadaf Ebrahimi 			return 1;
1164*22dc650dSSadaf Ebrahimi 
1165*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_mem_inst(compiler, flags, reg, arg, argw));
1166*22dc650dSSadaf Ebrahimi 		return -1;
1167*22dc650dSSadaf Ebrahimi 	}
1168*22dc650dSSadaf Ebrahimi 	return 0;
1169*22dc650dSSadaf Ebrahimi }
1170*22dc650dSSadaf Ebrahimi 
/* Clears the low 12 bits of argw; if bit 11 was set, 0x1000 is added so that
   the difference argw - TO_ARGW_HI(argw) lies in [-0x800, 0x7ff]. */
#define TO_ARGW_HI(argw) \
	(((argw) & 0x800) ? (((argw) & ~0xfff) + 0x1000) : ((argw) & ~0xfff))
1172*22dc650dSSadaf Ebrahimi 
1173*22dc650dSSadaf Ebrahimi /* See getput_arg below.
1174*22dc650dSSadaf Ebrahimi    Note: can_cache is called only for binary operators. */
can_cache(sljit_s32 arg,sljit_sw argw,sljit_s32 next_arg,sljit_sw next_argw)1175*22dc650dSSadaf Ebrahimi static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1176*22dc650dSSadaf Ebrahimi {
1177*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
1178*22dc650dSSadaf Ebrahimi 
1179*22dc650dSSadaf Ebrahimi 	if (arg & OFFS_REG_MASK)
1180*22dc650dSSadaf Ebrahimi 		return 0;
1181*22dc650dSSadaf Ebrahimi 
1182*22dc650dSSadaf Ebrahimi 	if (arg == next_arg) {
1183*22dc650dSSadaf Ebrahimi 		if (((next_argw - argw) <= I12_MAX && (next_argw - argw) >= I12_MIN)
1184*22dc650dSSadaf Ebrahimi 				|| TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw))
1185*22dc650dSSadaf Ebrahimi 			return 1;
1186*22dc650dSSadaf Ebrahimi 		return 0;
1187*22dc650dSSadaf Ebrahimi 	}
1188*22dc650dSSadaf Ebrahimi 
1189*22dc650dSSadaf Ebrahimi 	return 0;
1190*22dc650dSSadaf Ebrahimi }
1191*22dc650dSSadaf Ebrahimi 
1192*22dc650dSSadaf Ebrahimi /* Emit the necessary instructions. See can_cache above. */
getput_arg(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw,sljit_s32 next_arg,sljit_sw next_argw)1193*22dc650dSSadaf Ebrahimi static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1194*22dc650dSSadaf Ebrahimi {
1195*22dc650dSSadaf Ebrahimi 	sljit_s32 base = arg & REG_MASK;
1196*22dc650dSSadaf Ebrahimi 	sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? TMP_REG2 : TMP_REG1;
1197*22dc650dSSadaf Ebrahimi 	sljit_sw offset;
1198*22dc650dSSadaf Ebrahimi 
1199*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT(arg & SLJIT_MEM);
1200*22dc650dSSadaf Ebrahimi 	if (!(next_arg & SLJIT_MEM)) {
1201*22dc650dSSadaf Ebrahimi 		next_arg = 0;
1202*22dc650dSSadaf Ebrahimi 		next_argw = 0;
1203*22dc650dSSadaf Ebrahimi 	}
1204*22dc650dSSadaf Ebrahimi 
1205*22dc650dSSadaf Ebrahimi 	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1206*22dc650dSSadaf Ebrahimi 		argw &= 0x3;
1207*22dc650dSSadaf Ebrahimi 
1208*22dc650dSSadaf Ebrahimi 		if (SLJIT_UNLIKELY(argw))
1209*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
1210*22dc650dSSadaf Ebrahimi 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);
1211*22dc650dSSadaf Ebrahimi 	}
1212*22dc650dSSadaf Ebrahimi 
1213*22dc650dSSadaf Ebrahimi 	if (compiler->cache_arg == arg && argw - compiler->cache_argw <= I12_MAX && argw - compiler->cache_argw >= I12_MIN)
1214*22dc650dSSadaf Ebrahimi 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), argw - compiler->cache_argw);
1215*22dc650dSSadaf Ebrahimi 
1216*22dc650dSSadaf Ebrahimi 	if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= I12_MAX) && (argw - compiler->cache_argw >= I12_MIN)) {
1217*22dc650dSSadaf Ebrahimi 		offset = argw - compiler->cache_argw;
1218*22dc650dSSadaf Ebrahimi 	} else {
1219*22dc650dSSadaf Ebrahimi 		sljit_sw argw_hi=TO_ARGW_HI(argw);
1220*22dc650dSSadaf Ebrahimi 		compiler->cache_arg = SLJIT_MEM;
1221*22dc650dSSadaf Ebrahimi 
1222*22dc650dSSadaf Ebrahimi 		if (next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN && argw_hi != TO_ARGW_HI(next_argw)) {
1223*22dc650dSSadaf Ebrahimi 			FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1224*22dc650dSSadaf Ebrahimi 			compiler->cache_argw = argw;
1225*22dc650dSSadaf Ebrahimi 			offset = 0;
1226*22dc650dSSadaf Ebrahimi 		} else {
1227*22dc650dSSadaf Ebrahimi 			FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi));
1228*22dc650dSSadaf Ebrahimi 			compiler->cache_argw = argw_hi;
1229*22dc650dSSadaf Ebrahimi 			offset = argw & 0xfff;
1230*22dc650dSSadaf Ebrahimi 			argw = argw_hi;
1231*22dc650dSSadaf Ebrahimi 		}
1232*22dc650dSSadaf Ebrahimi 	}
1233*22dc650dSSadaf Ebrahimi 
1234*22dc650dSSadaf Ebrahimi 	if (!base)
1235*22dc650dSSadaf Ebrahimi 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
1236*22dc650dSSadaf Ebrahimi 
1237*22dc650dSSadaf Ebrahimi 	if (arg == next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN) {
1238*22dc650dSSadaf Ebrahimi 		compiler->cache_arg = arg;
1239*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base)));
1240*22dc650dSSadaf Ebrahimi 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
1241*22dc650dSSadaf Ebrahimi 	}
1242*22dc650dSSadaf Ebrahimi 
1243*22dc650dSSadaf Ebrahimi 	if (!offset)
1244*22dc650dSSadaf Ebrahimi 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);
1245*22dc650dSSadaf Ebrahimi 
1246*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, ADD_D | RD(tmp_r) | RJ(TMP_REG3) | RK(base)));
1247*22dc650dSSadaf Ebrahimi 	return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), offset);
1248*22dc650dSSadaf Ebrahimi }
1249*22dc650dSSadaf Ebrahimi 
/* Emits a single memory access without using the TMP_REG3 address cache.

   The one-instruction forms are tried first through getput_arg_fast.
   Otherwise the index or offset is materialized in a scratch register and an
   indexed load/store is emitted. The scratch register is reg itself for
   general register loads and TMP_REG1 in every other case.

   NOTE(review): for a general register load where reg is also the base
   register of arg, the scratch write happens before the base is read.
   Presumably callers never pass such operands - confirm. */
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_s32 base = arg & REG_MASK;
	sljit_s32 scratch;

	if (getput_arg_fast(compiler, flags, reg, arg, argw))
		return compiler->error;

	scratch = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? reg : TMP_REG1;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* Only the low two bits of argw are used as the shift amount. */
		argw &= 0x3;

		if (SLJIT_UNLIKELY(argw))
			FAIL_IF(push_inst(compiler, SLLI_D | RD(scratch) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, scratch), 0);
	}

	/* Large immediate offset: load it, then use it as index or as address. */
	FAIL_IF(load_immediate(compiler, scratch, argw));

	return push_mem_inst(compiler, flags, reg,
		(base != 0) ? SLJIT_MEM2(base, scratch) : SLJIT_MEM1(scratch), 0);
}
1275*22dc650dSSadaf Ebrahimi 
emit_op_mem2(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg1,sljit_sw arg1w,sljit_s32 arg2,sljit_sw arg2w)1276*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
1277*22dc650dSSadaf Ebrahimi {
1278*22dc650dSSadaf Ebrahimi 	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1279*22dc650dSSadaf Ebrahimi 		return compiler->error;
1280*22dc650dSSadaf Ebrahimi 	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1281*22dc650dSSadaf Ebrahimi }
1282*22dc650dSSadaf Ebrahimi 
/* Encodes v as an immediate; 32 is added unless SLJIT_32 is set in the
   local variable op of the expansion site. */
#define IMM_EXTEND(v) (IMM_I12((v) + ((op & SLJIT_32) ? 0 : 32)))
1284*22dc650dSSadaf Ebrahimi 
/* Emits a bitwise operation using the locals op, flags, dst, src1, src2 and
   compiler of the expansion site.

   andi/ori/xori are zero-extended, so an immediate src2 is first loaded with
   addi.d from TMP_ZERO and the register form (op_reg) is used afterwards.
   op_imm is accepted for symmetry but is not used.

   When SLJIT_SET_Z is requested the result is also computed into EQUAL_FLAG.
   When dst equals src1 the immediate goes through TMP_REG1 so that src1 is
   not overwritten before it is read.

   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and is safe inside an unbraced if / else. */
#define EMIT_LOGICAL(op_imm, op_reg) \
	do { \
		if (flags & SRC2_IMM) { \
			if (op & SLJIT_SET_Z) { \
				FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(src2))); \
				FAIL_IF(push_inst(compiler, (op_reg) | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); \
			} \
			if (!(flags & UNUSED_DEST)) { \
				if (dst == src1) { \
					FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(TMP_ZERO) | IMM_I12(src2))); \
					FAIL_IF(push_inst(compiler, (op_reg) | RD(dst) | RJ(src1) | RK(TMP_REG1))); \
				} else { \
					FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(TMP_ZERO) | IMM_I12(src2))); \
					FAIL_IF(push_inst(compiler, (op_reg) | RD(dst) | RJ(src1) | RK(dst))); \
				} \
			} \
		} else { \
			if (op & SLJIT_SET_Z) \
				FAIL_IF(push_inst(compiler, (op_reg) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); \
			if (!(flags & UNUSED_DEST)) \
				FAIL_IF(push_inst(compiler, (op_reg) | RD(dst) | RJ(src1) | RK(src2))); \
		} \
	} while (0)
1308*22dc650dSSadaf Ebrahimi 
/* Stores the immediate and the register form of a shift opcode into the
   locals op_imm and op_reg of the expansion site. Wrapped in
   do { } while (0) so that both assignments form a single statement. */
#define EMIT_SHIFT(imm, reg) \
	do { \
		op_imm = (imm); \
		op_reg = (reg); \
	} while (0)
1312*22dc650dSSadaf Ebrahimi 
emit_single_op(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 flags,sljit_s32 dst,sljit_s32 src1,sljit_sw src2)1313*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
1314*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
1315*22dc650dSSadaf Ebrahimi {
1316*22dc650dSSadaf Ebrahimi 	sljit_s32 is_overflow, is_carry, carry_src_r, is_handled, reg;
1317*22dc650dSSadaf Ebrahimi 	sljit_ins op_imm, op_reg;
1318*22dc650dSSadaf Ebrahimi 	sljit_ins word_size = ((op & SLJIT_32) ? 32 : 64);
1319*22dc650dSSadaf Ebrahimi 
1320*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op)) {
1321*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV:
1322*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1323*22dc650dSSadaf Ebrahimi 		if (dst != src2)
1324*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src2) | IMM_I12(0));
1325*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1326*22dc650dSSadaf Ebrahimi 
1327*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U8:
1328*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1329*22dc650dSSadaf Ebrahimi 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1330*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, ANDI | RD(dst) | RJ(src2) | IMM_I12(0xff));
1331*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(dst == src2);
1332*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1333*22dc650dSSadaf Ebrahimi 
1334*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_S8:
1335*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1336*22dc650dSSadaf Ebrahimi 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1337*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, EXT_W_B | RD(dst) | RJ(src2));
1338*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(dst == src2);
1339*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1340*22dc650dSSadaf Ebrahimi 
1341*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U16:
1342*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1343*22dc650dSSadaf Ebrahimi 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1344*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(src2) | (15 << 16));
1345*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(dst == src2);
1346*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1347*22dc650dSSadaf Ebrahimi 
1348*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_S16:
1349*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1350*22dc650dSSadaf Ebrahimi 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1351*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, EXT_W_H | RD(dst) | RJ(src2));
1352*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(dst == src2);
1353*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1354*22dc650dSSadaf Ebrahimi 
1355*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U32:
1356*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1357*22dc650dSSadaf Ebrahimi 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1358*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(src2) | (31 << 16));
1359*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(dst == src2);
1360*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1361*22dc650dSSadaf Ebrahimi 
1362*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_S32:
1363*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1364*22dc650dSSadaf Ebrahimi 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1365*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, SLLI_W | RD(dst) | RJ(src2) | IMM_I12(0));
1366*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(dst == src2);
1367*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1368*22dc650dSSadaf Ebrahimi 
1369*22dc650dSSadaf Ebrahimi 	case SLJIT_CLZ:
1370*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1371*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, INST(CLZ, op) | RD(dst) | RJ(src2));
1372*22dc650dSSadaf Ebrahimi 
1373*22dc650dSSadaf Ebrahimi 	case SLJIT_CTZ:
1374*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1375*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, INST(CTZ, op) | RD(dst) | RJ(src2));
1376*22dc650dSSadaf Ebrahimi 
1377*22dc650dSSadaf Ebrahimi 	case SLJIT_REV:
1378*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1379*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, ((op & SLJIT_32) ? REVB_2W : REVB_D) | RD(dst) | RJ(src2));
1380*22dc650dSSadaf Ebrahimi 
1381*22dc650dSSadaf Ebrahimi 	case SLJIT_REV_S16:
1382*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1383*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
1384*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, EXT_W_H | RD(dst) | RJ(dst));
1385*22dc650dSSadaf Ebrahimi 
1386*22dc650dSSadaf Ebrahimi 	case SLJIT_REV_U16:
1387*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1388*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
1389*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(dst) | (15 << 16));
1390*22dc650dSSadaf Ebrahimi 
1391*22dc650dSSadaf Ebrahimi 	case SLJIT_REV_S32:
1392*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);
1393*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
1394*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, SLLI_W | RD(dst) | RJ(dst) | IMM_I12(0));
1395*22dc650dSSadaf Ebrahimi 
1396*22dc650dSSadaf Ebrahimi 	case SLJIT_REV_U32:
1397*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);
1398*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
1399*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(dst) | (31 << 16));
1400*22dc650dSSadaf Ebrahimi 
1401*22dc650dSSadaf Ebrahimi 	case SLJIT_ADD:
1402*22dc650dSSadaf Ebrahimi 		/* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */
1403*22dc650dSSadaf Ebrahimi 		is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
1404*22dc650dSSadaf Ebrahimi 		carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1405*22dc650dSSadaf Ebrahimi 
1406*22dc650dSSadaf Ebrahimi 		if (flags & SRC2_IMM) {
1407*22dc650dSSadaf Ebrahimi 			if (is_overflow) {
1408*22dc650dSSadaf Ebrahimi 				if (src2 >= 0)
1409*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1410*22dc650dSSadaf Ebrahimi 				else {
1411*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(-1)));
1412*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));
1413*22dc650dSSadaf Ebrahimi 				}
1414*22dc650dSSadaf Ebrahimi 			} else if (op & SLJIT_SET_Z)
1415*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1416*22dc650dSSadaf Ebrahimi 
1417*22dc650dSSadaf Ebrahimi 			/* Only the zero flag is needed. */
1418*22dc650dSSadaf Ebrahimi 			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1419*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(src2)));
1420*22dc650dSSadaf Ebrahimi 		} else {
1421*22dc650dSSadaf Ebrahimi 			if (is_overflow)
1422*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1423*22dc650dSSadaf Ebrahimi 			else if (op & SLJIT_SET_Z)
1424*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1425*22dc650dSSadaf Ebrahimi 
1426*22dc650dSSadaf Ebrahimi 			if (is_overflow || carry_src_r != 0) {
1427*22dc650dSSadaf Ebrahimi 				if (src1 != dst)
1428*22dc650dSSadaf Ebrahimi 					carry_src_r = (sljit_s32)src1;
1429*22dc650dSSadaf Ebrahimi 				else if (src2 != dst)
1430*22dc650dSSadaf Ebrahimi 					carry_src_r = (sljit_s32)src2;
1431*22dc650dSSadaf Ebrahimi 				else {
1432*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(0)));
1433*22dc650dSSadaf Ebrahimi 					carry_src_r = OTHER_FLAG;
1434*22dc650dSSadaf Ebrahimi 				}
1435*22dc650dSSadaf Ebrahimi 			}
1436*22dc650dSSadaf Ebrahimi 
1437*22dc650dSSadaf Ebrahimi 			/* Only the zero flag is needed. */
1438*22dc650dSSadaf Ebrahimi 			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1439*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src1) | RK(src2)));
1440*22dc650dSSadaf Ebrahimi 		}
1441*22dc650dSSadaf Ebrahimi 
1442*22dc650dSSadaf Ebrahimi 		/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
1443*22dc650dSSadaf Ebrahimi 		if (is_overflow || carry_src_r != 0) {
1444*22dc650dSSadaf Ebrahimi 			if (flags & SRC2_IMM)
1445*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(dst) | IMM_I12(src2)));
1446*22dc650dSSadaf Ebrahimi 			else
1447*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(carry_src_r)));
1448*22dc650dSSadaf Ebrahimi 		}
1449*22dc650dSSadaf Ebrahimi 
1450*22dc650dSSadaf Ebrahimi 		if (!is_overflow)
1451*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
1452*22dc650dSSadaf Ebrahimi 
1453*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));
1454*22dc650dSSadaf Ebrahimi 		if (op & SLJIT_SET_Z)
1455*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));
1456*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));
1457*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));
1458*22dc650dSSadaf Ebrahimi 
1459*22dc650dSSadaf Ebrahimi 	case SLJIT_ADDC:
1460*22dc650dSSadaf Ebrahimi 		carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1461*22dc650dSSadaf Ebrahimi 
1462*22dc650dSSadaf Ebrahimi 		if (flags & SRC2_IMM) {
1463*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(src1) | IMM_I12(src2)));
1464*22dc650dSSadaf Ebrahimi 		} else {
1465*22dc650dSSadaf Ebrahimi 			if (carry_src_r != 0) {
1466*22dc650dSSadaf Ebrahimi 				if (src1 != dst)
1467*22dc650dSSadaf Ebrahimi 					carry_src_r = (sljit_s32)src1;
1468*22dc650dSSadaf Ebrahimi 				else if (src2 != dst)
1469*22dc650dSSadaf Ebrahimi 					carry_src_r = (sljit_s32)src2;
1470*22dc650dSSadaf Ebrahimi 				else {
1471*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1472*22dc650dSSadaf Ebrahimi 					carry_src_r = EQUAL_FLAG;
1473*22dc650dSSadaf Ebrahimi 				}
1474*22dc650dSSadaf Ebrahimi 			}
1475*22dc650dSSadaf Ebrahimi 
1476*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(src1) | RK(src2)));
1477*22dc650dSSadaf Ebrahimi 		}
1478*22dc650dSSadaf Ebrahimi 
1479*22dc650dSSadaf Ebrahimi 		/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
1480*22dc650dSSadaf Ebrahimi 		if (carry_src_r != 0) {
1481*22dc650dSSadaf Ebrahimi 			if (flags & SRC2_IMM)
1482*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(src2)));
1483*22dc650dSSadaf Ebrahimi 			else
1484*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(dst) | RK(carry_src_r)));
1485*22dc650dSSadaf Ebrahimi 		}
1486*22dc650dSSadaf Ebrahimi 
1487*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));
1488*22dc650dSSadaf Ebrahimi 
1489*22dc650dSSadaf Ebrahimi 		if (carry_src_r == 0)
1490*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
1491*22dc650dSSadaf Ebrahimi 
1492*22dc650dSSadaf Ebrahimi 		/* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
1493*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG)));
1494*22dc650dSSadaf Ebrahimi 		/* Set carry flag. */
1495*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(EQUAL_FLAG));
1496*22dc650dSSadaf Ebrahimi 
1497*22dc650dSSadaf Ebrahimi 	case SLJIT_SUB:
1498*22dc650dSSadaf Ebrahimi 		if ((flags & SRC2_IMM) && src2 == I12_MIN) {
1499*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
1500*22dc650dSSadaf Ebrahimi 			src2 = TMP_REG2;
1501*22dc650dSSadaf Ebrahimi 			flags &= ~SRC2_IMM;
1502*22dc650dSSadaf Ebrahimi 		}
1503*22dc650dSSadaf Ebrahimi 
1504*22dc650dSSadaf Ebrahimi 		is_handled = 0;
1505*22dc650dSSadaf Ebrahimi 
1506*22dc650dSSadaf Ebrahimi 		if (flags & SRC2_IMM) {
1507*22dc650dSSadaf Ebrahimi 			if (GET_FLAG_TYPE(op) == SLJIT_LESS) {
1508*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1509*22dc650dSSadaf Ebrahimi 				is_handled = 1;
1510*22dc650dSSadaf Ebrahimi 			} else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) {
1511*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1512*22dc650dSSadaf Ebrahimi 				is_handled = 1;
1513*22dc650dSSadaf Ebrahimi 			}
1514*22dc650dSSadaf Ebrahimi 		}
1515*22dc650dSSadaf Ebrahimi 
1516*22dc650dSSadaf Ebrahimi 		if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
1517*22dc650dSSadaf Ebrahimi 			is_handled = 1;
1518*22dc650dSSadaf Ebrahimi 
1519*22dc650dSSadaf Ebrahimi 			if (flags & SRC2_IMM) {
1520*22dc650dSSadaf Ebrahimi 				reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
1521*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, ADDI_D | RD(reg) | RJ(TMP_ZERO) | IMM_I12(src2)));
1522*22dc650dSSadaf Ebrahimi 				src2 = reg;
1523*22dc650dSSadaf Ebrahimi 				flags &= ~SRC2_IMM;
1524*22dc650dSSadaf Ebrahimi 			}
1525*22dc650dSSadaf Ebrahimi 
1526*22dc650dSSadaf Ebrahimi 			switch (GET_FLAG_TYPE(op)) {
1527*22dc650dSSadaf Ebrahimi 			case SLJIT_LESS:
1528*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1529*22dc650dSSadaf Ebrahimi 				break;
1530*22dc650dSSadaf Ebrahimi 			case SLJIT_GREATER:
1531*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));
1532*22dc650dSSadaf Ebrahimi 				break;
1533*22dc650dSSadaf Ebrahimi 			case SLJIT_SIG_LESS:
1534*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1535*22dc650dSSadaf Ebrahimi 				break;
1536*22dc650dSSadaf Ebrahimi 			case SLJIT_SIG_GREATER:
1537*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));
1538*22dc650dSSadaf Ebrahimi 				break;
1539*22dc650dSSadaf Ebrahimi 			}
1540*22dc650dSSadaf Ebrahimi 		}
1541*22dc650dSSadaf Ebrahimi 
1542*22dc650dSSadaf Ebrahimi 		if (is_handled) {
1543*22dc650dSSadaf Ebrahimi 			if (flags & SRC2_IMM) {
1544*22dc650dSSadaf Ebrahimi 				if (op & SLJIT_SET_Z)
1545*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));
1546*22dc650dSSadaf Ebrahimi 				if (!(flags & UNUSED_DEST))
1547*22dc650dSSadaf Ebrahimi 					return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2));
1548*22dc650dSSadaf Ebrahimi 			} else {
1549*22dc650dSSadaf Ebrahimi 				if (op & SLJIT_SET_Z)
1550*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1551*22dc650dSSadaf Ebrahimi 				if (!(flags & UNUSED_DEST))
1552*22dc650dSSadaf Ebrahimi 					return push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2));
1553*22dc650dSSadaf Ebrahimi 			}
1554*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
1555*22dc650dSSadaf Ebrahimi 		}
1556*22dc650dSSadaf Ebrahimi 
1557*22dc650dSSadaf Ebrahimi 		is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
1558*22dc650dSSadaf Ebrahimi 		is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1559*22dc650dSSadaf Ebrahimi 
1560*22dc650dSSadaf Ebrahimi 		if (flags & SRC2_IMM) {
1561*22dc650dSSadaf Ebrahimi 			if (is_overflow) {
1562*22dc650dSSadaf Ebrahimi 				if (src2 >= 0)
1563*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1564*22dc650dSSadaf Ebrahimi 				else {
1565*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-1)));
1566*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));
1567*22dc650dSSadaf Ebrahimi 				}
1568*22dc650dSSadaf Ebrahimi 			} else if (op & SLJIT_SET_Z)
1569*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));
1570*22dc650dSSadaf Ebrahimi 
1571*22dc650dSSadaf Ebrahimi 			if (is_overflow || is_carry)
1572*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1573*22dc650dSSadaf Ebrahimi 
1574*22dc650dSSadaf Ebrahimi 			/* Only the zero flag is needed. */
1575*22dc650dSSadaf Ebrahimi 			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1576*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));
1577*22dc650dSSadaf Ebrahimi 		} else {
1578*22dc650dSSadaf Ebrahimi 			if (is_overflow)
1579*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1580*22dc650dSSadaf Ebrahimi 			else if (op & SLJIT_SET_Z)
1581*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1582*22dc650dSSadaf Ebrahimi 
1583*22dc650dSSadaf Ebrahimi 			if (is_overflow || is_carry)
1584*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1585*22dc650dSSadaf Ebrahimi 
1586*22dc650dSSadaf Ebrahimi 			/* Only the zero flag is needed. */
1587*22dc650dSSadaf Ebrahimi 			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1588*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));
1589*22dc650dSSadaf Ebrahimi 		}
1590*22dc650dSSadaf Ebrahimi 
1591*22dc650dSSadaf Ebrahimi 		if (!is_overflow)
1592*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
1593*22dc650dSSadaf Ebrahimi 
1594*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));
1595*22dc650dSSadaf Ebrahimi 		if (op & SLJIT_SET_Z)
1596*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));
1597*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));
1598*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));
1599*22dc650dSSadaf Ebrahimi 
1600*22dc650dSSadaf Ebrahimi 	case SLJIT_SUBC:
1601*22dc650dSSadaf Ebrahimi 		if ((flags & SRC2_IMM) && src2 == I12_MIN) {
1602*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
1603*22dc650dSSadaf Ebrahimi 			src2 = TMP_REG2;
1604*22dc650dSSadaf Ebrahimi 			flags &= ~SRC2_IMM;
1605*22dc650dSSadaf Ebrahimi 		}
1606*22dc650dSSadaf Ebrahimi 
1607*22dc650dSSadaf Ebrahimi 		is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1608*22dc650dSSadaf Ebrahimi 
1609*22dc650dSSadaf Ebrahimi 		if (flags & SRC2_IMM) {
1610*22dc650dSSadaf Ebrahimi 			if (is_carry)
1611*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1612*22dc650dSSadaf Ebrahimi 
1613*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));
1614*22dc650dSSadaf Ebrahimi 		} else {
1615*22dc650dSSadaf Ebrahimi 			if (is_carry)
1616*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1617*22dc650dSSadaf Ebrahimi 
1618*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));
1619*22dc650dSSadaf Ebrahimi 		}
1620*22dc650dSSadaf Ebrahimi 
1621*22dc650dSSadaf Ebrahimi 		if (is_carry)
1622*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RJ(dst) | RK(OTHER_FLAG)));
1623*22dc650dSSadaf Ebrahimi 
1624*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));
1625*22dc650dSSadaf Ebrahimi 
1626*22dc650dSSadaf Ebrahimi 		if (!is_carry)
1627*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
1628*22dc650dSSadaf Ebrahimi 
1629*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(TMP_REG1));
1630*22dc650dSSadaf Ebrahimi 
1631*22dc650dSSadaf Ebrahimi 	case SLJIT_MUL:
1632*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(!(flags & SRC2_IMM));
1633*22dc650dSSadaf Ebrahimi 
1634*22dc650dSSadaf Ebrahimi 		if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW)
1635*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, INST(MUL, op) | RD(dst) | RJ(src1) | RK(src2));
1636*22dc650dSSadaf Ebrahimi 
1637*22dc650dSSadaf Ebrahimi 		if (op & SLJIT_32) {
1638*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, MUL_D | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1639*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, MUL_W | RD(dst) | RJ(src1) | RK(src2)));
1640*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG));
1641*22dc650dSSadaf Ebrahimi 		}
1642*22dc650dSSadaf Ebrahimi 
1643*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, MULH_D | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1644*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, MUL_D | RD(dst) | RJ(src1) | RK(src2)));
1645*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, SRAI_D | RD(OTHER_FLAG) | RJ(dst) | IMM_I12((63))));
1646*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(OTHER_FLAG));
1647*22dc650dSSadaf Ebrahimi 
1648*22dc650dSSadaf Ebrahimi 	case SLJIT_AND:
1649*22dc650dSSadaf Ebrahimi 		EMIT_LOGICAL(ANDI, AND);
1650*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1651*22dc650dSSadaf Ebrahimi 
1652*22dc650dSSadaf Ebrahimi 	case SLJIT_OR:
1653*22dc650dSSadaf Ebrahimi 		EMIT_LOGICAL(ORI, OR);
1654*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1655*22dc650dSSadaf Ebrahimi 
1656*22dc650dSSadaf Ebrahimi 	case SLJIT_XOR:
1657*22dc650dSSadaf Ebrahimi 		EMIT_LOGICAL(XORI, XOR);
1658*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1659*22dc650dSSadaf Ebrahimi 
1660*22dc650dSSadaf Ebrahimi 	case SLJIT_SHL:
1661*22dc650dSSadaf Ebrahimi 	case SLJIT_MSHL:
1662*22dc650dSSadaf Ebrahimi 		if (op & SLJIT_32) {
1663*22dc650dSSadaf Ebrahimi 			EMIT_SHIFT(SLLI_W, SLL_W);
1664*22dc650dSSadaf Ebrahimi 		} else {
1665*22dc650dSSadaf Ebrahimi 			EMIT_SHIFT(SLLI_D, SLL_D);
1666*22dc650dSSadaf Ebrahimi 		}
1667*22dc650dSSadaf Ebrahimi 		break;
1668*22dc650dSSadaf Ebrahimi 
1669*22dc650dSSadaf Ebrahimi 	case SLJIT_LSHR:
1670*22dc650dSSadaf Ebrahimi 	case SLJIT_MLSHR:
1671*22dc650dSSadaf Ebrahimi 		if (op & SLJIT_32) {
1672*22dc650dSSadaf Ebrahimi 			EMIT_SHIFT(SRLI_W, SRL_W);
1673*22dc650dSSadaf Ebrahimi 		} else {
1674*22dc650dSSadaf Ebrahimi 			EMIT_SHIFT(SRLI_D, SRL_D);
1675*22dc650dSSadaf Ebrahimi 		}
1676*22dc650dSSadaf Ebrahimi 		break;
1677*22dc650dSSadaf Ebrahimi 
1678*22dc650dSSadaf Ebrahimi 	case SLJIT_ASHR:
1679*22dc650dSSadaf Ebrahimi 	case SLJIT_MASHR:
1680*22dc650dSSadaf Ebrahimi 		if (op & SLJIT_32) {
1681*22dc650dSSadaf Ebrahimi 			EMIT_SHIFT(SRAI_W, SRA_W);
1682*22dc650dSSadaf Ebrahimi 		} else {
1683*22dc650dSSadaf Ebrahimi 			EMIT_SHIFT(SRAI_D, SRA_D);
1684*22dc650dSSadaf Ebrahimi 		}
1685*22dc650dSSadaf Ebrahimi 		break;
1686*22dc650dSSadaf Ebrahimi 
1687*22dc650dSSadaf Ebrahimi 	case SLJIT_ROTL:
1688*22dc650dSSadaf Ebrahimi 	case SLJIT_ROTR:
1689*22dc650dSSadaf Ebrahimi 		if (flags & SRC2_IMM) {
1690*22dc650dSSadaf Ebrahimi 			SLJIT_ASSERT(src2 != 0);
1691*22dc650dSSadaf Ebrahimi 
1692*22dc650dSSadaf Ebrahimi 			if (GET_OPCODE(op) == SLJIT_ROTL)
1693*22dc650dSSadaf Ebrahimi 				src2 = word_size - src2;
1694*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, INST(ROTRI, op) | RD(dst) | RJ(src1) | IMM_I12(src2));
1695*22dc650dSSadaf Ebrahimi 		}
1696*22dc650dSSadaf Ebrahimi 
1697*22dc650dSSadaf Ebrahimi 		if (src2 == TMP_ZERO) {
1698*22dc650dSSadaf Ebrahimi 			if (dst != src1)
1699*22dc650dSSadaf Ebrahimi 				return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(0));
1700*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
1701*22dc650dSSadaf Ebrahimi 		}
1702*22dc650dSSadaf Ebrahimi 
1703*22dc650dSSadaf Ebrahimi 		if (GET_OPCODE(op) == SLJIT_ROTL) {
1704*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, INST(SUB, op)| RD(OTHER_FLAG) | RJ(TMP_ZERO) | RK(src2)));
1705*22dc650dSSadaf Ebrahimi 			src2 = OTHER_FLAG;
1706*22dc650dSSadaf Ebrahimi 		}
1707*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, INST(ROTR, op) | RD(dst) | RJ(src1) | RK(src2));
1708*22dc650dSSadaf Ebrahimi 
1709*22dc650dSSadaf Ebrahimi 	default:
1710*22dc650dSSadaf Ebrahimi 		SLJIT_UNREACHABLE();
1711*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1712*22dc650dSSadaf Ebrahimi 	}
1713*22dc650dSSadaf Ebrahimi 
1714*22dc650dSSadaf Ebrahimi 	if (flags & SRC2_IMM) {
1715*22dc650dSSadaf Ebrahimi 		if (op & SLJIT_SET_Z)
1716*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1717*22dc650dSSadaf Ebrahimi 
1718*22dc650dSSadaf Ebrahimi 		if (flags & UNUSED_DEST)
1719*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
1720*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, op_imm | RD(dst) | RJ(src1) | IMM_I12(src2));
1721*22dc650dSSadaf Ebrahimi 	}
1722*22dc650dSSadaf Ebrahimi 
1723*22dc650dSSadaf Ebrahimi 	if (op & SLJIT_SET_Z)
1724*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1725*22dc650dSSadaf Ebrahimi 
1726*22dc650dSSadaf Ebrahimi 	if (flags & UNUSED_DEST)
1727*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1728*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2));
1729*22dc650dSSadaf Ebrahimi }
1730*22dc650dSSadaf Ebrahimi 
1731*22dc650dSSadaf Ebrahimi #undef IMM_EXTEND
1732*22dc650dSSadaf Ebrahimi 
/* Common driver for one and two operand operations.

   Operand placement:
     - the first source is used in place when it is a register, otherwise it
       is materialized in TMP_REG1 (or TMP_ZERO for a zero immediate);
     - the second source is a register, a 12 bit immediate (SRC2_IMM), or is
       materialized in a scratch register (TMP_REG1 when this is a two operand
       operation whose first source is already a register, TMP_REG2 otherwise);
     - TMP_REG3 can be used for caching;
     - the result is produced in TMP_REG2 unless dst is a register, so storing
       the result may use TMP_REG1 and TMP_REG3.

   Returns SLJIT_SUCCESS or the error recorded in the compiler. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 result_reg = TMP_REG2;
	sljit_s32 lhs_reg;
	sljit_sw rhs = 0;
	sljit_s32 rhs_scratch = TMP_REG2;

	/* TMP_REG1 is free for the second operand only when the first one
	   already lives in a register. */
	if (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1))
		rhs_scratch = TMP_REG1;

	/* Forget the cached address unless the caller asked to keep it. */
	if (!(flags & ALT_KEEP_CACHE)) {
		compiler->cache_argw = 0;
		compiler->cache_arg = 0;
	}

	/* Destination. */
	if (dst == 0) {
		/* No destination: only the flags are wanted. */
		SLJIT_ASSERT(HAS_FLAGS(op));
		dst = TMP_REG2;
		flags |= UNUSED_DEST;
	} else if (FAST_IS_REG(dst)) {
		flags |= REG_DEST;
		result_reg = dst;
		/* A move can load its source straight into the destination. */
		if (flags & MOVE_OP)
			rhs_scratch = result_reg;
	} else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) {
		flags |= SLOW_DEST;
	}

	/* Try to encode one of the operands as a non-zero 12 bit immediate. */
	if (flags & IMM_OP) {
		if (src2 == SLJIT_IMM && src2w != 0 && src2w >= I12_MIN && src2w <= I12_MAX) {
			rhs = src2w;
			flags |= SRC2_IMM;
		} else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w >= I12_MIN && src1w <= I12_MAX) {
			rhs = src1w;
			flags |= SRC2_IMM;

			/* The operands of a cumulative operation can be exchanged, so
			   the immediate becomes the second one. Updating src2w is not
			   needed because rhs already holds the value. */
			src1 = src2;
			src1w = src2w;
			src2 = SLJIT_IMM;
		}
	}

	/* First source. */
	if (FAST_IS_REG(src1)) {
		flags |= REG1_SOURCE;
		lhs_reg = src1;
	} else if (src1 != SLJIT_IMM) {
		/* Memory operand: a zero return defers the load to the slow path. */
		if (!getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
			flags |= SLOW_SRC1;
		} else {
			FAIL_IF(compiler->error);
		}
		lhs_reg = TMP_REG1;
	} else if (src1w == 0) {
		lhs_reg = TMP_ZERO;
	} else {
		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
		lhs_reg = TMP_REG1;
	}

	/* Second source. */
	if (FAST_IS_REG(src2)) {
		flags |= REG2_SOURCE;
		rhs = src2;
		/* A move to a non-register destination can use the source register
		   directly as the value to store. */
		if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
			result_reg = (sljit_s32)rhs;
	} else if (src2 != SLJIT_IMM) {
		/* Memory operand: a zero return defers the load to the slow path. */
		if (!getput_arg_fast(compiler, flags | LOAD_DATA, rhs_scratch, src2, src2w)) {
			flags |= SLOW_SRC2;
		} else {
			FAIL_IF(compiler->error);
		}
		rhs = rhs_scratch;
	} else if (!(flags & SRC2_IMM)) {
		/* Immediate which was not encoded as a 12 bit constant above. */
		if (src2w != 0) {
			FAIL_IF(load_immediate(compiler, rhs_scratch, src2w));
			rhs = rhs_scratch;
		} else {
			rhs = TMP_ZERO;
			if (flags & MOVE_OP) {
				if (dst & SLJIT_MEM)
					result_reg = 0;
				else
					op = SLJIT_MOV;
			}
		}
	}

	/* Emit the loads which could not be done with the fast form. The extra
	   address arguments describe the access that follows. */
	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
		SLJIT_ASSERT(rhs == TMP_REG2);
		if ((flags & SLOW_DEST) && !can_cache(src2, src2w, src1, src1w) && can_cache(src2, src2w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | MEM_USE_TMP2, TMP_REG2, src2, src2w, dst, dstw));
		} else {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		}
	} else if (flags & SLOW_SRC1) {
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
	} else if (flags & SLOW_SRC2) {
		/* MEM_USE_TMP2 is requested when TMP_REG1 already holds the first operand. */
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | ((lhs_reg == TMP_REG1) ? MEM_USE_TMP2 : 0), rhs_scratch, src2, src2w, dst, dstw));
	}

	FAIL_IF(emit_single_op(compiler, op, flags, result_reg, lhs_reg, rhs));

	/* Store the result when the destination is in memory. */
	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	if (flags & SLOW_DEST)
		return getput_arg(compiler, flags, result_reg, dst, dstw, 0, 0);

	getput_arg_fast(compiler, flags, result_reg, dst, dstw);
	return compiler->error;
}
1856*22dc650dSSadaf Ebrahimi 
sljit_emit_op0(struct sljit_compiler * compiler,sljit_s32 op)1857*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1858*22dc650dSSadaf Ebrahimi {
1859*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
1860*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op0(compiler, op));
1861*22dc650dSSadaf Ebrahimi 
1862*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op)) {
1863*22dc650dSSadaf Ebrahimi 	case SLJIT_BREAKPOINT:
1864*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, BREAK);
1865*22dc650dSSadaf Ebrahimi 	case SLJIT_NOP:
1866*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, ANDI | RD(TMP_ZERO) | RJ(TMP_ZERO) | IMM_I12(0));
1867*22dc650dSSadaf Ebrahimi 	case SLJIT_LMUL_UW:
1868*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));
1869*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, MULH_DU | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1870*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
1871*22dc650dSSadaf Ebrahimi 	case SLJIT_LMUL_SW:
1872*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));
1873*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, MULH_D | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1874*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
1875*22dc650dSSadaf Ebrahimi 	case SLJIT_DIVMOD_UW:
1876*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));
1877*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1878*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, ((op & SLJIT_32)? MOD_WU: MOD_DU) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
1879*22dc650dSSadaf Ebrahimi 	case SLJIT_DIVMOD_SW:
1880*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));
1881*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1882*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, INST(MOD, op) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
1883*22dc650dSSadaf Ebrahimi 	case SLJIT_DIV_UW:
1884*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
1885*22dc650dSSadaf Ebrahimi 	case SLJIT_DIV_SW:
1886*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
1887*22dc650dSSadaf Ebrahimi 	case SLJIT_ENDBR:
1888*22dc650dSSadaf Ebrahimi 	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1889*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
1890*22dc650dSSadaf Ebrahimi 	}
1891*22dc650dSSadaf Ebrahimi 
1892*22dc650dSSadaf Ebrahimi 	SLJIT_UNREACHABLE();
1893*22dc650dSSadaf Ebrahimi 	return SLJIT_ERR_UNSUPPORTED;
1894*22dc650dSSadaf Ebrahimi }
1895*22dc650dSSadaf Ebrahimi 
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1896*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1897*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
1898*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
1899*22dc650dSSadaf Ebrahimi {
1900*22dc650dSSadaf Ebrahimi 	sljit_s32 flags = 0;
1901*22dc650dSSadaf Ebrahimi 
1902*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
1903*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1904*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
1905*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src, srcw);
1906*22dc650dSSadaf Ebrahimi 
1907*22dc650dSSadaf Ebrahimi 	if (op & SLJIT_32)
1908*22dc650dSSadaf Ebrahimi 		flags = INT_DATA | SIGNED_DATA;
1909*22dc650dSSadaf Ebrahimi 
1910*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op)) {
1911*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV:
1912*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_P:
1913*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw);
1914*22dc650dSSadaf Ebrahimi 
1915*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U32:
1916*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);
1917*22dc650dSSadaf Ebrahimi 
1918*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_S32:
1919*22dc650dSSadaf Ebrahimi 	/* Logical operators have no W variant, so sign extended input is necessary for them. */
1920*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV32:
1921*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw);
1922*22dc650dSSadaf Ebrahimi 
1923*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U8:
1924*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
1925*22dc650dSSadaf Ebrahimi 
1926*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_S8:
1927*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
1928*22dc650dSSadaf Ebrahimi 
1929*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U16:
1930*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
1931*22dc650dSSadaf Ebrahimi 
1932*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_S16:
1933*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
1934*22dc650dSSadaf Ebrahimi 
1935*22dc650dSSadaf Ebrahimi 	case SLJIT_CLZ:
1936*22dc650dSSadaf Ebrahimi 	case SLJIT_CTZ:
1937*22dc650dSSadaf Ebrahimi 	case SLJIT_REV:
1938*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, op, flags, dst, dstw, TMP_ZERO, 0, src, srcw);
1939*22dc650dSSadaf Ebrahimi 
1940*22dc650dSSadaf Ebrahimi 	case SLJIT_REV_U16:
1941*22dc650dSSadaf Ebrahimi 	case SLJIT_REV_S16:
1942*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);
1943*22dc650dSSadaf Ebrahimi 
1944*22dc650dSSadaf Ebrahimi 	case SLJIT_REV_U32:
1945*22dc650dSSadaf Ebrahimi 	case SLJIT_REV_S32:
1946*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);
1947*22dc650dSSadaf Ebrahimi 	}
1948*22dc650dSSadaf Ebrahimi 
1949*22dc650dSSadaf Ebrahimi 	SLJIT_UNREACHABLE();
1950*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
1951*22dc650dSSadaf Ebrahimi }
1952*22dc650dSSadaf Ebrahimi 
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1953*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1954*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
1955*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
1956*22dc650dSSadaf Ebrahimi 	sljit_s32 src2, sljit_sw src2w)
1957*22dc650dSSadaf Ebrahimi {
1958*22dc650dSSadaf Ebrahimi 	sljit_s32 flags = 0;
1959*22dc650dSSadaf Ebrahimi 
1960*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
1961*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
1962*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
1963*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src1, src1w);
1964*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src2, src2w);
1965*22dc650dSSadaf Ebrahimi 
1966*22dc650dSSadaf Ebrahimi 	if (op & SLJIT_32) {
1967*22dc650dSSadaf Ebrahimi 		flags |= INT_DATA | SIGNED_DATA;
1968*22dc650dSSadaf Ebrahimi 		if (src1 == SLJIT_IMM)
1969*22dc650dSSadaf Ebrahimi 			src1w = (sljit_s32)src1w;
1970*22dc650dSSadaf Ebrahimi 		if (src2 == SLJIT_IMM)
1971*22dc650dSSadaf Ebrahimi 			src2w = (sljit_s32)src2w;
1972*22dc650dSSadaf Ebrahimi 	}
1973*22dc650dSSadaf Ebrahimi 
1974*22dc650dSSadaf Ebrahimi 
1975*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op)) {
1976*22dc650dSSadaf Ebrahimi 	case SLJIT_ADD:
1977*22dc650dSSadaf Ebrahimi 	case SLJIT_ADDC:
1978*22dc650dSSadaf Ebrahimi 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
1979*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1980*22dc650dSSadaf Ebrahimi 
1981*22dc650dSSadaf Ebrahimi 	case SLJIT_SUB:
1982*22dc650dSSadaf Ebrahimi 	case SLJIT_SUBC:
1983*22dc650dSSadaf Ebrahimi 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
1984*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1985*22dc650dSSadaf Ebrahimi 
1986*22dc650dSSadaf Ebrahimi 	case SLJIT_MUL:
1987*22dc650dSSadaf Ebrahimi 		compiler->status_flags_state = 0;
1988*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
1989*22dc650dSSadaf Ebrahimi 
1990*22dc650dSSadaf Ebrahimi 	case SLJIT_AND:
1991*22dc650dSSadaf Ebrahimi 	case SLJIT_OR:
1992*22dc650dSSadaf Ebrahimi 	case SLJIT_XOR:
1993*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1994*22dc650dSSadaf Ebrahimi 
1995*22dc650dSSadaf Ebrahimi 	case SLJIT_SHL:
1996*22dc650dSSadaf Ebrahimi 	case SLJIT_MSHL:
1997*22dc650dSSadaf Ebrahimi 	case SLJIT_LSHR:
1998*22dc650dSSadaf Ebrahimi 	case SLJIT_MLSHR:
1999*22dc650dSSadaf Ebrahimi 	case SLJIT_ASHR:
2000*22dc650dSSadaf Ebrahimi 	case SLJIT_MASHR:
2001*22dc650dSSadaf Ebrahimi 	case SLJIT_ROTL:
2002*22dc650dSSadaf Ebrahimi 	case SLJIT_ROTR:
2003*22dc650dSSadaf Ebrahimi 		if (src2 == SLJIT_IMM) {
2004*22dc650dSSadaf Ebrahimi 			if (op & SLJIT_32)
2005*22dc650dSSadaf Ebrahimi 				src2w &= 0x1f;
2006*22dc650dSSadaf Ebrahimi 			else
2007*22dc650dSSadaf Ebrahimi 				src2w &= 0x3f;
2008*22dc650dSSadaf Ebrahimi 		}
2009*22dc650dSSadaf Ebrahimi 
2010*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2011*22dc650dSSadaf Ebrahimi 	}
2012*22dc650dSSadaf Ebrahimi 
2013*22dc650dSSadaf Ebrahimi 	SLJIT_UNREACHABLE();
2014*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2015*22dc650dSSadaf Ebrahimi }
2016*22dc650dSSadaf Ebrahimi 
sljit_emit_op2u(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2017*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2018*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
2019*22dc650dSSadaf Ebrahimi 	sljit_s32 src2, sljit_sw src2w)
2020*22dc650dSSadaf Ebrahimi {
2021*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2022*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2023*22dc650dSSadaf Ebrahimi 
2024*22dc650dSSadaf Ebrahimi 	SLJIT_SKIP_CHECKS(compiler);
2025*22dc650dSSadaf Ebrahimi 	return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w);
2026*22dc650dSSadaf Ebrahimi }
2027*22dc650dSSadaf Ebrahimi 
sljit_emit_op2r(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2028*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2029*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_reg,
2030*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
2031*22dc650dSSadaf Ebrahimi 	sljit_s32 src2, sljit_sw src2w)
2032*22dc650dSSadaf Ebrahimi {
2033*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2034*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2035*22dc650dSSadaf Ebrahimi 
2036*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op)) {
2037*22dc650dSSadaf Ebrahimi 	case SLJIT_MULADD:
2038*22dc650dSSadaf Ebrahimi 		SLJIT_SKIP_CHECKS(compiler);
2039*22dc650dSSadaf Ebrahimi 		FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w));
2040*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, ADD_D | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG2));
2041*22dc650dSSadaf Ebrahimi 	}
2042*22dc650dSSadaf Ebrahimi 
2043*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2044*22dc650dSSadaf Ebrahimi }
2045*22dc650dSSadaf Ebrahimi 
sljit_emit_shift_into(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 src1_reg,sljit_s32 src2_reg,sljit_s32 src3,sljit_sw src3w)2046*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2047*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_reg,
2048*22dc650dSSadaf Ebrahimi 	sljit_s32 src1_reg,
2049*22dc650dSSadaf Ebrahimi 	sljit_s32 src2_reg,
2050*22dc650dSSadaf Ebrahimi 	sljit_s32 src3, sljit_sw src3w)
2051*22dc650dSSadaf Ebrahimi {
2052*22dc650dSSadaf Ebrahimi 	sljit_s32 is_left;
2053*22dc650dSSadaf Ebrahimi 	sljit_ins ins1, ins2, ins3;
2054*22dc650dSSadaf Ebrahimi 	sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
2055*22dc650dSSadaf Ebrahimi 	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
2056*22dc650dSSadaf Ebrahimi 
2057*22dc650dSSadaf Ebrahimi 
2058*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2059*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
2060*22dc650dSSadaf Ebrahimi 
2061*22dc650dSSadaf Ebrahimi 	is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
2062*22dc650dSSadaf Ebrahimi 
2063*22dc650dSSadaf Ebrahimi 	if (src1_reg == src2_reg) {
2064*22dc650dSSadaf Ebrahimi 		SLJIT_SKIP_CHECKS(compiler);
2065*22dc650dSSadaf Ebrahimi 		return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
2066*22dc650dSSadaf Ebrahimi 	}
2067*22dc650dSSadaf Ebrahimi 
2068*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src3, src3w);
2069*22dc650dSSadaf Ebrahimi 
2070*22dc650dSSadaf Ebrahimi 	if (src3 == SLJIT_IMM) {
2071*22dc650dSSadaf Ebrahimi 		src3w &= bit_length - 1;
2072*22dc650dSSadaf Ebrahimi 
2073*22dc650dSSadaf Ebrahimi 		if (src3w == 0)
2074*22dc650dSSadaf Ebrahimi 			return SLJIT_SUCCESS;
2075*22dc650dSSadaf Ebrahimi 
2076*22dc650dSSadaf Ebrahimi 		if (is_left) {
2077*22dc650dSSadaf Ebrahimi 			ins1 = INST(SLLI, op) | IMM_I12(src3w);
2078*22dc650dSSadaf Ebrahimi 			src3w = bit_length - src3w;
2079*22dc650dSSadaf Ebrahimi 			ins2 = INST(SRLI, op) | IMM_I12(src3w);
2080*22dc650dSSadaf Ebrahimi 		} else {
2081*22dc650dSSadaf Ebrahimi 			ins1 = INST(SRLI, op) | IMM_I12(src3w);
2082*22dc650dSSadaf Ebrahimi 			src3w = bit_length - src3w;
2083*22dc650dSSadaf Ebrahimi 			ins2 = INST(SLLI, op) | IMM_I12(src3w);
2084*22dc650dSSadaf Ebrahimi 		}
2085*22dc650dSSadaf Ebrahimi 
2086*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg)));
2087*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg)));
2088*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));
2089*22dc650dSSadaf Ebrahimi 	}
2090*22dc650dSSadaf Ebrahimi 
2091*22dc650dSSadaf Ebrahimi 	if (src3 & SLJIT_MEM) {
2092*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w));
2093*22dc650dSSadaf Ebrahimi 		src3 = TMP_REG2;
2094*22dc650dSSadaf Ebrahimi 	} else if (dst_reg == src3) {
2095*22dc650dSSadaf Ebrahimi 		push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(src3) | IMM_I12(0));
2096*22dc650dSSadaf Ebrahimi 		src3 = TMP_REG2;
2097*22dc650dSSadaf Ebrahimi 	}
2098*22dc650dSSadaf Ebrahimi 
2099*22dc650dSSadaf Ebrahimi 	if (is_left) {
2100*22dc650dSSadaf Ebrahimi 		ins1 = INST(SLL, op);
2101*22dc650dSSadaf Ebrahimi 		ins2 = INST(SRLI, op);
2102*22dc650dSSadaf Ebrahimi 		ins3 = INST(SRL, op);
2103*22dc650dSSadaf Ebrahimi 	} else {
2104*22dc650dSSadaf Ebrahimi 		ins1 = INST(SRL, op);
2105*22dc650dSSadaf Ebrahimi 		ins2 = INST(SLLI, op);
2106*22dc650dSSadaf Ebrahimi 		ins3 = INST(SLL, op);
2107*22dc650dSSadaf Ebrahimi 	}
2108*22dc650dSSadaf Ebrahimi 
2109*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg) | RK(src3)));
2110*22dc650dSSadaf Ebrahimi 
2111*22dc650dSSadaf Ebrahimi 	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
2112*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg) | IMM_I12(1)));
2113*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RJ(src3) | IMM_I12((sljit_ins)bit_length - 1)));
2114*22dc650dSSadaf Ebrahimi 		src2_reg = TMP_REG1;
2115*22dc650dSSadaf Ebrahimi 	} else
2116*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | RK(src3)));
2117*22dc650dSSadaf Ebrahimi 
2118*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, ins3 | RD(TMP_REG1) | RJ(src2_reg) | RK(TMP_REG2)));
2119*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));
2120*22dc650dSSadaf Ebrahimi }
2121*22dc650dSSadaf Ebrahimi 
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)2122*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2123*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
2124*22dc650dSSadaf Ebrahimi {
2125*22dc650dSSadaf Ebrahimi 	sljit_s32 base = src & REG_MASK;
2126*22dc650dSSadaf Ebrahimi 
2127*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2128*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2129*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src, srcw);
2130*22dc650dSSadaf Ebrahimi 
2131*22dc650dSSadaf Ebrahimi 	switch (op) {
2132*22dc650dSSadaf Ebrahimi 	case SLJIT_FAST_RETURN:
2133*22dc650dSSadaf Ebrahimi 		if (FAST_IS_REG(src))
2134*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, ADDI_D | RD(RETURN_ADDR_REG) | RJ(src) | IMM_I12(0)));
2135*22dc650dSSadaf Ebrahimi 		else
2136*22dc650dSSadaf Ebrahimi 			FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));
2137*22dc650dSSadaf Ebrahimi 
2138*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
2139*22dc650dSSadaf Ebrahimi 	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2140*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
2141*22dc650dSSadaf Ebrahimi 	case SLJIT_PREFETCH_L1:
2142*22dc650dSSadaf Ebrahimi 	case SLJIT_PREFETCH_L2:
2143*22dc650dSSadaf Ebrahimi 	case SLJIT_PREFETCH_L3:
2144*22dc650dSSadaf Ebrahimi 	case SLJIT_PREFETCH_ONCE:
2145*22dc650dSSadaf Ebrahimi 		if (SLJIT_UNLIKELY(src & OFFS_REG_MASK)) {
2146*22dc650dSSadaf Ebrahimi 			srcw &= 0x3;
2147*22dc650dSSadaf Ebrahimi 			if (SLJIT_UNLIKELY(srcw))
2148*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(src)) | IMM_I12(srcw)));
2149*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1)));
2150*22dc650dSSadaf Ebrahimi 		} else {
2151*22dc650dSSadaf Ebrahimi 			if (base && srcw <= I12_MAX && srcw >= I12_MIN)
2152*22dc650dSSadaf Ebrahimi 				return push_inst(compiler,PRELD | RJ(base) | IMM_I12(srcw));
2153*22dc650dSSadaf Ebrahimi 
2154*22dc650dSSadaf Ebrahimi 			FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2155*22dc650dSSadaf Ebrahimi 			if (base != 0)
2156*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1)));
2157*22dc650dSSadaf Ebrahimi 		}
2158*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, PRELD | RD(0) | RJ(TMP_REG1));
2159*22dc650dSSadaf Ebrahimi 	}
2160*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2161*22dc650dSSadaf Ebrahimi }
2162*22dc650dSSadaf Ebrahimi 
sljit_emit_op_dst(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw)2163*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
2164*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw)
2165*22dc650dSSadaf Ebrahimi {
2166*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r;
2167*22dc650dSSadaf Ebrahimi 
2168*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2169*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
2170*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
2171*22dc650dSSadaf Ebrahimi 
2172*22dc650dSSadaf Ebrahimi 	switch (op) {
2173*22dc650dSSadaf Ebrahimi 	case SLJIT_FAST_ENTER:
2174*22dc650dSSadaf Ebrahimi 		if (FAST_IS_REG(dst))
2175*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, ADDI_D | RD(dst) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
2176*22dc650dSSadaf Ebrahimi 
2177*22dc650dSSadaf Ebrahimi 		SLJIT_ASSERT(RETURN_ADDR_REG == TMP_REG2);
2178*22dc650dSSadaf Ebrahimi 		break;
2179*22dc650dSSadaf Ebrahimi 	case SLJIT_GET_RETURN_ADDRESS:
2180*22dc650dSSadaf Ebrahimi 		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
2181*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw)));
2182*22dc650dSSadaf Ebrahimi 		break;
2183*22dc650dSSadaf Ebrahimi 	}
2184*22dc650dSSadaf Ebrahimi 
2185*22dc650dSSadaf Ebrahimi 	if (dst & SLJIT_MEM)
2186*22dc650dSSadaf Ebrahimi 		return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw);
2187*22dc650dSSadaf Ebrahimi 
2188*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2189*22dc650dSSadaf Ebrahimi }
2190*22dc650dSSadaf Ebrahimi 
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)2191*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2192*22dc650dSSadaf Ebrahimi {
2193*22dc650dSSadaf Ebrahimi 	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2194*22dc650dSSadaf Ebrahimi 
2195*22dc650dSSadaf Ebrahimi 	if (type == SLJIT_GP_REGISTER)
2196*22dc650dSSadaf Ebrahimi 		return reg_map[reg];
2197*22dc650dSSadaf Ebrahimi 
2198*22dc650dSSadaf Ebrahimi 	if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256)
2199*22dc650dSSadaf Ebrahimi 		return -1;
2200*22dc650dSSadaf Ebrahimi 
2201*22dc650dSSadaf Ebrahimi 	return freg_map[reg];
2202*22dc650dSSadaf Ebrahimi }
2203*22dc650dSSadaf Ebrahimi 
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_u32 size)2204*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2205*22dc650dSSadaf Ebrahimi 	void *instruction, sljit_u32 size)
2206*22dc650dSSadaf Ebrahimi {
2207*22dc650dSSadaf Ebrahimi 	SLJIT_UNUSED_ARG(size);
2208*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2209*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2210*22dc650dSSadaf Ebrahimi 
2211*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, *(sljit_ins*)instruction);
2212*22dc650dSSadaf Ebrahimi }
2213*22dc650dSSadaf Ebrahimi 
2214*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
2215*22dc650dSSadaf Ebrahimi /*  Floating point operators                                             */
2216*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
/* Places a floating point compare condition code at bit 15 of the instruction.
   Arguments are parenthesized so that compound expressions expand safely. */
#define SET_COND(cond) ((sljit_ins)((cond) << 15))

#define COND_CUN SET_COND(0x8)   /* UN */
#define COND_CEQ SET_COND(0x4)   /* EQ */
#define COND_CUEQ SET_COND(0xc)  /* UN EQ */
#define COND_CLT SET_COND(0x2)   /* LT */
#define COND_CULT SET_COND(0xa)  /* UN LT */
#define COND_CLE SET_COND(0x6)   /* LT EQ */
#define COND_CULE SET_COND(0xe)  /* UN LT EQ */
#define COND_CNE SET_COND(0x10)  /* GT LT */
#define COND_CUNE SET_COND(0x18) /* UN GT LT */
#define COND_COR SET_COND(0x14)  /* GT LT EQ */

/* Selects the _S form of an instruction when SLJIT_32 is set in 'type',
   the _D form otherwise. 'type' must be an op word, not a 0/1 flag. */
#define FINST(inst, type) ((sljit_ins)(((type) & SLJIT_32) ? inst##_S : inst##_D))
/* Three bit condition flag register fields at bit 0, bit 5 and bit 15. */
#define FCD(cd) ((sljit_ins)((cd) & 0x7))
#define FCJ(cj) ((sljit_ins)(((cj) & 0x7) << 5))
#define FCA(ca) ((sljit_ins)(((ca) & 0x7) << 15))
/* Condition flag register index passed to FCD() / FCJ() by the compare emitter. */
#define F_OTHER_FLAG 1

/* DOUBLE_DATA combined with the SLJIT_32 bit of 'op' moved down by 7 positions. */
#define FLOAT_DATA(op) (DOUBLE_DATA | (((op) & SLJIT_32) >> 7))
2237*22dc650dSSadaf Ebrahimi 
2238*22dc650dSSadaf Ebrahimi /* convert to inter exact toward zero */
sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2239*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2240*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
2241*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
2242*22dc650dSSadaf Ebrahimi {
2243*22dc650dSSadaf Ebrahimi 	sljit_ins inst;
2244*22dc650dSSadaf Ebrahimi 	sljit_u32 word_data = 0;
2245*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
2246*22dc650dSSadaf Ebrahimi 
2247*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op))
2248*22dc650dSSadaf Ebrahimi 	{
2249*22dc650dSSadaf Ebrahimi 	case SLJIT_CONV_SW_FROM_F64:
2250*22dc650dSSadaf Ebrahimi 		word_data = 1;
2251*22dc650dSSadaf Ebrahimi 		inst = FINST(FTINTRZ_L, op);
2252*22dc650dSSadaf Ebrahimi 		break;
2253*22dc650dSSadaf Ebrahimi 	case SLJIT_CONV_S32_FROM_F64:
2254*22dc650dSSadaf Ebrahimi 		inst = FINST(FTINTRZ_W, op);
2255*22dc650dSSadaf Ebrahimi 		break;
2256*22dc650dSSadaf Ebrahimi 	default:
2257*22dc650dSSadaf Ebrahimi 		inst = BREAK;
2258*22dc650dSSadaf Ebrahimi 		SLJIT_UNREACHABLE();
2259*22dc650dSSadaf Ebrahimi 	}
2260*22dc650dSSadaf Ebrahimi 
2261*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
2262*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
2263*22dc650dSSadaf Ebrahimi 		src = TMP_FREG1;
2264*22dc650dSSadaf Ebrahimi 	}
2265*22dc650dSSadaf Ebrahimi 
2266*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, inst | FRD(TMP_FREG1) | FRJ(src)));
2267*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, FINST(MOVFR2GR, word_data) | RD(dst_r) | FRJ(TMP_FREG1)));
2268*22dc650dSSadaf Ebrahimi 
2269*22dc650dSSadaf Ebrahimi 	if (dst & SLJIT_MEM)
2270*22dc650dSSadaf Ebrahimi 		return emit_op_mem2(compiler, word_data ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0);
2271*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2272*22dc650dSSadaf Ebrahimi }
2273*22dc650dSSadaf Ebrahimi 
sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2274*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_s32 op,
2275*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
2276*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
2277*22dc650dSSadaf Ebrahimi {
2278*22dc650dSSadaf Ebrahimi 	sljit_ins inst;
2279*22dc650dSSadaf Ebrahimi 	sljit_u32 word_data = 0;
2280*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2281*22dc650dSSadaf Ebrahimi 
2282*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op))
2283*22dc650dSSadaf Ebrahimi 	{
2284*22dc650dSSadaf Ebrahimi 	case SLJIT_CONV_F64_FROM_SW:
2285*22dc650dSSadaf Ebrahimi 		word_data = 1;
2286*22dc650dSSadaf Ebrahimi 		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);
2287*22dc650dSSadaf Ebrahimi 		break;
2288*22dc650dSSadaf Ebrahimi 	case SLJIT_CONV_F64_FROM_S32:
2289*22dc650dSSadaf Ebrahimi 		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);
2290*22dc650dSSadaf Ebrahimi 		break;
2291*22dc650dSSadaf Ebrahimi 	default:
2292*22dc650dSSadaf Ebrahimi 		inst = BREAK;
2293*22dc650dSSadaf Ebrahimi 		SLJIT_UNREACHABLE();
2294*22dc650dSSadaf Ebrahimi 	}
2295*22dc650dSSadaf Ebrahimi 
2296*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
2297*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2298*22dc650dSSadaf Ebrahimi 		src = TMP_REG1;
2299*22dc650dSSadaf Ebrahimi 	} else if (src == SLJIT_IMM) {
2300*22dc650dSSadaf Ebrahimi 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2301*22dc650dSSadaf Ebrahimi 			srcw = (sljit_s32)srcw;
2302*22dc650dSSadaf Ebrahimi 
2303*22dc650dSSadaf Ebrahimi 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2304*22dc650dSSadaf Ebrahimi 		src = TMP_REG1;
2305*22dc650dSSadaf Ebrahimi 	}
2306*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));
2307*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2308*22dc650dSSadaf Ebrahimi 
2309*22dc650dSSadaf Ebrahimi 	if (dst & SLJIT_MEM)
2310*22dc650dSSadaf Ebrahimi 		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
2311*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2312*22dc650dSSadaf Ebrahimi }
2313*22dc650dSSadaf Ebrahimi 
sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2314*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2315*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
2316*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
2317*22dc650dSSadaf Ebrahimi {
2318*22dc650dSSadaf Ebrahimi 	return sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw);
2319*22dc650dSSadaf Ebrahimi }
2320*22dc650dSSadaf Ebrahimi 
sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2321*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
2322*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
2323*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
2324*22dc650dSSadaf Ebrahimi {
2325*22dc650dSSadaf Ebrahimi 	sljit_ins inst;
2326*22dc650dSSadaf Ebrahimi 	sljit_u32 word_data = 0;
2327*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2328*22dc650dSSadaf Ebrahimi 
2329*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op))
2330*22dc650dSSadaf Ebrahimi 	{
2331*22dc650dSSadaf Ebrahimi 	case SLJIT_CONV_F64_FROM_UW:
2332*22dc650dSSadaf Ebrahimi 		word_data = 1;
2333*22dc650dSSadaf Ebrahimi 		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);
2334*22dc650dSSadaf Ebrahimi 		break;
2335*22dc650dSSadaf Ebrahimi 	case SLJIT_CONV_F64_FROM_U32:
2336*22dc650dSSadaf Ebrahimi 		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);
2337*22dc650dSSadaf Ebrahimi 		break;
2338*22dc650dSSadaf Ebrahimi 	default:
2339*22dc650dSSadaf Ebrahimi 		inst = BREAK;
2340*22dc650dSSadaf Ebrahimi 		SLJIT_UNREACHABLE();
2341*22dc650dSSadaf Ebrahimi 	}
2342*22dc650dSSadaf Ebrahimi 
2343*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
2344*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2345*22dc650dSSadaf Ebrahimi 		src = TMP_REG1;
2346*22dc650dSSadaf Ebrahimi 	} else if (src == SLJIT_IMM) {
2347*22dc650dSSadaf Ebrahimi 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
2348*22dc650dSSadaf Ebrahimi 			srcw = (sljit_u32)srcw;
2349*22dc650dSSadaf Ebrahimi 
2350*22dc650dSSadaf Ebrahimi 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2351*22dc650dSSadaf Ebrahimi 		src = TMP_REG1;
2352*22dc650dSSadaf Ebrahimi 	}
2353*22dc650dSSadaf Ebrahimi 
2354*22dc650dSSadaf Ebrahimi 	if (!word_data)
2355*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, SRLI_W | RD(src) | RJ(src) | IMM_I12(0)));
2356*22dc650dSSadaf Ebrahimi 
2357*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, BLT | RJ(src) | RD(TMP_ZERO) | IMM_I16(4)));
2358*22dc650dSSadaf Ebrahimi 
2359*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));
2360*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2361*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, B | IMM_I26(7)));
2362*22dc650dSSadaf Ebrahimi 
2363*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG2) | RJ(src) | IMM_I12(1)));
2364*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, (word_data ? SRLI_D : SRLI_W) | RD(TMP_REG1) | RJ(src) | IMM_I12(1)));
2365*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, OR | RD(TMP_REG1) | RJ(TMP_REG1) | RK(TMP_REG2)));
2366*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, INST(MOVGR2FR, (!word_data)) | FRD(dst_r) | RJ(TMP_REG1)));
2367*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2368*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(dst_r) | FRK(dst_r)));
2369*22dc650dSSadaf Ebrahimi 
2370*22dc650dSSadaf Ebrahimi 	if (dst & SLJIT_MEM)
2371*22dc650dSSadaf Ebrahimi 		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
2372*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2373*22dc650dSSadaf Ebrahimi }
2374*22dc650dSSadaf Ebrahimi 
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2375*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2376*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
2377*22dc650dSSadaf Ebrahimi 	sljit_s32 src2, sljit_sw src2w)
2378*22dc650dSSadaf Ebrahimi {
2379*22dc650dSSadaf Ebrahimi 	if (src1 & SLJIT_MEM) {
2380*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
2381*22dc650dSSadaf Ebrahimi 		src1 = TMP_FREG1;
2382*22dc650dSSadaf Ebrahimi 	}
2383*22dc650dSSadaf Ebrahimi 
2384*22dc650dSSadaf Ebrahimi 	if (src2 & SLJIT_MEM) {
2385*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
2386*22dc650dSSadaf Ebrahimi 		src2 = TMP_FREG2;
2387*22dc650dSSadaf Ebrahimi 	}
2388*22dc650dSSadaf Ebrahimi 
2389*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(OTHER_FLAG)));
2390*22dc650dSSadaf Ebrahimi 
2391*22dc650dSSadaf Ebrahimi 	switch (GET_FLAG_TYPE(op)) {
2392*22dc650dSSadaf Ebrahimi 	case SLJIT_F_EQUAL:
2393*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_EQUAL:
2394*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2395*22dc650dSSadaf Ebrahimi 		break;
2396*22dc650dSSadaf Ebrahimi 	case SLJIT_F_LESS:
2397*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_LESS:
2398*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2399*22dc650dSSadaf Ebrahimi 		break;
2400*22dc650dSSadaf Ebrahimi 	case SLJIT_F_GREATER:
2401*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_GREATER:
2402*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));
2403*22dc650dSSadaf Ebrahimi 		break;
2404*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_GREATER:
2405*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));
2406*22dc650dSSadaf Ebrahimi 		break;
2407*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_LESS:
2408*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2409*22dc650dSSadaf Ebrahimi 		break;
2410*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_EQUAL:
2411*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2412*22dc650dSSadaf Ebrahimi 		break;
2413*22dc650dSSadaf Ebrahimi 	default: /* SLJIT_UNORDERED */
2414*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUN | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2415*22dc650dSSadaf Ebrahimi 	}
2416*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, MOVCF2GR | RD(OTHER_FLAG) | FCJ(F_OTHER_FLAG));
2417*22dc650dSSadaf Ebrahimi }
2418*22dc650dSSadaf Ebrahimi 
sljit_emit_fop1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2419*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2420*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
2421*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
2422*22dc650dSSadaf Ebrahimi {
2423*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r;
2424*22dc650dSSadaf Ebrahimi 
2425*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2426*22dc650dSSadaf Ebrahimi 	compiler->cache_arg = 0;
2427*22dc650dSSadaf Ebrahimi 	compiler->cache_argw = 0;
2428*22dc650dSSadaf Ebrahimi 
2429*22dc650dSSadaf Ebrahimi 	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
2430*22dc650dSSadaf Ebrahimi 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2431*22dc650dSSadaf Ebrahimi 
2432*22dc650dSSadaf Ebrahimi 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
2433*22dc650dSSadaf Ebrahimi 		op ^= SLJIT_32;
2434*22dc650dSSadaf Ebrahimi 
2435*22dc650dSSadaf Ebrahimi 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2436*22dc650dSSadaf Ebrahimi 
2437*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
2438*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
2439*22dc650dSSadaf Ebrahimi 		src = dst_r;
2440*22dc650dSSadaf Ebrahimi 	}
2441*22dc650dSSadaf Ebrahimi 
2442*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op)) {
2443*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_F64:
2444*22dc650dSSadaf Ebrahimi 		if (src != dst_r) {
2445*22dc650dSSadaf Ebrahimi 			if (!(dst & SLJIT_MEM))
2446*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, FINST(FMOV, op) | FRD(dst_r) | FRJ(src)));
2447*22dc650dSSadaf Ebrahimi 			else
2448*22dc650dSSadaf Ebrahimi 				dst_r = src;
2449*22dc650dSSadaf Ebrahimi 		}
2450*22dc650dSSadaf Ebrahimi 		break;
2451*22dc650dSSadaf Ebrahimi 	case SLJIT_NEG_F64:
2452*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FINST(FNEG, op) | FRD(dst_r) | FRJ(src)));
2453*22dc650dSSadaf Ebrahimi 		break;
2454*22dc650dSSadaf Ebrahimi 	case SLJIT_ABS_F64:
2455*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FINST(FABS, op) | FRD(dst_r) | FRJ(src)));
2456*22dc650dSSadaf Ebrahimi 		break;
2457*22dc650dSSadaf Ebrahimi 	case SLJIT_CONV_F64_FROM_F32:
2458*22dc650dSSadaf Ebrahimi 		/* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */
2459*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? FCVT_D_S : FCVT_S_D) | FRD(dst_r) | FRJ(src)));
2460*22dc650dSSadaf Ebrahimi 		op ^= SLJIT_32;
2461*22dc650dSSadaf Ebrahimi 		break;
2462*22dc650dSSadaf Ebrahimi 	}
2463*22dc650dSSadaf Ebrahimi 
2464*22dc650dSSadaf Ebrahimi 	if (dst & SLJIT_MEM)
2465*22dc650dSSadaf Ebrahimi 		return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);
2466*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2467*22dc650dSSadaf Ebrahimi }
2468*22dc650dSSadaf Ebrahimi 
sljit_emit_fop2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2469*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2470*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
2471*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
2472*22dc650dSSadaf Ebrahimi 	sljit_s32 src2, sljit_sw src2w)
2473*22dc650dSSadaf Ebrahimi {
2474*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r, flags = 0;
2475*22dc650dSSadaf Ebrahimi 
2476*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2477*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2478*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
2479*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src1, src1w);
2480*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src2, src2w);
2481*22dc650dSSadaf Ebrahimi 
2482*22dc650dSSadaf Ebrahimi 	compiler->cache_arg = 0;
2483*22dc650dSSadaf Ebrahimi 	compiler->cache_argw = 0;
2484*22dc650dSSadaf Ebrahimi 
2485*22dc650dSSadaf Ebrahimi 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
2486*22dc650dSSadaf Ebrahimi 
2487*22dc650dSSadaf Ebrahimi 	if (src1 & SLJIT_MEM) {
2488*22dc650dSSadaf Ebrahimi 		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
2489*22dc650dSSadaf Ebrahimi 			FAIL_IF(compiler->error);
2490*22dc650dSSadaf Ebrahimi 			src1 = TMP_FREG1;
2491*22dc650dSSadaf Ebrahimi 		} else
2492*22dc650dSSadaf Ebrahimi 			flags |= SLOW_SRC1;
2493*22dc650dSSadaf Ebrahimi 	}
2494*22dc650dSSadaf Ebrahimi 
2495*22dc650dSSadaf Ebrahimi 	if (src2 & SLJIT_MEM) {
2496*22dc650dSSadaf Ebrahimi 		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
2497*22dc650dSSadaf Ebrahimi 			FAIL_IF(compiler->error);
2498*22dc650dSSadaf Ebrahimi 			src2 = TMP_FREG2;
2499*22dc650dSSadaf Ebrahimi 		} else
2500*22dc650dSSadaf Ebrahimi 			flags |= SLOW_SRC2;
2501*22dc650dSSadaf Ebrahimi 	}
2502*22dc650dSSadaf Ebrahimi 
2503*22dc650dSSadaf Ebrahimi 	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
2504*22dc650dSSadaf Ebrahimi 		if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
2505*22dc650dSSadaf Ebrahimi 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
2506*22dc650dSSadaf Ebrahimi 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
2507*22dc650dSSadaf Ebrahimi 		} else {
2508*22dc650dSSadaf Ebrahimi 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
2509*22dc650dSSadaf Ebrahimi 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
2510*22dc650dSSadaf Ebrahimi 		}
2511*22dc650dSSadaf Ebrahimi 	}
2512*22dc650dSSadaf Ebrahimi 	else if (flags & SLOW_SRC1)
2513*22dc650dSSadaf Ebrahimi 		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
2514*22dc650dSSadaf Ebrahimi 	else if (flags & SLOW_SRC2)
2515*22dc650dSSadaf Ebrahimi 		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
2516*22dc650dSSadaf Ebrahimi 
2517*22dc650dSSadaf Ebrahimi 	if (flags & SLOW_SRC1)
2518*22dc650dSSadaf Ebrahimi 		src1 = TMP_FREG1;
2519*22dc650dSSadaf Ebrahimi 	if (flags & SLOW_SRC2)
2520*22dc650dSSadaf Ebrahimi 		src2 = TMP_FREG2;
2521*22dc650dSSadaf Ebrahimi 
2522*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op)) {
2523*22dc650dSSadaf Ebrahimi 	case SLJIT_ADD_F64:
2524*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2525*22dc650dSSadaf Ebrahimi 		break;
2526*22dc650dSSadaf Ebrahimi 	case SLJIT_SUB_F64:
2527*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FINST(FSUB, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2528*22dc650dSSadaf Ebrahimi 		break;
2529*22dc650dSSadaf Ebrahimi 	case SLJIT_MUL_F64:
2530*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FINST(FMUL, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2531*22dc650dSSadaf Ebrahimi 		break;
2532*22dc650dSSadaf Ebrahimi 	case SLJIT_DIV_F64:
2533*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, FINST(FDIV, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2534*22dc650dSSadaf Ebrahimi 		break;
2535*22dc650dSSadaf Ebrahimi 	}
2536*22dc650dSSadaf Ebrahimi 
2537*22dc650dSSadaf Ebrahimi 	if (dst_r != dst)
2538*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
2539*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2540*22dc650dSSadaf Ebrahimi }
2541*22dc650dSSadaf Ebrahimi 
sljit_emit_fop2r(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_freg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2542*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
2543*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_freg,
2544*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
2545*22dc650dSSadaf Ebrahimi 	sljit_s32 src2, sljit_sw src2w)
2546*22dc650dSSadaf Ebrahimi {
2547*22dc650dSSadaf Ebrahimi 	sljit_s32 reg;
2548*22dc650dSSadaf Ebrahimi 
2549*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2550*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
2551*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src1, src1w);
2552*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src2, src2w);
2553*22dc650dSSadaf Ebrahimi 
2554*22dc650dSSadaf Ebrahimi 	if (src2 & SLJIT_MEM) {
2555*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src2, src2w, 0, 0));
2556*22dc650dSSadaf Ebrahimi 		src2 = TMP_FREG1;
2557*22dc650dSSadaf Ebrahimi 	}
2558*22dc650dSSadaf Ebrahimi 
2559*22dc650dSSadaf Ebrahimi 	if (src1 & SLJIT_MEM) {
2560*22dc650dSSadaf Ebrahimi 		reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
2561*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, reg, src1, src1w, 0, 0));
2562*22dc650dSSadaf Ebrahimi 		src1 = reg;
2563*22dc650dSSadaf Ebrahimi 	}
2564*22dc650dSSadaf Ebrahimi 
2565*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, FINST(FCOPYSIGN, op) | FRD(dst_freg) | FRJ(src1) | FRK(src2));
2566*22dc650dSSadaf Ebrahimi }
2567*22dc650dSSadaf Ebrahimi 
sljit_emit_fset32(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f32 value)2568*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
2569*22dc650dSSadaf Ebrahimi 	sljit_s32 freg, sljit_f32 value)
2570*22dc650dSSadaf Ebrahimi {
2571*22dc650dSSadaf Ebrahimi 	union {
2572*22dc650dSSadaf Ebrahimi 		sljit_s32 imm;
2573*22dc650dSSadaf Ebrahimi 		sljit_f32 value;
2574*22dc650dSSadaf Ebrahimi 	} u;
2575*22dc650dSSadaf Ebrahimi 
2576*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2577*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_fset32(compiler, freg, value));
2578*22dc650dSSadaf Ebrahimi 
2579*22dc650dSSadaf Ebrahimi 	u.value = value;
2580*22dc650dSSadaf Ebrahimi 
2581*22dc650dSSadaf Ebrahimi 	if (u.imm == 0)
2582*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, MOVGR2FR_W | RJ(TMP_ZERO) | FRD(freg));
2583*22dc650dSSadaf Ebrahimi 
2584*22dc650dSSadaf Ebrahimi 	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
2585*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, MOVGR2FR_W | RJ(TMP_REG1) | FRD(freg));
2586*22dc650dSSadaf Ebrahimi }
2587*22dc650dSSadaf Ebrahimi 
sljit_emit_fset64(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f64 value)2588*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
2589*22dc650dSSadaf Ebrahimi 	sljit_s32 freg, sljit_f64 value)
2590*22dc650dSSadaf Ebrahimi {
2591*22dc650dSSadaf Ebrahimi 	union {
2592*22dc650dSSadaf Ebrahimi 		sljit_sw imm;
2593*22dc650dSSadaf Ebrahimi 		sljit_f64 value;
2594*22dc650dSSadaf Ebrahimi 	} u;
2595*22dc650dSSadaf Ebrahimi 
2596*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2597*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_fset64(compiler, freg, value));
2598*22dc650dSSadaf Ebrahimi 
2599*22dc650dSSadaf Ebrahimi 	u.value = value;
2600*22dc650dSSadaf Ebrahimi 
2601*22dc650dSSadaf Ebrahimi 	if (u.imm == 0)
2602*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, MOVGR2FR_D | RJ(TMP_ZERO) | FRD(freg));
2603*22dc650dSSadaf Ebrahimi 
2604*22dc650dSSadaf Ebrahimi 	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
2605*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, MOVGR2FR_D | RJ(TMP_REG1) | FRD(freg));
2606*22dc650dSSadaf Ebrahimi }
2607*22dc650dSSadaf Ebrahimi 
sljit_emit_fcopy(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 freg,sljit_s32 reg)2608*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
2609*22dc650dSSadaf Ebrahimi 	sljit_s32 freg, sljit_s32 reg)
2610*22dc650dSSadaf Ebrahimi {
2611*22dc650dSSadaf Ebrahimi 	sljit_ins inst;
2612*22dc650dSSadaf Ebrahimi 
2613*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2614*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
2615*22dc650dSSadaf Ebrahimi 
2616*22dc650dSSadaf Ebrahimi 	if (GET_OPCODE(op) == SLJIT_COPY_TO_F64)
2617*22dc650dSSadaf Ebrahimi 		inst = ((op & SLJIT_32) ? MOVGR2FR_W : MOVGR2FR_D) | FRD(freg) | RJ(reg);
2618*22dc650dSSadaf Ebrahimi 	else
2619*22dc650dSSadaf Ebrahimi 		inst = ((op & SLJIT_32) ? MOVFR2GR_S : MOVFR2GR_D) | RD(reg) | FRJ(freg);
2620*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, inst);
2621*22dc650dSSadaf Ebrahimi }
2622*22dc650dSSadaf Ebrahimi 
2623*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
2624*22dc650dSSadaf Ebrahimi /*  Conditional instructions                                             */
2625*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
2626*22dc650dSSadaf Ebrahimi 
sljit_emit_label(struct sljit_compiler * compiler)2627*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2628*22dc650dSSadaf Ebrahimi {
2629*22dc650dSSadaf Ebrahimi 	struct sljit_label *label;
2630*22dc650dSSadaf Ebrahimi 
2631*22dc650dSSadaf Ebrahimi 	CHECK_ERROR_PTR();
2632*22dc650dSSadaf Ebrahimi 	CHECK_PTR(check_sljit_emit_label(compiler));
2633*22dc650dSSadaf Ebrahimi 
2634*22dc650dSSadaf Ebrahimi 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2635*22dc650dSSadaf Ebrahimi 		return compiler->last_label;
2636*22dc650dSSadaf Ebrahimi 
2637*22dc650dSSadaf Ebrahimi 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2638*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(!label);
2639*22dc650dSSadaf Ebrahimi 	set_label(label, compiler);
2640*22dc650dSSadaf Ebrahimi 	return label;
2641*22dc650dSSadaf Ebrahimi }
2642*22dc650dSSadaf Ebrahimi 
get_jump_instruction(sljit_s32 type)2643*22dc650dSSadaf Ebrahimi static sljit_ins get_jump_instruction(sljit_s32 type)
2644*22dc650dSSadaf Ebrahimi {
2645*22dc650dSSadaf Ebrahimi 	switch (type) {
2646*22dc650dSSadaf Ebrahimi 	case SLJIT_EQUAL:
2647*22dc650dSSadaf Ebrahimi 	case SLJIT_ATOMIC_NOT_STORED:
2648*22dc650dSSadaf Ebrahimi 		return BNE | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
2649*22dc650dSSadaf Ebrahimi 	case SLJIT_NOT_EQUAL:
2650*22dc650dSSadaf Ebrahimi 	case SLJIT_ATOMIC_STORED:
2651*22dc650dSSadaf Ebrahimi 		return BEQ | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
2652*22dc650dSSadaf Ebrahimi 	case SLJIT_LESS:
2653*22dc650dSSadaf Ebrahimi 	case SLJIT_GREATER:
2654*22dc650dSSadaf Ebrahimi 	case SLJIT_SIG_LESS:
2655*22dc650dSSadaf Ebrahimi 	case SLJIT_SIG_GREATER:
2656*22dc650dSSadaf Ebrahimi 	case SLJIT_OVERFLOW:
2657*22dc650dSSadaf Ebrahimi 	case SLJIT_CARRY:
2658*22dc650dSSadaf Ebrahimi 		return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2659*22dc650dSSadaf Ebrahimi 	case SLJIT_GREATER_EQUAL:
2660*22dc650dSSadaf Ebrahimi 	case SLJIT_LESS_EQUAL:
2661*22dc650dSSadaf Ebrahimi 	case SLJIT_SIG_GREATER_EQUAL:
2662*22dc650dSSadaf Ebrahimi 	case SLJIT_SIG_LESS_EQUAL:
2663*22dc650dSSadaf Ebrahimi 	case SLJIT_NOT_OVERFLOW:
2664*22dc650dSSadaf Ebrahimi 	case SLJIT_NOT_CARRY:
2665*22dc650dSSadaf Ebrahimi 		return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2666*22dc650dSSadaf Ebrahimi 	case SLJIT_F_EQUAL:
2667*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_EQUAL:
2668*22dc650dSSadaf Ebrahimi 	case SLJIT_F_LESS:
2669*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_LESS:
2670*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_GREATER:
2671*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_GREATER:
2672*22dc650dSSadaf Ebrahimi 	case SLJIT_F_GREATER:
2673*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_LESS:
2674*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_EQUAL:
2675*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED:
2676*22dc650dSSadaf Ebrahimi 		return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2677*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_NOT_EQUAL:
2678*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_LESS_EQUAL:
2679*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED_GREATER_EQUAL:
2680*22dc650dSSadaf Ebrahimi 	case SLJIT_F_NOT_EQUAL:
2681*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_NOT_EQUAL:
2682*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2683*22dc650dSSadaf Ebrahimi 	case SLJIT_UNORDERED_OR_LESS_EQUAL:
2684*22dc650dSSadaf Ebrahimi 	case SLJIT_F_LESS_EQUAL:
2685*22dc650dSSadaf Ebrahimi 	case SLJIT_F_GREATER_EQUAL:
2686*22dc650dSSadaf Ebrahimi 	case SLJIT_ORDERED:
2687*22dc650dSSadaf Ebrahimi 		return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2688*22dc650dSSadaf Ebrahimi 	default:
2689*22dc650dSSadaf Ebrahimi 		/* Not conditional branch. */
2690*22dc650dSSadaf Ebrahimi 		return 0;
2691*22dc650dSSadaf Ebrahimi 	}
2692*22dc650dSSadaf Ebrahimi }
2693*22dc650dSSadaf Ebrahimi 
sljit_emit_jump(struct sljit_compiler * compiler,sljit_s32 type)2694*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2695*22dc650dSSadaf Ebrahimi {
2696*22dc650dSSadaf Ebrahimi 	struct sljit_jump *jump;
2697*22dc650dSSadaf Ebrahimi 	sljit_ins inst;
2698*22dc650dSSadaf Ebrahimi 
2699*22dc650dSSadaf Ebrahimi 	CHECK_ERROR_PTR();
2700*22dc650dSSadaf Ebrahimi 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
2701*22dc650dSSadaf Ebrahimi 
2702*22dc650dSSadaf Ebrahimi 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2703*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(!jump);
2704*22dc650dSSadaf Ebrahimi 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2705*22dc650dSSadaf Ebrahimi 	type &= 0xff;
2706*22dc650dSSadaf Ebrahimi 
2707*22dc650dSSadaf Ebrahimi 	inst = get_jump_instruction(type);
2708*22dc650dSSadaf Ebrahimi 
2709*22dc650dSSadaf Ebrahimi 	if (inst != 0) {
2710*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(push_inst(compiler, inst));
2711*22dc650dSSadaf Ebrahimi 		jump->flags |= IS_COND;
2712*22dc650dSSadaf Ebrahimi 	}
2713*22dc650dSSadaf Ebrahimi 
2714*22dc650dSSadaf Ebrahimi 	jump->addr = compiler->size;
2715*22dc650dSSadaf Ebrahimi 	inst = JIRL | RJ(TMP_REG1) | IMM_I16(0);
2716*22dc650dSSadaf Ebrahimi 
2717*22dc650dSSadaf Ebrahimi 	if (type >= SLJIT_FAST_CALL) {
2718*22dc650dSSadaf Ebrahimi 		jump->flags |= IS_CALL;
2719*22dc650dSSadaf Ebrahimi 		inst |= RD(RETURN_ADDR_REG);
2720*22dc650dSSadaf Ebrahimi 	}
2721*22dc650dSSadaf Ebrahimi 
2722*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(push_inst(compiler, inst));
2723*22dc650dSSadaf Ebrahimi 
2724*22dc650dSSadaf Ebrahimi 	/* Maximum number of instructions required for generating a constant. */
2725*22dc650dSSadaf Ebrahimi 	compiler->size += JUMP_MAX_SIZE - 1;
2726*22dc650dSSadaf Ebrahimi 	return jump;
2727*22dc650dSSadaf Ebrahimi }
2728*22dc650dSSadaf Ebrahimi 
sljit_emit_call(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types)2729*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
2730*22dc650dSSadaf Ebrahimi 	sljit_s32 arg_types)
2731*22dc650dSSadaf Ebrahimi {
2732*22dc650dSSadaf Ebrahimi 	SLJIT_UNUSED_ARG(arg_types);
2733*22dc650dSSadaf Ebrahimi 	CHECK_ERROR_PTR();
2734*22dc650dSSadaf Ebrahimi 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
2735*22dc650dSSadaf Ebrahimi 
2736*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_CALL_RETURN) {
2737*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
2738*22dc650dSSadaf Ebrahimi 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
2739*22dc650dSSadaf Ebrahimi 	}
2740*22dc650dSSadaf Ebrahimi 
2741*22dc650dSSadaf Ebrahimi 	SLJIT_SKIP_CHECKS(compiler);
2742*22dc650dSSadaf Ebrahimi 	return sljit_emit_jump(compiler, type);
2743*22dc650dSSadaf Ebrahimi }
2744*22dc650dSSadaf Ebrahimi 
sljit_emit_cmp(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2745*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
2746*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
2747*22dc650dSSadaf Ebrahimi 	sljit_s32 src2, sljit_sw src2w)
2748*22dc650dSSadaf Ebrahimi {
2749*22dc650dSSadaf Ebrahimi 	struct sljit_jump *jump;
2750*22dc650dSSadaf Ebrahimi 	sljit_s32 flags;
2751*22dc650dSSadaf Ebrahimi 	sljit_ins inst;
2752*22dc650dSSadaf Ebrahimi 	sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
2753*22dc650dSSadaf Ebrahimi 
2754*22dc650dSSadaf Ebrahimi 	CHECK_ERROR_PTR();
2755*22dc650dSSadaf Ebrahimi 	CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));
2756*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src1, src1w);
2757*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src2, src2w);
2758*22dc650dSSadaf Ebrahimi 
2759*22dc650dSSadaf Ebrahimi 	compiler->cache_arg = 0;
2760*22dc650dSSadaf Ebrahimi 	compiler->cache_argw = 0;
2761*22dc650dSSadaf Ebrahimi 
2762*22dc650dSSadaf Ebrahimi 	flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
2763*22dc650dSSadaf Ebrahimi 
2764*22dc650dSSadaf Ebrahimi 	if (src1 & SLJIT_MEM) {
2765*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w));
2766*22dc650dSSadaf Ebrahimi 		src1 = TMP_REG1;
2767*22dc650dSSadaf Ebrahimi 	}
2768*22dc650dSSadaf Ebrahimi 
2769*22dc650dSSadaf Ebrahimi 	if (src2 & SLJIT_MEM) {
2770*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(emit_op_mem2(compiler, flags, src2_tmp_reg, src2, src2w, 0, 0));
2771*22dc650dSSadaf Ebrahimi 		src2 = src2_tmp_reg;
2772*22dc650dSSadaf Ebrahimi 	}
2773*22dc650dSSadaf Ebrahimi 
2774*22dc650dSSadaf Ebrahimi 	if (src1 == SLJIT_IMM) {
2775*22dc650dSSadaf Ebrahimi 		if (src1w != 0) {
2776*22dc650dSSadaf Ebrahimi 			PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
2777*22dc650dSSadaf Ebrahimi 			src1 = TMP_REG1;
2778*22dc650dSSadaf Ebrahimi 		}
2779*22dc650dSSadaf Ebrahimi 		else
2780*22dc650dSSadaf Ebrahimi 			src1 = TMP_ZERO;
2781*22dc650dSSadaf Ebrahimi 	}
2782*22dc650dSSadaf Ebrahimi 
2783*22dc650dSSadaf Ebrahimi 	if (src2 == SLJIT_IMM) {
2784*22dc650dSSadaf Ebrahimi 		if (src2w != 0) {
2785*22dc650dSSadaf Ebrahimi 			PTR_FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w));
2786*22dc650dSSadaf Ebrahimi 			src2 = src2_tmp_reg;
2787*22dc650dSSadaf Ebrahimi 		}
2788*22dc650dSSadaf Ebrahimi 		else
2789*22dc650dSSadaf Ebrahimi 			src2 = TMP_ZERO;
2790*22dc650dSSadaf Ebrahimi 	}
2791*22dc650dSSadaf Ebrahimi 
2792*22dc650dSSadaf Ebrahimi 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2793*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(!jump);
2794*22dc650dSSadaf Ebrahimi 	set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND));
2795*22dc650dSSadaf Ebrahimi 	type &= 0xff;
2796*22dc650dSSadaf Ebrahimi 
2797*22dc650dSSadaf Ebrahimi 	switch (type) {
2798*22dc650dSSadaf Ebrahimi 	case SLJIT_EQUAL:
2799*22dc650dSSadaf Ebrahimi 		inst = BNE | RJ(src1) | RD(src2);
2800*22dc650dSSadaf Ebrahimi 		break;
2801*22dc650dSSadaf Ebrahimi 	case SLJIT_NOT_EQUAL:
2802*22dc650dSSadaf Ebrahimi 		inst = BEQ | RJ(src1) | RD(src2);
2803*22dc650dSSadaf Ebrahimi 		break;
2804*22dc650dSSadaf Ebrahimi 	case SLJIT_LESS:
2805*22dc650dSSadaf Ebrahimi 		inst = BGEU | RJ(src1) | RD(src2);
2806*22dc650dSSadaf Ebrahimi 		break;
2807*22dc650dSSadaf Ebrahimi 	case SLJIT_GREATER_EQUAL:
2808*22dc650dSSadaf Ebrahimi 		inst = BLTU | RJ(src1) | RD(src2);
2809*22dc650dSSadaf Ebrahimi 		break;
2810*22dc650dSSadaf Ebrahimi 	case SLJIT_GREATER:
2811*22dc650dSSadaf Ebrahimi 		inst = BGEU | RJ(src2) | RD(src1);
2812*22dc650dSSadaf Ebrahimi 		break;
2813*22dc650dSSadaf Ebrahimi 	case SLJIT_LESS_EQUAL:
2814*22dc650dSSadaf Ebrahimi 		inst = BLTU | RJ(src2) | RD(src1);
2815*22dc650dSSadaf Ebrahimi 		break;
2816*22dc650dSSadaf Ebrahimi 	case SLJIT_SIG_LESS:
2817*22dc650dSSadaf Ebrahimi 		inst = BGE | RJ(src1) | RD(src2);
2818*22dc650dSSadaf Ebrahimi 		break;
2819*22dc650dSSadaf Ebrahimi 	case SLJIT_SIG_GREATER_EQUAL:
2820*22dc650dSSadaf Ebrahimi 		inst = BLT | RJ(src1) | RD(src2);
2821*22dc650dSSadaf Ebrahimi 		break;
2822*22dc650dSSadaf Ebrahimi 	case SLJIT_SIG_GREATER:
2823*22dc650dSSadaf Ebrahimi 		inst = BGE | RJ(src2) | RD(src1);
2824*22dc650dSSadaf Ebrahimi 		break;
2825*22dc650dSSadaf Ebrahimi 	case SLJIT_SIG_LESS_EQUAL:
2826*22dc650dSSadaf Ebrahimi 		inst = BLT | RJ(src2) | RD(src1);
2827*22dc650dSSadaf Ebrahimi 		break;
2828*22dc650dSSadaf Ebrahimi 	default:
2829*22dc650dSSadaf Ebrahimi 		inst = BREAK;
2830*22dc650dSSadaf Ebrahimi 		SLJIT_UNREACHABLE();
2831*22dc650dSSadaf Ebrahimi 	}
2832*22dc650dSSadaf Ebrahimi 
2833*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(push_inst(compiler, inst));
2834*22dc650dSSadaf Ebrahimi 
2835*22dc650dSSadaf Ebrahimi 	jump->addr = compiler->size;
2836*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));
2837*22dc650dSSadaf Ebrahimi 
2838*22dc650dSSadaf Ebrahimi 	/* Maximum number of instructions required for generating a constant. */
2839*22dc650dSSadaf Ebrahimi 	compiler->size += JUMP_MAX_SIZE - 1;
2840*22dc650dSSadaf Ebrahimi 
2841*22dc650dSSadaf Ebrahimi 	return jump;
2842*22dc650dSSadaf Ebrahimi }
2843*22dc650dSSadaf Ebrahimi 
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)2844*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2845*22dc650dSSadaf Ebrahimi {
2846*22dc650dSSadaf Ebrahimi 	struct sljit_jump *jump;
2847*22dc650dSSadaf Ebrahimi 
2848*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2849*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2850*22dc650dSSadaf Ebrahimi 
2851*22dc650dSSadaf Ebrahimi 	if (src != SLJIT_IMM) {
2852*22dc650dSSadaf Ebrahimi 		if (src & SLJIT_MEM) {
2853*22dc650dSSadaf Ebrahimi 			ADJUST_LOCAL_OFFSET(src, srcw);
2854*22dc650dSSadaf Ebrahimi 			FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
2855*22dc650dSSadaf Ebrahimi 			src = TMP_REG1;
2856*22dc650dSSadaf Ebrahimi 		}
2857*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(src) | IMM_I12(0));
2858*22dc650dSSadaf Ebrahimi 	}
2859*22dc650dSSadaf Ebrahimi 
2860*22dc650dSSadaf Ebrahimi 	/* These jumps are converted to jump/call instructions when possible. */
2861*22dc650dSSadaf Ebrahimi 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2862*22dc650dSSadaf Ebrahimi 	FAIL_IF(!jump);
2863*22dc650dSSadaf Ebrahimi 	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0));
2864*22dc650dSSadaf Ebrahimi 	jump->u.target = (sljit_uw)srcw;
2865*22dc650dSSadaf Ebrahimi 
2866*22dc650dSSadaf Ebrahimi 	jump->addr = compiler->size;
2867*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));
2868*22dc650dSSadaf Ebrahimi 
2869*22dc650dSSadaf Ebrahimi 	/* Maximum number of instructions required for generating a constant. */
2870*22dc650dSSadaf Ebrahimi 	compiler->size += JUMP_MAX_SIZE - 1;
2871*22dc650dSSadaf Ebrahimi 
2872*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
2873*22dc650dSSadaf Ebrahimi }
2874*22dc650dSSadaf Ebrahimi 
sljit_emit_icall(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types,sljit_s32 src,sljit_sw srcw)2875*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
2876*22dc650dSSadaf Ebrahimi 	sljit_s32 arg_types,
2877*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
2878*22dc650dSSadaf Ebrahimi {
2879*22dc650dSSadaf Ebrahimi 	SLJIT_UNUSED_ARG(arg_types);
2880*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2881*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
2882*22dc650dSSadaf Ebrahimi 
2883*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
2884*22dc650dSSadaf Ebrahimi 		ADJUST_LOCAL_OFFSET(src, srcw);
2885*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
2886*22dc650dSSadaf Ebrahimi 		src = TMP_REG1;
2887*22dc650dSSadaf Ebrahimi 	}
2888*22dc650dSSadaf Ebrahimi 
2889*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_CALL_RETURN) {
2890*22dc650dSSadaf Ebrahimi 		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
2891*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
2892*22dc650dSSadaf Ebrahimi 			src = TMP_REG1;
2893*22dc650dSSadaf Ebrahimi 		}
2894*22dc650dSSadaf Ebrahimi 
2895*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_stack_frame_release(compiler, 0));
2896*22dc650dSSadaf Ebrahimi 		type = SLJIT_JUMP;
2897*22dc650dSSadaf Ebrahimi 	}
2898*22dc650dSSadaf Ebrahimi 
2899*22dc650dSSadaf Ebrahimi 	SLJIT_SKIP_CHECKS(compiler);
2900*22dc650dSSadaf Ebrahimi 	return sljit_emit_ijump(compiler, type, src, srcw);
2901*22dc650dSSadaf Ebrahimi }
2902*22dc650dSSadaf Ebrahimi 
sljit_emit_op_flags(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 type)2903*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2904*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw,
2905*22dc650dSSadaf Ebrahimi 	sljit_s32 type)
2906*22dc650dSSadaf Ebrahimi {
2907*22dc650dSSadaf Ebrahimi 	sljit_s32 src_r, dst_r, invert;
2908*22dc650dSSadaf Ebrahimi 	sljit_s32 saved_op = op;
2909*22dc650dSSadaf Ebrahimi 	sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
2910*22dc650dSSadaf Ebrahimi 
2911*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
2912*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2913*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
2914*22dc650dSSadaf Ebrahimi 
2915*22dc650dSSadaf Ebrahimi 	op = GET_OPCODE(op);
2916*22dc650dSSadaf Ebrahimi 	dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
2917*22dc650dSSadaf Ebrahimi 
2918*22dc650dSSadaf Ebrahimi 	compiler->cache_arg = 0;
2919*22dc650dSSadaf Ebrahimi 	compiler->cache_argw = 0;
2920*22dc650dSSadaf Ebrahimi 
2921*22dc650dSSadaf Ebrahimi 	if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
2922*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
2923*22dc650dSSadaf Ebrahimi 
2924*22dc650dSSadaf Ebrahimi 	if (type < SLJIT_F_EQUAL) {
2925*22dc650dSSadaf Ebrahimi 		src_r = OTHER_FLAG;
2926*22dc650dSSadaf Ebrahimi 		invert = type & 0x1;
2927*22dc650dSSadaf Ebrahimi 
2928*22dc650dSSadaf Ebrahimi 		switch (type) {
2929*22dc650dSSadaf Ebrahimi 		case SLJIT_EQUAL:
2930*22dc650dSSadaf Ebrahimi 		case SLJIT_NOT_EQUAL:
2931*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
2932*22dc650dSSadaf Ebrahimi 			src_r = dst_r;
2933*22dc650dSSadaf Ebrahimi 			break;
2934*22dc650dSSadaf Ebrahimi 		case SLJIT_ATOMIC_STORED:
2935*22dc650dSSadaf Ebrahimi 		case SLJIT_ATOMIC_NOT_STORED:
2936*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
2937*22dc650dSSadaf Ebrahimi 			src_r = dst_r;
2938*22dc650dSSadaf Ebrahimi 			invert ^= 0x1;
2939*22dc650dSSadaf Ebrahimi 			break;
2940*22dc650dSSadaf Ebrahimi 		case SLJIT_OVERFLOW:
2941*22dc650dSSadaf Ebrahimi 		case SLJIT_NOT_OVERFLOW:
2942*22dc650dSSadaf Ebrahimi 			if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) {
2943*22dc650dSSadaf Ebrahimi 				src_r = OTHER_FLAG;
2944*22dc650dSSadaf Ebrahimi 				break;
2945*22dc650dSSadaf Ebrahimi 			}
2946*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(OTHER_FLAG) | IMM_I12(1)));
2947*22dc650dSSadaf Ebrahimi 			src_r = dst_r;
2948*22dc650dSSadaf Ebrahimi 			invert ^= 0x1;
2949*22dc650dSSadaf Ebrahimi 			break;
2950*22dc650dSSadaf Ebrahimi 		}
2951*22dc650dSSadaf Ebrahimi 	} else {
2952*22dc650dSSadaf Ebrahimi 		invert = 0;
2953*22dc650dSSadaf Ebrahimi 		src_r = OTHER_FLAG;
2954*22dc650dSSadaf Ebrahimi 
2955*22dc650dSSadaf Ebrahimi 		switch (type) {
2956*22dc650dSSadaf Ebrahimi 		case SLJIT_ORDERED_NOT_EQUAL:
2957*22dc650dSSadaf Ebrahimi 		case SLJIT_ORDERED_LESS_EQUAL:
2958*22dc650dSSadaf Ebrahimi 		case SLJIT_ORDERED_GREATER_EQUAL:
2959*22dc650dSSadaf Ebrahimi 		case SLJIT_F_NOT_EQUAL:
2960*22dc650dSSadaf Ebrahimi 		case SLJIT_UNORDERED_OR_NOT_EQUAL:
2961*22dc650dSSadaf Ebrahimi 		case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2962*22dc650dSSadaf Ebrahimi 		case SLJIT_UNORDERED_OR_LESS_EQUAL:
2963*22dc650dSSadaf Ebrahimi 		case SLJIT_F_LESS_EQUAL:
2964*22dc650dSSadaf Ebrahimi 		case SLJIT_F_GREATER_EQUAL:
2965*22dc650dSSadaf Ebrahimi 		case SLJIT_ORDERED:
2966*22dc650dSSadaf Ebrahimi 			invert = 1;
2967*22dc650dSSadaf Ebrahimi 			break;
2968*22dc650dSSadaf Ebrahimi 		}
2969*22dc650dSSadaf Ebrahimi 	}
2970*22dc650dSSadaf Ebrahimi 
2971*22dc650dSSadaf Ebrahimi 	if (invert) {
2972*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RJ(src_r) | IMM_I12(1)));
2973*22dc650dSSadaf Ebrahimi 		src_r = dst_r;
2974*22dc650dSSadaf Ebrahimi 	}
2975*22dc650dSSadaf Ebrahimi 
2976*22dc650dSSadaf Ebrahimi 	if (op < SLJIT_ADD) {
2977*22dc650dSSadaf Ebrahimi 		if (dst & SLJIT_MEM)
2978*22dc650dSSadaf Ebrahimi 			return emit_op_mem(compiler, mem_type, src_r, dst, dstw);
2979*22dc650dSSadaf Ebrahimi 
2980*22dc650dSSadaf Ebrahimi 		if (src_r != dst_r)
2981*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(src_r) | IMM_I12(0));
2982*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
2983*22dc650dSSadaf Ebrahimi 	}
2984*22dc650dSSadaf Ebrahimi 
2985*22dc650dSSadaf Ebrahimi 	mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE;
2986*22dc650dSSadaf Ebrahimi 
2987*22dc650dSSadaf Ebrahimi 	if (dst & SLJIT_MEM)
2988*22dc650dSSadaf Ebrahimi 		return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0);
2989*22dc650dSSadaf Ebrahimi 	return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0);
2990*22dc650dSSadaf Ebrahimi }
2991*22dc650dSSadaf Ebrahimi 
sljit_emit_select(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_reg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2_reg)2992*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
2993*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_reg,
2994*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
2995*22dc650dSSadaf Ebrahimi 	sljit_s32 src2_reg)
2996*22dc650dSSadaf Ebrahimi {
2997*22dc650dSSadaf Ebrahimi 	sljit_ins *ptr;
2998*22dc650dSSadaf Ebrahimi 	sljit_uw size;
2999*22dc650dSSadaf Ebrahimi 	sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
3000*22dc650dSSadaf Ebrahimi 
3001*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3002*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
3003*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src1, src1w);
3004*22dc650dSSadaf Ebrahimi 
3005*22dc650dSSadaf Ebrahimi 	if (dst_reg != src2_reg) {
3006*22dc650dSSadaf Ebrahimi 		if (dst_reg == src1) {
3007*22dc650dSSadaf Ebrahimi 			src1 = src2_reg;
3008*22dc650dSSadaf Ebrahimi 			src1w = 0;
3009*22dc650dSSadaf Ebrahimi 			type ^= 0x1;
3010*22dc650dSSadaf Ebrahimi 		} else {
3011*22dc650dSSadaf Ebrahimi 			if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
3012*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(dst_reg) | IMM_I12(0)));
3013*22dc650dSSadaf Ebrahimi 
3014*22dc650dSSadaf Ebrahimi 				if ((src1 & REG_MASK) == dst_reg)
3015*22dc650dSSadaf Ebrahimi 					src1 = (src1 & ~REG_MASK) | TMP_REG1;
3016*22dc650dSSadaf Ebrahimi 
3017*22dc650dSSadaf Ebrahimi 				if (OFFS_REG(src1) == dst_reg)
3018*22dc650dSSadaf Ebrahimi 					src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
3019*22dc650dSSadaf Ebrahimi 			}
3020*22dc650dSSadaf Ebrahimi 
3021*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src2_reg) | IMM_I12(0)));
3022*22dc650dSSadaf Ebrahimi 		}
3023*22dc650dSSadaf Ebrahimi 	}
3024*22dc650dSSadaf Ebrahimi 
3025*22dc650dSSadaf Ebrahimi 	size = compiler->size;
3026*22dc650dSSadaf Ebrahimi 
3027*22dc650dSSadaf Ebrahimi 	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
3028*22dc650dSSadaf Ebrahimi 	FAIL_IF(!ptr);
3029*22dc650dSSadaf Ebrahimi 	compiler->size++;
3030*22dc650dSSadaf Ebrahimi 
3031*22dc650dSSadaf Ebrahimi 	if (src1 & SLJIT_MEM) {
3032*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w));
3033*22dc650dSSadaf Ebrahimi 	} else if (src1 == SLJIT_IMM) {
3034*22dc650dSSadaf Ebrahimi 		if (type & SLJIT_32)
3035*22dc650dSSadaf Ebrahimi 			src1w = (sljit_s32)src1w;
3036*22dc650dSSadaf Ebrahimi 		FAIL_IF(load_immediate(compiler, dst_reg, src1w));
3037*22dc650dSSadaf Ebrahimi 	} else
3038*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src1) | IMM_I12(0)));
3039*22dc650dSSadaf Ebrahimi 
3040*22dc650dSSadaf Ebrahimi 	*ptr = get_jump_instruction(type & ~SLJIT_32) | IMM_I16(compiler->size - size);
3041*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
3042*22dc650dSSadaf Ebrahimi }
3043*22dc650dSSadaf Ebrahimi 
sljit_emit_fselect(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2_freg)3044*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3045*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_freg,
3046*22dc650dSSadaf Ebrahimi 	sljit_s32 src1, sljit_sw src1w,
3047*22dc650dSSadaf Ebrahimi 	sljit_s32 src2_freg)
3048*22dc650dSSadaf Ebrahimi {
3049*22dc650dSSadaf Ebrahimi 	sljit_s32 invert = 0;
3050*22dc650dSSadaf Ebrahimi 
3051*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3052*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3053*22dc650dSSadaf Ebrahimi 
3054*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src1, src1w);
3055*22dc650dSSadaf Ebrahimi 
3056*22dc650dSSadaf Ebrahimi 	if ((type & ~SLJIT_32) == SLJIT_EQUAL || (type & ~SLJIT_32) == SLJIT_NOT_EQUAL) {
3057*22dc650dSSadaf Ebrahimi 		if ((type & ~SLJIT_32) == SLJIT_EQUAL)
3058*22dc650dSSadaf Ebrahimi 			invert = 1;
3059*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(EQUAL_FLAG)));
3060*22dc650dSSadaf Ebrahimi 	} else {
3061*22dc650dSSadaf Ebrahimi 		if (get_jump_instruction(type & ~SLJIT_32) == (BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO)))
3062*22dc650dSSadaf Ebrahimi 			invert = 1;
3063*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(OTHER_FLAG)));
3064*22dc650dSSadaf Ebrahimi 	}
3065*22dc650dSSadaf Ebrahimi 
3066*22dc650dSSadaf Ebrahimi 	if (src1 & SLJIT_MEM) {
3067*22dc650dSSadaf Ebrahimi 		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src1, src1w));
3068*22dc650dSSadaf Ebrahimi 		if (invert)
3069*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(TMP_FREG2) | FRK(src2_freg) | FCA(F_OTHER_FLAG));
3070*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(TMP_FREG2) | FCA(F_OTHER_FLAG));
3071*22dc650dSSadaf Ebrahimi 	} else {
3072*22dc650dSSadaf Ebrahimi 		if (invert)
3073*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src1) | FRK(src2_freg) | FCA(F_OTHER_FLAG));
3074*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(src1) | FCA(F_OTHER_FLAG));
3075*22dc650dSSadaf Ebrahimi 	}
3076*22dc650dSSadaf Ebrahimi }
3077*22dc650dSSadaf Ebrahimi 
3078*22dc650dSSadaf Ebrahimi #undef FLOAT_DATA
3079*22dc650dSSadaf Ebrahimi 
sljit_emit_mem(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)3080*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
3081*22dc650dSSadaf Ebrahimi 	sljit_s32 reg,
3082*22dc650dSSadaf Ebrahimi 	sljit_s32 mem, sljit_sw memw)
3083*22dc650dSSadaf Ebrahimi {
3084*22dc650dSSadaf Ebrahimi 	sljit_s32 flags;
3085*22dc650dSSadaf Ebrahimi 
3086*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3087*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
3088*22dc650dSSadaf Ebrahimi 
3089*22dc650dSSadaf Ebrahimi 	if (!(reg & REG_PAIR_MASK))
3090*22dc650dSSadaf Ebrahimi 		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
3091*22dc650dSSadaf Ebrahimi 
3092*22dc650dSSadaf Ebrahimi 	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
3093*22dc650dSSadaf Ebrahimi 		memw &= 0x3;
3094*22dc650dSSadaf Ebrahimi 
3095*22dc650dSSadaf Ebrahimi 		if (SLJIT_UNLIKELY(memw != 0)) {
3096*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(mem)) | IMM_I12(memw)));
3097*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));
3098*22dc650dSSadaf Ebrahimi 		} else
3099*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(mem & REG_MASK) | RK(OFFS_REG(mem))));
3100*22dc650dSSadaf Ebrahimi 
3101*22dc650dSSadaf Ebrahimi 		mem = TMP_REG1;
3102*22dc650dSSadaf Ebrahimi 		memw = 0;
3103*22dc650dSSadaf Ebrahimi 	} else if (memw > I12_MAX - SSIZE_OF(sw) || memw < I12_MIN) {
3104*22dc650dSSadaf Ebrahimi 		if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) {
3105*22dc650dSSadaf Ebrahimi 			FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw)));
3106*22dc650dSSadaf Ebrahimi 			memw &= 0xfff;
3107*22dc650dSSadaf Ebrahimi 		} else {
3108*22dc650dSSadaf Ebrahimi 			FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
3109*22dc650dSSadaf Ebrahimi 			memw = 0;
3110*22dc650dSSadaf Ebrahimi 		}
3111*22dc650dSSadaf Ebrahimi 
3112*22dc650dSSadaf Ebrahimi 		if (mem & REG_MASK)
3113*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));
3114*22dc650dSSadaf Ebrahimi 
3115*22dc650dSSadaf Ebrahimi 		mem = TMP_REG1;
3116*22dc650dSSadaf Ebrahimi 	} else {
3117*22dc650dSSadaf Ebrahimi 		mem &= REG_MASK;
3118*22dc650dSSadaf Ebrahimi 		memw &= 0xfff;
3119*22dc650dSSadaf Ebrahimi 	}
3120*22dc650dSSadaf Ebrahimi 
3121*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT((memw >= 0 && memw <= I12_MAX - SSIZE_OF(sw)) || (memw > I12_MAX && memw <= 0xfff));
3122*22dc650dSSadaf Ebrahimi 
3123*22dc650dSSadaf Ebrahimi 	if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) {
3124*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff));
3125*22dc650dSSadaf Ebrahimi 		return push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw);
3126*22dc650dSSadaf Ebrahimi 	}
3127*22dc650dSSadaf Ebrahimi 
3128*22dc650dSSadaf Ebrahimi 	flags = WORD_DATA | (!(type & SLJIT_MEM_STORE) ? LOAD_DATA : 0);
3129*22dc650dSSadaf Ebrahimi 
3130*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_mem_inst(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw));
3131*22dc650dSSadaf Ebrahimi 	return push_mem_inst(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff);
3132*22dc650dSSadaf Ebrahimi }
3133*22dc650dSSadaf Ebrahimi 
3134*22dc650dSSadaf Ebrahimi #undef TO_ARGW_HI
3135*22dc650dSSadaf Ebrahimi 
/*
 * Reduces the memory operand (*mem_ptr, memw) to a single register that
 * holds the effective address, and stores that register in *mem_ptr.
 *
 *   - base + (index << (memw & 3)): computed into TMP_REG3;
 *   - absolute address (no base register): memw is loaded into TMP_REG3;
 *   - base with zero offset: the base register itself is returned, no
 *     instruction is emitted;
 *   - base with non-zero offset: memw + base is computed into TMP_REG3.
 *
 * Returns SLJIT_SUCCESS or the compiler error code.
 */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 operand = *mem_ptr;
	sljit_s32 base = operand & REG_MASK;

	if (SLJIT_UNLIKELY(operand & OFFS_REG_MASK)) {
		*mem_ptr = TMP_REG3;
		FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(operand)) | IMM_I12(memw & 0x3)));
		return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base));
	}

	if (base != 0 && memw == 0) {
		/* The base register already is the address. */
		*mem_ptr = base;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG3;

	if (base == 0)
		return load_immediate(compiler, TMP_REG3, memw);

	FAIL_IF(load_immediate(compiler, TMP_REG3, memw));
	return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base));
}
3163*22dc650dSSadaf Ebrahimi 
sljit_emit_simd_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 srcdst,sljit_sw srcdstw)3164*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3165*22dc650dSSadaf Ebrahimi 	sljit_s32 freg,
3166*22dc650dSSadaf Ebrahimi 	sljit_s32 srcdst, sljit_sw srcdstw)
3167*22dc650dSSadaf Ebrahimi {
3168*22dc650dSSadaf Ebrahimi 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3169*22dc650dSSadaf Ebrahimi 	sljit_ins ins = 0;
3170*22dc650dSSadaf Ebrahimi 
3171*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3172*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3173*22dc650dSSadaf Ebrahimi 
3174*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3175*22dc650dSSadaf Ebrahimi 
3176*22dc650dSSadaf Ebrahimi 	if (reg_size != 5 && reg_size != 4)
3177*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3178*22dc650dSSadaf Ebrahimi 
3179*22dc650dSSadaf Ebrahimi 	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3180*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3181*22dc650dSSadaf Ebrahimi 
3182*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_TEST)
3183*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3184*22dc650dSSadaf Ebrahimi 
3185*22dc650dSSadaf Ebrahimi 	if (!(srcdst & SLJIT_MEM)) {
3186*22dc650dSSadaf Ebrahimi 		if (type & SLJIT_SIMD_STORE)
3187*22dc650dSSadaf Ebrahimi 			ins = FRD(srcdst) | FRJ(freg) | FRK(freg);
3188*22dc650dSSadaf Ebrahimi 		else
3189*22dc650dSSadaf Ebrahimi 			ins = FRD(freg) | FRJ(srcdst) | FRK(srcdst);
3190*22dc650dSSadaf Ebrahimi 
3191*22dc650dSSadaf Ebrahimi 		if (reg_size == 5)
3192*22dc650dSSadaf Ebrahimi 			ins |= VOR_V | (sljit_ins)1 << 26;
3193*22dc650dSSadaf Ebrahimi 		else
3194*22dc650dSSadaf Ebrahimi 			ins |= VOR_V;
3195*22dc650dSSadaf Ebrahimi 
3196*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, ins);
3197*22dc650dSSadaf Ebrahimi 	}
3198*22dc650dSSadaf Ebrahimi 
3199*22dc650dSSadaf Ebrahimi 	ins = (type & SLJIT_SIMD_STORE) ? VST : VLD;
3200*22dc650dSSadaf Ebrahimi 
3201*22dc650dSSadaf Ebrahimi 	if (reg_size == 5)
3202*22dc650dSSadaf Ebrahimi 		ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD;
3203*22dc650dSSadaf Ebrahimi 
3204*22dc650dSSadaf Ebrahimi 	if (FAST_IS_REG(srcdst) && srcdst >= 0 && (srcdstw >= I12_MIN && srcdstw <= I12_MAX))
3205*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, ins | FRD(freg) | RJ((sljit_u8)srcdst) | IMM_I12(srcdstw));
3206*22dc650dSSadaf Ebrahimi 	else {
3207*22dc650dSSadaf Ebrahimi 		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3208*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, ins | FRD(freg) | RJ(srcdst) | IMM_I12(0));
3209*22dc650dSSadaf Ebrahimi 	}
3210*22dc650dSSadaf Ebrahimi }
3211*22dc650dSSadaf Ebrahimi 
sljit_emit_simd_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)3212*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3213*22dc650dSSadaf Ebrahimi 	sljit_s32 freg,
3214*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
3215*22dc650dSSadaf Ebrahimi {
3216*22dc650dSSadaf Ebrahimi 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3217*22dc650dSSadaf Ebrahimi 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3218*22dc650dSSadaf Ebrahimi 	sljit_ins ins = 0;
3219*22dc650dSSadaf Ebrahimi 
3220*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3221*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
3222*22dc650dSSadaf Ebrahimi 
3223*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src, srcw);
3224*22dc650dSSadaf Ebrahimi 
3225*22dc650dSSadaf Ebrahimi 	if (reg_size != 5 && reg_size != 4)
3226*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3227*22dc650dSSadaf Ebrahimi 
3228*22dc650dSSadaf Ebrahimi 	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3229*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3230*22dc650dSSadaf Ebrahimi 
3231*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_TEST)
3232*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3233*22dc650dSSadaf Ebrahimi 
3234*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
3235*22dc650dSSadaf Ebrahimi 		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
3236*22dc650dSSadaf Ebrahimi 
3237*22dc650dSSadaf Ebrahimi 		if (reg_size == 5)
3238*22dc650dSSadaf Ebrahimi 			ins = (sljit_ins)1 << 25;
3239*22dc650dSSadaf Ebrahimi 
3240*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, VLDREPL | ins | FRD(freg) | RJ(src) | (sljit_ins)1 << (23 - elem_size));
3241*22dc650dSSadaf Ebrahimi 	}
3242*22dc650dSSadaf Ebrahimi 
3243*22dc650dSSadaf Ebrahimi 	if (reg_size == 5)
3244*22dc650dSSadaf Ebrahimi 		ins = (sljit_ins)1 << 26;
3245*22dc650dSSadaf Ebrahimi 
3246*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_FLOAT) {
3247*22dc650dSSadaf Ebrahimi 		if (src == SLJIT_IMM)
3248*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, VREPLGR2VR | ins | FRD(freg) | RJ(TMP_ZERO) | (sljit_ins)elem_size << 10);
3249*22dc650dSSadaf Ebrahimi 
3250*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, VREPLVE | ins | FRD(freg) | FRJ(src) | RK(TMP_ZERO) | (sljit_ins)elem_size << 15));
3251*22dc650dSSadaf Ebrahimi 
3252*22dc650dSSadaf Ebrahimi 		if (reg_size == 5) {
3253*22dc650dSSadaf Ebrahimi 			ins = (sljit_ins)(0x44 << 10);
3254*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, XVPERMI | ins | FRD(freg) | FRJ(freg));
3255*22dc650dSSadaf Ebrahimi 		}
3256*22dc650dSSadaf Ebrahimi 
3257*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3258*22dc650dSSadaf Ebrahimi 	}
3259*22dc650dSSadaf Ebrahimi 
3260*22dc650dSSadaf Ebrahimi 	ins |= VREPLGR2VR | (sljit_ins)elem_size << 10;
3261*22dc650dSSadaf Ebrahimi 
3262*22dc650dSSadaf Ebrahimi 	if (src == SLJIT_IMM) {
3263*22dc650dSSadaf Ebrahimi 		FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
3264*22dc650dSSadaf Ebrahimi 		src = TMP_REG2;
3265*22dc650dSSadaf Ebrahimi 	}
3266*22dc650dSSadaf Ebrahimi 
3267*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, ins | FRD(freg) | RJ(src));
3268*22dc650dSSadaf Ebrahimi }
3269*22dc650dSSadaf Ebrahimi 
sljit_emit_simd_lane_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 lane_index,sljit_s32 srcdst,sljit_sw srcdstw)3270*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
3271*22dc650dSSadaf Ebrahimi 	sljit_s32 freg, sljit_s32 lane_index,
3272*22dc650dSSadaf Ebrahimi 	sljit_s32 srcdst, sljit_sw srcdstw)
3273*22dc650dSSadaf Ebrahimi {
3274*22dc650dSSadaf Ebrahimi 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3275*22dc650dSSadaf Ebrahimi 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3276*22dc650dSSadaf Ebrahimi 	sljit_ins ins = 0;
3277*22dc650dSSadaf Ebrahimi 
3278*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3279*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
3280*22dc650dSSadaf Ebrahimi 
3281*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3282*22dc650dSSadaf Ebrahimi 
3283*22dc650dSSadaf Ebrahimi 	if (reg_size != 5 && reg_size != 4)
3284*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3285*22dc650dSSadaf Ebrahimi 
3286*22dc650dSSadaf Ebrahimi 	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3287*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3288*22dc650dSSadaf Ebrahimi 
3289*22dc650dSSadaf Ebrahimi 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3290*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3291*22dc650dSSadaf Ebrahimi 
3292*22dc650dSSadaf Ebrahimi 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3293*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3294*22dc650dSSadaf Ebrahimi 
3295*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_TEST)
3296*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3297*22dc650dSSadaf Ebrahimi 
3298*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_LANE_ZERO) {
3299*22dc650dSSadaf Ebrahimi 		ins = (reg_size == 5) ? ((sljit_ins)1 << 26) : 0;
3300*22dc650dSSadaf Ebrahimi 
3301*22dc650dSSadaf Ebrahimi 		if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
3302*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, VOR_V | ins | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3303*22dc650dSSadaf Ebrahimi 			srcdst = TMP_FREG1;
3304*22dc650dSSadaf Ebrahimi 			srcdstw = 0;
3305*22dc650dSSadaf Ebrahimi 		}
3306*22dc650dSSadaf Ebrahimi 
3307*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, VXOR_V | ins | FRD(freg) | FRJ(freg) | FRK(freg)));
3308*22dc650dSSadaf Ebrahimi 	}
3309*22dc650dSSadaf Ebrahimi 
3310*22dc650dSSadaf Ebrahimi 	if (srcdst & SLJIT_MEM) {
3311*22dc650dSSadaf Ebrahimi 		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3312*22dc650dSSadaf Ebrahimi 
3313*22dc650dSSadaf Ebrahimi 		if (reg_size == 5)
3314*22dc650dSSadaf Ebrahimi 			ins = (sljit_ins)1 << 25;
3315*22dc650dSSadaf Ebrahimi 
3316*22dc650dSSadaf Ebrahimi 		if (type & SLJIT_SIMD_STORE) {
3317*22dc650dSSadaf Ebrahimi 			ins |= (sljit_ins)lane_index << 18 | (sljit_ins)(1 << (23 - elem_size));
3318*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, VSTELM | ins | FRD(freg) | RJ(srcdst));
3319*22dc650dSSadaf Ebrahimi 		} else {
3320*22dc650dSSadaf Ebrahimi 			emit_op_mem(compiler, (elem_size == 3 ? WORD_DATA : (elem_size == 2 ? INT_DATA : (elem_size == 1 ? HALF_DATA : BYTE_DATA))) | LOAD_DATA, TMP_REG1, srcdst | SLJIT_MEM, 0);
3321*22dc650dSSadaf Ebrahimi 			srcdst = TMP_REG1;
3322*22dc650dSSadaf Ebrahimi 			ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3323*22dc650dSSadaf Ebrahimi 
3324*22dc650dSSadaf Ebrahimi 			if (reg_size == 5) {
3325*22dc650dSSadaf Ebrahimi 				if (elem_size < 2) {
3326*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3327*22dc650dSSadaf Ebrahimi 					if (lane_index >= (2 << (3 - elem_size))) {
3328*22dc650dSSadaf Ebrahimi 						FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1)));
3329*22dc650dSSadaf Ebrahimi 						FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size)))));
3330*22dc650dSSadaf Ebrahimi 						return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(2));
3331*22dc650dSSadaf Ebrahimi 					} else {
3332*22dc650dSSadaf Ebrahimi 						FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index)));
3333*22dc650dSSadaf Ebrahimi 						return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(18));
3334*22dc650dSSadaf Ebrahimi 					}
3335*22dc650dSSadaf Ebrahimi 				} else
3336*22dc650dSSadaf Ebrahimi 					ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26;
3337*22dc650dSSadaf Ebrahimi 			}
3338*22dc650dSSadaf Ebrahimi 
3339*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index));
3340*22dc650dSSadaf Ebrahimi 		}
3341*22dc650dSSadaf Ebrahimi 	}
3342*22dc650dSSadaf Ebrahimi 
3343*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_FLOAT) {
3344*22dc650dSSadaf Ebrahimi 		ins = (reg_size == 5) ? (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26 : (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3345*22dc650dSSadaf Ebrahimi 
3346*22dc650dSSadaf Ebrahimi 		if (type & SLJIT_SIMD_STORE) {
3347*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(freg) | IMM_V(lane_index)));
3348*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, VINSGR2VR | ins | FRD(srcdst) | RJ(TMP_REG1) | IMM_V(0));
3349*22dc650dSSadaf Ebrahimi 		} else {
3350*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(srcdst) | IMM_V(0)));
3351*22dc650dSSadaf Ebrahimi 			return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(TMP_REG1) | IMM_V(lane_index));
3352*22dc650dSSadaf Ebrahimi 		}
3353*22dc650dSSadaf Ebrahimi 	}
3354*22dc650dSSadaf Ebrahimi 
3355*22dc650dSSadaf Ebrahimi 	if (srcdst == SLJIT_IMM) {
3356*22dc650dSSadaf Ebrahimi 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw));
3357*22dc650dSSadaf Ebrahimi 		srcdst = TMP_REG1;
3358*22dc650dSSadaf Ebrahimi 	}
3359*22dc650dSSadaf Ebrahimi 
3360*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_STORE) {
3361*22dc650dSSadaf Ebrahimi 		ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3362*22dc650dSSadaf Ebrahimi 
3363*22dc650dSSadaf Ebrahimi 		if (type & SLJIT_SIMD_LANE_SIGNED)
3364*22dc650dSSadaf Ebrahimi 			ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18));
3365*22dc650dSSadaf Ebrahimi 		else
3366*22dc650dSSadaf Ebrahimi 			ins |= VPICKVE2GR_U;
3367*22dc650dSSadaf Ebrahimi 
3368*22dc650dSSadaf Ebrahimi 		if (reg_size == 5) {
3369*22dc650dSSadaf Ebrahimi 			if (elem_size < 2) {
3370*22dc650dSSadaf Ebrahimi 				if (lane_index >= (2 << (3 - elem_size))) {
3371*22dc650dSSadaf Ebrahimi 					if (type & SLJIT_SIMD_LANE_SIGNED)
3372*22dc650dSSadaf Ebrahimi 						ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18));
3373*22dc650dSSadaf Ebrahimi 					else
3374*22dc650dSSadaf Ebrahimi 						ins |= VPICKVE2GR_U;
3375*22dc650dSSadaf Ebrahimi 
3376*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3377*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1)));
3378*22dc650dSSadaf Ebrahimi 					return push_inst(compiler, ins | RD(srcdst) | FRJ(TMP_FREG1) | IMM_V(lane_index % (2 << (3 - elem_size))));
3379*22dc650dSSadaf Ebrahimi 				}
3380*22dc650dSSadaf Ebrahimi 			} else {
3381*22dc650dSSadaf Ebrahimi 				ins ^= (sljit_ins)1 << (15 - elem_size);
3382*22dc650dSSadaf Ebrahimi 				ins |= (sljit_ins)1 << 26;
3383*22dc650dSSadaf Ebrahimi 			}
3384*22dc650dSSadaf Ebrahimi 		}
3385*22dc650dSSadaf Ebrahimi 
3386*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, ins | RD(srcdst) | FRJ(freg) | IMM_V(lane_index));
3387*22dc650dSSadaf Ebrahimi 	} else {
3388*22dc650dSSadaf Ebrahimi 		ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3389*22dc650dSSadaf Ebrahimi 
3390*22dc650dSSadaf Ebrahimi 		if (reg_size == 5) {
3391*22dc650dSSadaf Ebrahimi 			if (elem_size < 2) {
3392*22dc650dSSadaf Ebrahimi 				FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3393*22dc650dSSadaf Ebrahimi 				if (lane_index >= (2 << (3 - elem_size))) {
3394*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1)));
3395*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size)))));
3396*22dc650dSSadaf Ebrahimi 					return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(2));
3397*22dc650dSSadaf Ebrahimi 				} else {
3398*22dc650dSSadaf Ebrahimi 					FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index)));
3399*22dc650dSSadaf Ebrahimi 					return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(18));
3400*22dc650dSSadaf Ebrahimi 				}
3401*22dc650dSSadaf Ebrahimi 			} else
3402*22dc650dSSadaf Ebrahimi 				ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26;
3403*22dc650dSSadaf Ebrahimi 		}
3404*22dc650dSSadaf Ebrahimi 
3405*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index));
3406*22dc650dSSadaf Ebrahimi 	}
3407*22dc650dSSadaf Ebrahimi 
3408*22dc650dSSadaf Ebrahimi 	return SLJIT_ERR_UNSUPPORTED;
3409*22dc650dSSadaf Ebrahimi }
3410*22dc650dSSadaf Ebrahimi 
sljit_emit_simd_lane_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_s32 src_lane_index)3411*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3412*22dc650dSSadaf Ebrahimi 	sljit_s32 freg,
3413*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_s32 src_lane_index)
3414*22dc650dSSadaf Ebrahimi {
3415*22dc650dSSadaf Ebrahimi 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3416*22dc650dSSadaf Ebrahimi 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3417*22dc650dSSadaf Ebrahimi 	sljit_ins ins = 0;
3418*22dc650dSSadaf Ebrahimi 
3419*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3420*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
3421*22dc650dSSadaf Ebrahimi 
3422*22dc650dSSadaf Ebrahimi 	if (reg_size != 5 && reg_size != 4)
3423*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3424*22dc650dSSadaf Ebrahimi 
3425*22dc650dSSadaf Ebrahimi 	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3426*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3427*22dc650dSSadaf Ebrahimi 
3428*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_TEST)
3429*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3430*22dc650dSSadaf Ebrahimi 
3431*22dc650dSSadaf Ebrahimi 	ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3432*22dc650dSSadaf Ebrahimi 
3433*22dc650dSSadaf Ebrahimi 	if (reg_size == 5) {
3434*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, VREPLVEI | (sljit_ins)1 << 26 | ins | FRD(freg) | FRJ(src) | IMM_V(src_lane_index % (2 << (3 - elem_size)))));
3435*22dc650dSSadaf Ebrahimi 
3436*22dc650dSSadaf Ebrahimi 		ins = (src_lane_index < (2 << (3 - elem_size))) ? (sljit_ins)(0x44 << 10) : (sljit_ins)(0xee << 10);
3437*22dc650dSSadaf Ebrahimi 
3438*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, XVPERMI | ins | FRD(freg) | FRJ(freg));
3439*22dc650dSSadaf Ebrahimi 	}
3440*22dc650dSSadaf Ebrahimi 
3441*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, VREPLVEI | ins | FRD(freg) | FRJ(src) | IMM_V(src_lane_index));
3442*22dc650dSSadaf Ebrahimi }
3443*22dc650dSSadaf Ebrahimi 
sljit_emit_simd_extend(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)3444*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
3445*22dc650dSSadaf Ebrahimi 	sljit_s32 freg,
3446*22dc650dSSadaf Ebrahimi 	sljit_s32 src, sljit_sw srcw)
3447*22dc650dSSadaf Ebrahimi {
3448*22dc650dSSadaf Ebrahimi 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3449*22dc650dSSadaf Ebrahimi 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3450*22dc650dSSadaf Ebrahimi 	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3451*22dc650dSSadaf Ebrahimi 	sljit_ins ins = 0;
3452*22dc650dSSadaf Ebrahimi 
3453*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3454*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
3455*22dc650dSSadaf Ebrahimi 
3456*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(src, srcw);
3457*22dc650dSSadaf Ebrahimi 
3458*22dc650dSSadaf Ebrahimi 	if (reg_size != 5 && reg_size != 4)
3459*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3460*22dc650dSSadaf Ebrahimi 
3461*22dc650dSSadaf Ebrahimi 	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3462*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3463*22dc650dSSadaf Ebrahimi 
3464*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_TEST)
3465*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3466*22dc650dSSadaf Ebrahimi 
3467*22dc650dSSadaf Ebrahimi 	if (src & SLJIT_MEM) {
3468*22dc650dSSadaf Ebrahimi 		ins = (type & SLJIT_SIMD_STORE) ? VST : VLD;
3469*22dc650dSSadaf Ebrahimi 
3470*22dc650dSSadaf Ebrahimi 		if (reg_size == 5)
3471*22dc650dSSadaf Ebrahimi 			ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD;
3472*22dc650dSSadaf Ebrahimi 
3473*22dc650dSSadaf Ebrahimi 		if (FAST_IS_REG(src) && src >= 0 && (srcw >= I12_MIN && srcw <= I12_MAX))
3474*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, ins | FRD(freg) | RJ(src) | IMM_I12(srcw)));
3475*22dc650dSSadaf Ebrahimi 		else {
3476*22dc650dSSadaf Ebrahimi 			FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
3477*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, ins | FRD(freg) | RJ(src) | IMM_I12(0)));
3478*22dc650dSSadaf Ebrahimi 		}
3479*22dc650dSSadaf Ebrahimi 		src = freg;
3480*22dc650dSSadaf Ebrahimi 	}
3481*22dc650dSSadaf Ebrahimi 
3482*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_FLOAT) {
3483*22dc650dSSadaf Ebrahimi 		if (elem_size != 2 || elem2_size != 3)
3484*22dc650dSSadaf Ebrahimi 			return SLJIT_ERR_UNSUPPORTED;
3485*22dc650dSSadaf Ebrahimi 
3486*22dc650dSSadaf Ebrahimi 		ins = 0;
3487*22dc650dSSadaf Ebrahimi 		if (reg_size == 5) {
3488*22dc650dSSadaf Ebrahimi 			ins = (sljit_ins)1 << 26;
3489*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16)));
3490*22dc650dSSadaf Ebrahimi 		}
3491*22dc650dSSadaf Ebrahimi 
3492*22dc650dSSadaf Ebrahimi 		return push_inst(compiler, VFCVTL_D_S | ins | FRD(freg) | FRJ(src));
3493*22dc650dSSadaf Ebrahimi 	}
3494*22dc650dSSadaf Ebrahimi 
3495*22dc650dSSadaf Ebrahimi 	ins = (type & SLJIT_SIMD_EXTEND_SIGNED) ? VSLLWIL : (VSLLWIL | (sljit_ins)1 << 18);
3496*22dc650dSSadaf Ebrahimi 
3497*22dc650dSSadaf Ebrahimi 	if (reg_size == 5)
3498*22dc650dSSadaf Ebrahimi 		ins |= (sljit_ins)1 << 26;
3499*22dc650dSSadaf Ebrahimi 
3500*22dc650dSSadaf Ebrahimi 	do {
3501*22dc650dSSadaf Ebrahimi 		if (reg_size == 5)
3502*22dc650dSSadaf Ebrahimi 			FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16)));
3503*22dc650dSSadaf Ebrahimi 
3504*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, ins | ((sljit_ins)1 << (13 + elem_size)) | FRD(freg) | FRJ(src)));
3505*22dc650dSSadaf Ebrahimi 		src = freg;
3506*22dc650dSSadaf Ebrahimi 	} while (++elem_size < elem2_size);
3507*22dc650dSSadaf Ebrahimi 
3508*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
3509*22dc650dSSadaf Ebrahimi }
3510*22dc650dSSadaf Ebrahimi 
sljit_emit_simd_sign(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 dst,sljit_sw dstw)3511*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
3512*22dc650dSSadaf Ebrahimi 	sljit_s32 freg,
3513*22dc650dSSadaf Ebrahimi 	sljit_s32 dst, sljit_sw dstw)
3514*22dc650dSSadaf Ebrahimi {
3515*22dc650dSSadaf Ebrahimi 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3516*22dc650dSSadaf Ebrahimi 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3517*22dc650dSSadaf Ebrahimi 	sljit_ins ins = 0;
3518*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r;
3519*22dc650dSSadaf Ebrahimi 
3520*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3521*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
3522*22dc650dSSadaf Ebrahimi 
3523*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
3524*22dc650dSSadaf Ebrahimi 
3525*22dc650dSSadaf Ebrahimi 	if (reg_size != 5 && reg_size != 4)
3526*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3527*22dc650dSSadaf Ebrahimi 
3528*22dc650dSSadaf Ebrahimi 	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3529*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3530*22dc650dSSadaf Ebrahimi 
3531*22dc650dSSadaf Ebrahimi 	if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
3532*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3533*22dc650dSSadaf Ebrahimi 
3534*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_TEST)
3535*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3536*22dc650dSSadaf Ebrahimi 
3537*22dc650dSSadaf Ebrahimi 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
3538*22dc650dSSadaf Ebrahimi 
3539*22dc650dSSadaf Ebrahimi 	if (reg_size == 5)
3540*22dc650dSSadaf Ebrahimi 		ins = (sljit_ins)1 << 26;
3541*22dc650dSSadaf Ebrahimi 
3542*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, VMSKLTZ | ins | (sljit_ins)(elem_size << 10) | FRD(TMP_FREG1) | FRJ(freg)));
3543*22dc650dSSadaf Ebrahimi 
3544*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x3c << 10) | RD(dst_r) | FRJ(TMP_FREG1)));
3545*22dc650dSSadaf Ebrahimi 
3546*22dc650dSSadaf Ebrahimi 	if (reg_size == 5) {
3547*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x38 << 10) | ins | RD(TMP_REG3) | FRJ(TMP_FREG1) | IMM_V(2)));
3548*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, SLLI_W | RD(TMP_REG3) | RJ(TMP_REG3) | IMM_I12(2 << (3 - elem_size))));
3549*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, OR | RD(dst_r) | RJ(dst_r) | RK(TMP_REG3)));
3550*22dc650dSSadaf Ebrahimi 	}
3551*22dc650dSSadaf Ebrahimi 
3552*22dc650dSSadaf Ebrahimi 	if (dst_r == TMP_REG2)
3553*22dc650dSSadaf Ebrahimi 		return emit_op_mem(compiler, ((type & SLJIT_32) ? INT_DATA : WORD_DATA), TMP_REG2, dst, dstw);
3554*22dc650dSSadaf Ebrahimi 
3555*22dc650dSSadaf Ebrahimi 	return SLJIT_SUCCESS;
3556*22dc650dSSadaf Ebrahimi }
3557*22dc650dSSadaf Ebrahimi 
sljit_emit_simd_op2(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1_freg,sljit_s32 src2_freg)3558*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
3559*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
3560*22dc650dSSadaf Ebrahimi {
3561*22dc650dSSadaf Ebrahimi 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3562*22dc650dSSadaf Ebrahimi 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3563*22dc650dSSadaf Ebrahimi 	sljit_ins ins = 0;
3564*22dc650dSSadaf Ebrahimi 
3565*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3566*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
3567*22dc650dSSadaf Ebrahimi 
3568*22dc650dSSadaf Ebrahimi 	if (reg_size != 5 && reg_size != 4)
3569*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3570*22dc650dSSadaf Ebrahimi 
3571*22dc650dSSadaf Ebrahimi 	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3572*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3573*22dc650dSSadaf Ebrahimi 
3574*22dc650dSSadaf Ebrahimi 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3575*22dc650dSSadaf Ebrahimi 		return SLJIT_ERR_UNSUPPORTED;
3576*22dc650dSSadaf Ebrahimi 
3577*22dc650dSSadaf Ebrahimi 	if (type & SLJIT_SIMD_TEST)
3578*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3579*22dc650dSSadaf Ebrahimi 
3580*22dc650dSSadaf Ebrahimi 	switch (SLJIT_SIMD_GET_OPCODE(type)) {
3581*22dc650dSSadaf Ebrahimi 	case SLJIT_SIMD_OP2_AND:
3582*22dc650dSSadaf Ebrahimi 		ins = VAND_V;
3583*22dc650dSSadaf Ebrahimi 		break;
3584*22dc650dSSadaf Ebrahimi 	case SLJIT_SIMD_OP2_OR:
3585*22dc650dSSadaf Ebrahimi 		ins = VOR_V;
3586*22dc650dSSadaf Ebrahimi 		break;
3587*22dc650dSSadaf Ebrahimi 	case SLJIT_SIMD_OP2_XOR:
3588*22dc650dSSadaf Ebrahimi 		ins = VXOR_V;
3589*22dc650dSSadaf Ebrahimi 		break;
3590*22dc650dSSadaf Ebrahimi 	}
3591*22dc650dSSadaf Ebrahimi 
3592*22dc650dSSadaf Ebrahimi 	if (reg_size == 5)
3593*22dc650dSSadaf Ebrahimi 		ins |= (sljit_ins)1 << 26;
3594*22dc650dSSadaf Ebrahimi 
3595*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, ins | FRD(dst_freg) | FRJ(src1_freg) | FRK(src2_freg));
3596*22dc650dSSadaf Ebrahimi }
3597*22dc650dSSadaf Ebrahimi 
sljit_emit_atomic_load(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 mem_reg)3598*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler,
3599*22dc650dSSadaf Ebrahimi 	sljit_s32 op,
3600*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_reg,
3601*22dc650dSSadaf Ebrahimi 	sljit_s32 mem_reg)
3602*22dc650dSSadaf Ebrahimi {
3603*22dc650dSSadaf Ebrahimi 	sljit_ins ins;
3604*22dc650dSSadaf Ebrahimi 
3605*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3606*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
3607*22dc650dSSadaf Ebrahimi 
3608*22dc650dSSadaf Ebrahimi 	switch(GET_OPCODE(op)) {
3609*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U8:
3610*22dc650dSSadaf Ebrahimi 		ins = LD_BU;
3611*22dc650dSSadaf Ebrahimi 		break;
3612*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U16:
3613*22dc650dSSadaf Ebrahimi 		ins = LD_HU;
3614*22dc650dSSadaf Ebrahimi 		break;
3615*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV32:
3616*22dc650dSSadaf Ebrahimi 		ins = LD_W;
3617*22dc650dSSadaf Ebrahimi 		break;
3618*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U32:
3619*22dc650dSSadaf Ebrahimi 		ins = LD_WU;
3620*22dc650dSSadaf Ebrahimi 		break;
3621*22dc650dSSadaf Ebrahimi 	default:
3622*22dc650dSSadaf Ebrahimi 		ins = LD_D;
3623*22dc650dSSadaf Ebrahimi 		break;
3624*22dc650dSSadaf Ebrahimi 	}
3625*22dc650dSSadaf Ebrahimi 
3626*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg) | IMM_I12(0));
3627*22dc650dSSadaf Ebrahimi }
3628*22dc650dSSadaf Ebrahimi 
sljit_emit_atomic_store(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src_reg,sljit_s32 mem_reg,sljit_s32 temp_reg)3629*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler,
3630*22dc650dSSadaf Ebrahimi 	sljit_s32 op,
3631*22dc650dSSadaf Ebrahimi 	sljit_s32 src_reg,
3632*22dc650dSSadaf Ebrahimi 	sljit_s32 mem_reg,
3633*22dc650dSSadaf Ebrahimi 	sljit_s32 temp_reg)
3634*22dc650dSSadaf Ebrahimi {
3635*22dc650dSSadaf Ebrahimi 	sljit_ins ins = 0;
3636*22dc650dSSadaf Ebrahimi 	sljit_ins unsign = 0;
3637*22dc650dSSadaf Ebrahimi 	sljit_s32 tmp = temp_reg;
3638*22dc650dSSadaf Ebrahimi 
3639*22dc650dSSadaf Ebrahimi 	CHECK_ERROR();
3640*22dc650dSSadaf Ebrahimi 	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
3641*22dc650dSSadaf Ebrahimi 
3642*22dc650dSSadaf Ebrahimi 	switch (GET_OPCODE(op)) {
3643*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U8:
3644*22dc650dSSadaf Ebrahimi 		ins = AMCAS_B;
3645*22dc650dSSadaf Ebrahimi 		unsign = BSTRPICK_D | (7 << 16);
3646*22dc650dSSadaf Ebrahimi 		break;
3647*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U16:
3648*22dc650dSSadaf Ebrahimi 		ins = AMCAS_H;
3649*22dc650dSSadaf Ebrahimi 		unsign = BSTRPICK_D | (15 << 16);
3650*22dc650dSSadaf Ebrahimi 		break;
3651*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV32:
3652*22dc650dSSadaf Ebrahimi 		ins = AMCAS_W;
3653*22dc650dSSadaf Ebrahimi 		break;
3654*22dc650dSSadaf Ebrahimi 	case SLJIT_MOV_U32:
3655*22dc650dSSadaf Ebrahimi 		ins = AMCAS_W;
3656*22dc650dSSadaf Ebrahimi 		unsign = BSTRPICK_D | (31 << 16);
3657*22dc650dSSadaf Ebrahimi 		break;
3658*22dc650dSSadaf Ebrahimi 	default:
3659*22dc650dSSadaf Ebrahimi 		ins = AMCAS_D;
3660*22dc650dSSadaf Ebrahimi 		break;
3661*22dc650dSSadaf Ebrahimi 	}
3662*22dc650dSSadaf Ebrahimi 
3663*22dc650dSSadaf Ebrahimi 	if (op & SLJIT_SET_ATOMIC_STORED) {
3664*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(temp_reg) | RK(TMP_ZERO)));
3665*22dc650dSSadaf Ebrahimi 		tmp = TMP_REG1;
3666*22dc650dSSadaf Ebrahimi 	}
3667*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, ins | RD(tmp) | RJ(mem_reg) | RK(src_reg)));
3668*22dc650dSSadaf Ebrahimi 	if (!(op & SLJIT_SET_ATOMIC_STORED))
3669*22dc650dSSadaf Ebrahimi 		return SLJIT_SUCCESS;
3670*22dc650dSSadaf Ebrahimi 
3671*22dc650dSSadaf Ebrahimi 	if (unsign)
3672*22dc650dSSadaf Ebrahimi 		FAIL_IF(push_inst(compiler, unsign | RD(tmp) | RJ(tmp)));
3673*22dc650dSSadaf Ebrahimi 
3674*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(tmp) | RK(temp_reg)));
3675*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(EQUAL_FLAG) | IMM_I12(1));
3676*22dc650dSSadaf Ebrahimi }
3677*22dc650dSSadaf Ebrahimi 
emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw init_value,sljit_ins last_ins)3678*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
3679*22dc650dSSadaf Ebrahimi {
3680*22dc650dSSadaf Ebrahimi 	SLJIT_UNUSED_ARG(last_ins);
3681*22dc650dSSadaf Ebrahimi 
3682*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, LU12I_W | RD(dst) | (sljit_ins)(((init_value & 0xffffffff) >> 12) << 5)));
3683*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, LU32I_D | RD(dst) | (sljit_ins)(((init_value >> 32) & 0xfffff) << 5)));
3684*22dc650dSSadaf Ebrahimi 	FAIL_IF(push_inst(compiler, LU52I_D | RD(dst) | RJ(dst) | (sljit_ins)(IMM_I12(init_value >> 52))));
3685*22dc650dSSadaf Ebrahimi 	return push_inst(compiler, ORI | RD(dst) | RJ(dst) | IMM_I12(init_value));
3686*22dc650dSSadaf Ebrahimi }
3687*22dc650dSSadaf Ebrahimi 
sljit_set_jump_addr(sljit_uw addr,sljit_uw new_target,sljit_sw executable_offset)3688*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3689*22dc650dSSadaf Ebrahimi {
3690*22dc650dSSadaf Ebrahimi 	sljit_ins *inst = (sljit_ins*)addr;
3691*22dc650dSSadaf Ebrahimi 	SLJIT_UNUSED_ARG(executable_offset);
3692*22dc650dSSadaf Ebrahimi 
3693*22dc650dSSadaf Ebrahimi 	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
3694*22dc650dSSadaf Ebrahimi 
3695*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT((inst[0] & OPC_1RI20(0x7f)) == LU12I_W);
3696*22dc650dSSadaf Ebrahimi 	inst[0] = (inst[0] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target & 0xffffffff) >> 12) << 5);
3697*22dc650dSSadaf Ebrahimi 
3698*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT((inst[1] & OPC_1RI20(0x7f)) == LU32I_D);
3699*22dc650dSSadaf Ebrahimi 	inst[1] = (inst[1] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(sljit_ins)(((new_target >> 32) & 0xfffff) << 5);
3700*22dc650dSSadaf Ebrahimi 
3701*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT((inst[2] & OPC_2RI12(0x3ff)) == LU52I_D);
3702*22dc650dSSadaf Ebrahimi 	inst[2] = (inst[2] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target >> 52);
3703*22dc650dSSadaf Ebrahimi 
3704*22dc650dSSadaf Ebrahimi 	SLJIT_ASSERT((inst[3] & OPC_2RI12(0x3ff)) == ORI || (inst[3] & OPC_2RI16(0x3f)) == JIRL);
3705*22dc650dSSadaf Ebrahimi 	if ((inst[3] & OPC_2RI12(0x3ff)) == ORI)
3706*22dc650dSSadaf Ebrahimi 		inst[3] = (inst[3] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target);
3707*22dc650dSSadaf Ebrahimi 	else
3708*22dc650dSSadaf Ebrahimi 		inst[3] = (inst[3] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I12((new_target & 0xfff) >> 2);
3709*22dc650dSSadaf Ebrahimi 
3710*22dc650dSSadaf Ebrahimi 	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
3711*22dc650dSSadaf Ebrahimi 
3712*22dc650dSSadaf Ebrahimi 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
3713*22dc650dSSadaf Ebrahimi 	SLJIT_CACHE_FLUSH(inst, inst + 4);
3714*22dc650dSSadaf Ebrahimi }
3715*22dc650dSSadaf Ebrahimi 
sljit_emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw init_value)3716*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
3717*22dc650dSSadaf Ebrahimi {
3718*22dc650dSSadaf Ebrahimi 	struct sljit_const *const_;
3719*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r;
3720*22dc650dSSadaf Ebrahimi 
3721*22dc650dSSadaf Ebrahimi 	CHECK_ERROR_PTR();
3722*22dc650dSSadaf Ebrahimi 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
3723*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
3724*22dc650dSSadaf Ebrahimi 
3725*22dc650dSSadaf Ebrahimi 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
3726*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(!const_);
3727*22dc650dSSadaf Ebrahimi 	set_const(const_, compiler);
3728*22dc650dSSadaf Ebrahimi 
3729*22dc650dSSadaf Ebrahimi 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
3730*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, 0));
3731*22dc650dSSadaf Ebrahimi 
3732*22dc650dSSadaf Ebrahimi 	if (dst & SLJIT_MEM)
3733*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
3734*22dc650dSSadaf Ebrahimi 
3735*22dc650dSSadaf Ebrahimi 	return const_;
3736*22dc650dSSadaf Ebrahimi }
3737*22dc650dSSadaf Ebrahimi 
sljit_emit_mov_addr(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)3738*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3739*22dc650dSSadaf Ebrahimi {
3740*22dc650dSSadaf Ebrahimi 	struct sljit_jump *jump;
3741*22dc650dSSadaf Ebrahimi 	sljit_s32 dst_r;
3742*22dc650dSSadaf Ebrahimi 
3743*22dc650dSSadaf Ebrahimi 	CHECK_ERROR_PTR();
3744*22dc650dSSadaf Ebrahimi 	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
3745*22dc650dSSadaf Ebrahimi 	ADJUST_LOCAL_OFFSET(dst, dstw);
3746*22dc650dSSadaf Ebrahimi 
3747*22dc650dSSadaf Ebrahimi 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3748*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(!jump);
3749*22dc650dSSadaf Ebrahimi 	set_mov_addr(jump, compiler, 0);
3750*22dc650dSSadaf Ebrahimi 
3751*22dc650dSSadaf Ebrahimi 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
3752*22dc650dSSadaf Ebrahimi 	PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r));
3753*22dc650dSSadaf Ebrahimi 
3754*22dc650dSSadaf Ebrahimi 	compiler->size += JUMP_MAX_SIZE - 1;
3755*22dc650dSSadaf Ebrahimi 
3756*22dc650dSSadaf Ebrahimi 	if (dst & SLJIT_MEM)
3757*22dc650dSSadaf Ebrahimi 		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
3758*22dc650dSSadaf Ebrahimi 
3759*22dc650dSSadaf Ebrahimi 	return jump;
3760*22dc650dSSadaf Ebrahimi }
3761*22dc650dSSadaf Ebrahimi 
sljit_set_const(sljit_uw addr,sljit_sw new_constant,sljit_sw executable_offset)3762*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
3763*22dc650dSSadaf Ebrahimi {
3764*22dc650dSSadaf Ebrahimi 	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
3765*22dc650dSSadaf Ebrahimi }
3766