1*22dc650dSSadaf Ebrahimi /*
2*22dc650dSSadaf Ebrahimi * Stack-less Just-In-Time compiler
3*22dc650dSSadaf Ebrahimi *
4*22dc650dSSadaf Ebrahimi * Copyright Zoltan Herczeg ([email protected]). All rights reserved.
5*22dc650dSSadaf Ebrahimi *
6*22dc650dSSadaf Ebrahimi * Redistribution and use in source and binary forms, with or without modification, are
7*22dc650dSSadaf Ebrahimi * permitted provided that the following conditions are met:
8*22dc650dSSadaf Ebrahimi *
9*22dc650dSSadaf Ebrahimi * 1. Redistributions of source code must retain the above copyright notice, this list of
10*22dc650dSSadaf Ebrahimi * conditions and the following disclaimer.
11*22dc650dSSadaf Ebrahimi *
12*22dc650dSSadaf Ebrahimi * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13*22dc650dSSadaf Ebrahimi * of conditions and the following disclaimer in the documentation and/or other materials
14*22dc650dSSadaf Ebrahimi * provided with the distribution.
15*22dc650dSSadaf Ebrahimi *
16*22dc650dSSadaf Ebrahimi * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17*22dc650dSSadaf Ebrahimi * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18*22dc650dSSadaf Ebrahimi * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19*22dc650dSSadaf Ebrahimi * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20*22dc650dSSadaf Ebrahimi * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21*22dc650dSSadaf Ebrahimi * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22*22dc650dSSadaf Ebrahimi * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23*22dc650dSSadaf Ebrahimi * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24*22dc650dSSadaf Ebrahimi * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25*22dc650dSSadaf Ebrahimi */
26*22dc650dSSadaf Ebrahimi
sljit_get_platform_name(void)27*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28*22dc650dSSadaf Ebrahimi {
29*22dc650dSSadaf Ebrahimi return "LOONGARCH" SLJIT_CPUINFO;
30*22dc650dSSadaf Ebrahimi }
31*22dc650dSSadaf Ebrahimi
32*22dc650dSSadaf Ebrahimi typedef sljit_u32 sljit_ins;
33*22dc650dSSadaf Ebrahimi
34*22dc650dSSadaf Ebrahimi #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
35*22dc650dSSadaf Ebrahimi #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
36*22dc650dSSadaf Ebrahimi #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
37*22dc650dSSadaf Ebrahimi #define TMP_ZERO 0
38*22dc650dSSadaf Ebrahimi
39*22dc650dSSadaf Ebrahimi /* Flags are kept in volatile registers. */
40*22dc650dSSadaf Ebrahimi #define EQUAL_FLAG (SLJIT_NUMBER_OF_REGISTERS + 5)
41*22dc650dSSadaf Ebrahimi #define RETURN_ADDR_REG TMP_REG2
42*22dc650dSSadaf Ebrahimi #define OTHER_FLAG (SLJIT_NUMBER_OF_REGISTERS + 6)
43*22dc650dSSadaf Ebrahimi
44*22dc650dSSadaf Ebrahimi #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
45*22dc650dSSadaf Ebrahimi #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
46*22dc650dSSadaf Ebrahimi
47*22dc650dSSadaf Ebrahimi static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
48*22dc650dSSadaf Ebrahimi 0, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 22, 31, 30, 29, 28, 27, 26, 25, 24, 23, 3, 13, 1, 14, 12, 15
49*22dc650dSSadaf Ebrahimi };
50*22dc650dSSadaf Ebrahimi
51*22dc650dSSadaf Ebrahimi static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
52*22dc650dSSadaf Ebrahimi 0, 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 31, 30, 29, 28, 27, 26, 25, 24, 8, 9
53*22dc650dSSadaf Ebrahimi };
54*22dc650dSSadaf Ebrahimi
55*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
/* Instruction forms */
57*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
58*22dc650dSSadaf Ebrahimi
59*22dc650dSSadaf Ebrahimi /*
60*22dc650dSSadaf Ebrahimi LoongArch instructions are 32 bits wide, belonging to 9 basic instruction formats (and variants of them):
61*22dc650dSSadaf Ebrahimi
62*22dc650dSSadaf Ebrahimi | Format name | Composition |
63*22dc650dSSadaf Ebrahimi | 2R | Opcode + Rj + Rd |
64*22dc650dSSadaf Ebrahimi | 3R | Opcode + Rk + Rj + Rd |
65*22dc650dSSadaf Ebrahimi | 4R | Opcode + Ra + Rk + Rj + Rd |
66*22dc650dSSadaf Ebrahimi | 2RI8 | Opcode + I8 + Rj + Rd |
67*22dc650dSSadaf Ebrahimi | 2RI12 | Opcode + I12 + Rj + Rd |
68*22dc650dSSadaf Ebrahimi | 2RI14 | Opcode + I14 + Rj + Rd |
69*22dc650dSSadaf Ebrahimi | 2RI16 | Opcode + I16 + Rj + Rd |
70*22dc650dSSadaf Ebrahimi | 1RI21 | Opcode + I21L + Rj + I21H |
71*22dc650dSSadaf Ebrahimi | I26 | Opcode + I26L + I26H |
72*22dc650dSSadaf Ebrahimi
73*22dc650dSSadaf Ebrahimi Rd is the destination register operand, while Rj, Rk and Ra (“a” stands for “additional”) are the source register operands.
74*22dc650dSSadaf Ebrahimi I8/I12/I14/I16/I21/I26 are immediate operands of respective width. The longer I21 and I26 are stored in separate higher and
75*22dc650dSSadaf Ebrahimi lower parts in the instruction word, denoted by the “L” and “H” suffixes. */
76*22dc650dSSadaf Ebrahimi
/* General purpose register fields. The register index is translated through
   reg_map and shifted to the position of the Rd / Rj / Rk / Ra field. */
#define RD(rd) ((sljit_ins)reg_map[rd])
#define RJ(rj) ((sljit_ins)reg_map[rj] << 5)
#define RK(rk) ((sljit_ins)reg_map[rk] << 10)
#define RA(ra) ((sljit_ins)reg_map[ra] << 15)

/* NOTE(review): FD translates through reg_map (not freg_map), unlike FRD
   below - confirm this is what its users expect. */
#define FD(fd) ((sljit_ins)reg_map[fd])
/* Floating point register fields, translated through freg_map. */
#define FRD(fd) ((sljit_ins)freg_map[fd])
#define FRJ(fj) ((sljit_ins)freg_map[fj] << 5)
#define FRK(fk) ((sljit_ins)freg_map[fk] << 10)
#define FRA(fa) ((sljit_ins)freg_map[fa] << 15)

/* Immediate fields. Every IMM_Ix macro truncates its argument to x bits
   before moving it to its place, so negative values can be passed directly.
   IMM_V performs no truncation. */
#define IMM_V(imm) ((sljit_ins)(imm) << 10)
#define IMM_I8(imm) (((sljit_ins)(imm)&0xff) << 10)
#define IMM_I12(imm) (((sljit_ins)(imm)&0xfff) << 10)
/* A 14 bit field needs the mask 0x3fff. The former 0xfff3 mask dropped
   bits 2-3 of the immediate and let bits 14-15 leak into the opcode. */
#define IMM_I14(imm) (((sljit_ins)(imm)&0x3fff) << 10)
#define IMM_I16(imm) (((sljit_ins)(imm)&0xffff) << 10)
/* Bits 12-31 of the argument, placed at bit 5. */
#define IMM_I20(imm) (((sljit_ins)(imm)&0xffffffff) >> 12 << 5)
/* Split immediates: the low 16 bits are stored at bit 10, the remaining
   high bits (5 or 10 of them) at bit 0. */
#define IMM_I21(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x1f))
#define IMM_I26(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x3ff))

/* Opcode fields: moves the opcode above the operands of each instruction format. */
#define OPC_I26(opc) ((sljit_ins)(opc) << 26)
#define OPC_1RI21(opc) ((sljit_ins)(opc) << 26)
#define OPC_2RI16(opc) ((sljit_ins)(opc) << 26)
#define OPC_2RI14(opc) ((sljit_ins)(opc) << 24)
#define OPC_2RI12(opc) ((sljit_ins)(opc) << 22)
#define OPC_2RI8(opc) ((sljit_ins)(opc) << 18)
#define OPC_4R(opc) ((sljit_ins)(opc) << 20)
#define OPC_3R(opc) ((sljit_ins)(opc) << 15)
#define OPC_2R(opc) ((sljit_ins)(opc) << 10)
#define OPC_1RI20(opc) ((sljit_ins)(opc) << 25)
/* Arithmetic operation instructions */
/* Every value is the bare opcode, positioned by the OPC_* macro of its format. */
#define ADD_W		OPC_3R(0x20)
#define ADD_D		OPC_3R(0x21)
#define SUB_W		OPC_3R(0x22)
#define SUB_D		OPC_3R(0x23)
#define ADDI_W		OPC_2RI12(0xa)
#define ADDI_D		OPC_2RI12(0xb)
#define ANDI		OPC_2RI12(0xd)
#define ORI		OPC_2RI12(0xe)
#define XORI		OPC_2RI12(0xf)
#define ADDU16I_D	OPC_2RI16(0x4)
#define LU12I_W		OPC_1RI20(0xa)
#define LU32I_D		OPC_1RI20(0xb)
#define LU52I_D		OPC_2RI12(0xc)
#define SLT		OPC_3R(0x24)
#define SLTU		OPC_3R(0x25)
#define SLTI		OPC_2RI12(0x8)
#define SLTUI		OPC_2RI12(0x9)
#define PCADDI		OPC_1RI20(0xc)
#define PCALAU12I	OPC_1RI20(0xd)
#define PCADDU12I	OPC_1RI20(0xe)
#define PCADDU18I	OPC_1RI20(0xf)
#define NOR		OPC_3R(0x28)
#define AND		OPC_3R(0x29)
#define OR		OPC_3R(0x2a)
#define XOR		OPC_3R(0x2b)
#define ORN		OPC_3R(0x2c)
#define ANDN		OPC_3R(0x2d)
#define MUL_W		OPC_3R(0x38)
#define MULH_W		OPC_3R(0x39)
#define MULH_WU		OPC_3R(0x3a)
#define MUL_D		OPC_3R(0x3b)
#define MULH_D		OPC_3R(0x3c)
#define MULH_DU		OPC_3R(0x3d)
#define MULW_D_W	OPC_3R(0x3e)
#define MULW_D_WU	OPC_3R(0x3f)
#define DIV_W		OPC_3R(0x40)
#define MOD_W		OPC_3R(0x41)
#define DIV_WU		OPC_3R(0x42)
#define MOD_WU		OPC_3R(0x43)
#define DIV_D		OPC_3R(0x44)
#define MOD_D		OPC_3R(0x45)
#define DIV_DU		OPC_3R(0x46)
#define MOD_DU		OPC_3R(0x47)
152*22dc650dSSadaf Ebrahimi
/* Bit-shift instructions */
#define SLL_W		OPC_3R(0x2e)
#define SRL_W		OPC_3R(0x2f)
#define SRA_W		OPC_3R(0x30)
#define SLL_D		OPC_3R(0x31)
#define SRL_D		OPC_3R(0x32)
#define SRA_D		OPC_3R(0x33)
#define ROTR_W		OPC_3R(0x36)
#define ROTR_D		OPC_3R(0x37)
/* Immediate shifts: the _D variants place their opcode at bit 16 instead of bit 15. */
#define SLLI_W		OPC_3R(0x81)
#define SLLI_D		((sljit_ins)(0x41) << 16)
#define SRLI_W		OPC_3R(0x89)
#define SRLI_D		((sljit_ins)(0x45) << 16)
#define SRAI_W		OPC_3R(0x91)
#define SRAI_D		((sljit_ins)(0x49) << 16)
#define ROTRI_W		OPC_3R(0x99)
#define ROTRI_D		((sljit_ins)(0x4d) << 16)

/* Bit-manipulation instructions */
#define CLO_W		OPC_2R(0x4)
#define CLZ_W		OPC_2R(0x5)
#define CTO_W		OPC_2R(0x6)
#define CTZ_W		OPC_2R(0x7)
#define CLO_D		OPC_2R(0x8)
#define CLZ_D		OPC_2R(0x9)
#define CTO_D		OPC_2R(0xa)
#define CTZ_D		OPC_2R(0xb)
#define REVB_2H		OPC_2R(0xc)
#define REVB_4H		OPC_2R(0xd)
#define REVB_2W		OPC_2R(0xe)
#define REVB_D		OPC_2R(0xf)
#define REVH_2W		OPC_2R(0x10)
#define REVH_D		OPC_2R(0x11)
#define BITREV_4B	OPC_2R(0x12)
#define BITREV_8B	OPC_2R(0x13)
#define BITREV_W	OPC_2R(0x14)
#define BITREV_D	OPC_2R(0x15)
#define EXT_W_H		OPC_2R(0x16)
#define EXT_W_B		OPC_2R(0x17)
/* The bit string instructions are given as complete bit patterns. */
#define BSTRINS_W	(0x1 << 22 | 1 << 21)
#define BSTRPICK_W	(0x1 << 22 | 1 << 21 | 1 << 15)
#define BSTRINS_D	(0x2 << 22)
#define BSTRPICK_D	(0x3 << 22)
196*22dc650dSSadaf Ebrahimi
/* Branch instructions */
#define BEQZ		OPC_1RI21(0x10)
#define BNEZ		OPC_1RI21(0x11)
#define JIRL		OPC_2RI16(0x13)
#define B		OPC_I26(0x14)
#define BL		OPC_I26(0x15)
#define BEQ		OPC_2RI16(0x16)
#define BNE		OPC_2RI16(0x17)
#define BLT		OPC_2RI16(0x18)
#define BGE		OPC_2RI16(0x19)
#define BLTU		OPC_2RI16(0x1a)
#define BGEU		OPC_2RI16(0x1b)

/* Memory access instructions */
/* Base register + 12 bit immediate forms. */
#define LD_B		OPC_2RI12(0xa0)
#define LD_H		OPC_2RI12(0xa1)
#define LD_W		OPC_2RI12(0xa2)
#define LD_D		OPC_2RI12(0xa3)

#define ST_B		OPC_2RI12(0xa4)
#define ST_H		OPC_2RI12(0xa5)
#define ST_W		OPC_2RI12(0xa6)
#define ST_D		OPC_2RI12(0xa7)

#define LD_BU		OPC_2RI12(0xa8)
#define LD_HU		OPC_2RI12(0xa9)
#define LD_WU		OPC_2RI12(0xaa)

/* Three register (indexed) forms. */
#define LDX_B		OPC_3R(0x7000)
#define LDX_H		OPC_3R(0x7008)
#define LDX_W		OPC_3R(0x7010)
#define LDX_D		OPC_3R(0x7018)

#define STX_B		OPC_3R(0x7020)
#define STX_H		OPC_3R(0x7028)
#define STX_W		OPC_3R(0x7030)
#define STX_D		OPC_3R(0x7038)

#define LDX_BU		OPC_3R(0x7040)
#define LDX_HU		OPC_3R(0x7048)
#define LDX_WU		OPC_3R(0x7050)

#define PRELD		OPC_2RI12(0xab)

/* Atomic memory access instructions */
#define LL_W		OPC_2RI14(0x20)
#define SC_W		OPC_2RI14(0x21)
#define LL_D		OPC_2RI14(0x22)
#define SC_D		OPC_2RI14(0x23)

/* LoongArch V1.10 Instructions */
#define AMCAS_B		OPC_3R(0x70B0)
#define AMCAS_H		OPC_3R(0x70B1)
#define AMCAS_W		OPC_3R(0x70B2)
#define AMCAS_D		OPC_3R(0x70B3)

/* Other instructions */
#define BREAK		OPC_3R(0x54)
#define DBGCALL		OPC_3R(0x55)
#define SYSCALL		OPC_3R(0x56)
257*22dc650dSSadaf Ebrahimi
/* Basic Floating-Point Instructions */
/* Floating-Point Arithmetic Operation Instructions */
#define FADD_S		OPC_3R(0x201)
#define FADD_D		OPC_3R(0x202)
#define FSUB_S		OPC_3R(0x205)
#define FSUB_D		OPC_3R(0x206)
#define FMUL_S		OPC_3R(0x209)
#define FMUL_D		OPC_3R(0x20a)
#define FDIV_S		OPC_3R(0x20d)
#define FDIV_D		OPC_3R(0x20e)
#define FCMP_COND_S	OPC_4R(0xc1)
#define FCMP_COND_D	OPC_4R(0xc2)
#define FCOPYSIGN_S	OPC_3R(0x225)
#define FCOPYSIGN_D	OPC_3R(0x226)
#define FSEL		OPC_4R(0xd0)
#define FABS_S		OPC_2R(0x4501)
#define FABS_D		OPC_2R(0x4502)
#define FNEG_S		OPC_2R(0x4505)
#define FNEG_D		OPC_2R(0x4506)

/* Floating-Point Conversion Instructions */
#define FCVT_S_D	OPC_2R(0x4646)
#define FCVT_D_S	OPC_2R(0x4649)
#define FTINTRZ_W_S	OPC_2R(0x46a1)
#define FTINTRZ_W_D	OPC_2R(0x46a2)
#define FTINTRZ_L_S	OPC_2R(0x46a9)
#define FTINTRZ_L_D	OPC_2R(0x46aa)
#define FFINT_S_W	OPC_2R(0x4744)
#define FFINT_S_L	OPC_2R(0x4746)
#define FFINT_D_W	OPC_2R(0x4748)
#define FFINT_D_L	OPC_2R(0x474a)

/* Floating-Point Move Instructions */
/* FMOV_S / FMOV_D used to be defined a second time in the arithmetic group
   above; they are kept only here so the two copies cannot drift apart. */
#define FMOV_S		OPC_2R(0x4525)
#define FMOV_D		OPC_2R(0x4526)
#define MOVGR2FR_W	OPC_2R(0x4529)
#define MOVGR2FR_D	OPC_2R(0x452a)
#define MOVGR2FRH_W	OPC_2R(0x452b)
#define MOVFR2GR_S	OPC_2R(0x452d)
#define MOVFR2GR_D	OPC_2R(0x452e)
#define MOVFRH2GR_S	OPC_2R(0x452f)
#define MOVGR2FCSR	OPC_2R(0x4530)
#define MOVFCSR2GR	OPC_2R(0x4532)
#define MOVFR2CF	OPC_2R(0x4534)
#define MOVCF2FR	OPC_2R(0x4535)
#define MOVGR2CF	OPC_2R(0x4536)
#define MOVCF2GR	OPC_2R(0x4537)

/* Floating-Point Branch Instructions */
/* NOTE(review): both macros expand to the same value; confirm that the users
   add the bits which distinguish the two branches. */
#define BCEQZ		OPC_I26(0x12)
#define BCNEZ		OPC_I26(0x12)

/* Floating-Point Common Memory Access Instructions */
#define FLD_S		OPC_2RI12(0xac)
#define FLD_D		OPC_2RI12(0xae)
#define FST_S		OPC_2RI12(0xad)
#define FST_D		OPC_2RI12(0xaf)

#define FLDX_S		OPC_3R(0x7060)
#define FLDX_D		OPC_3R(0x7068)
#define FSTX_S		OPC_3R(0x7070)
#define FSTX_D		OPC_3R(0x7078)
322*22dc650dSSadaf Ebrahimi
/* Vector Instructions */

/* Vector Arithmetic Instructions */
#define VOR_V		OPC_3R(0xe24d)
#define VXOR_V		OPC_3R(0xe24e)
#define VAND_V		OPC_3R(0xe24c)
#define VMSKLTZ		OPC_2R(0x1ca710)

/* Vector Memory Access Instructions */
#define VLD		OPC_2RI12(0xb0)
#define VST		OPC_2RI12(0xb1)
#define XVLD		OPC_2RI12(0xb2)
#define XVST		OPC_2RI12(0xb3)
#define VSTELM		OPC_2RI8(0xc40)

/* Vector Float Conversion Instructions */
#define VFCVTL_D_S	OPC_2R(0x1ca77c)

/* Vector Bit Manipulate Instructions */
#define VSLLWIL		OPC_2R(0x1cc200)

/* Vector Move And Shuffle Instructions */
#define VLDREPL		OPC_2R(0xc0000)
#define VINSGR2VR	OPC_2R(0x1cbac0)
#define VPICKVE2GR_U	OPC_2R(0x1cbce0)
#define VREPLGR2VR	OPC_2R(0x1ca7c0)
#define VREPLVE		OPC_3R(0xe244)
#define VREPLVEI	OPC_2R(0x1cbde0)
#define XVPERMI		OPC_2RI8(0x1dfa)
352*22dc650dSSadaf Ebrahimi
/* Range of a signed 12 bit immediate. */
#define I12_MAX (0x7ff)
#define I12_MIN (-0x800)
/* Byte ranges of the branch / jump offsets: the encoded value is shifted left by two. */
#define BRANCH16_MAX (0x7fff << 2)
#define BRANCH16_MIN (-(0x8000 << 2))
#define BRANCH21_MAX (0xfffff << 2)
#define BRANCH21_MIN (-(0x100000 << 2))
#define JUMP_MAX (0x1ffffff << 2)
#define JUMP_MIN (-(0x2000000 << 2))
#define JIRL_MAX (0x7fff << 2)
#define JIRL_MIN (-(0x8000 << 2))

/* The suffix is written as an upper case L so it cannot be misread as the digit one. */
#define S32_MAX (0x7fffffffL)
#define S32_MIN (-0x80000000L)
#define S52_MAX (0x7ffffffffffffL)

/* Selects the _W form of an instruction when SLJIT_32 is set in type and the
   _D form otherwise. The type argument is parenthesized so that compound
   expressions (e.g. a | b) are tested as a whole. */
#define INST(inst, type) ((sljit_ins)(((type) & SLJIT_32) ? inst##_W : inst##_D))
369*22dc650dSSadaf Ebrahimi
370*22dc650dSSadaf Ebrahimi /* LoongArch CPUCFG register for feature detection */
371*22dc650dSSadaf Ebrahimi #define LOONGARCH_CFG2 0x02
372*22dc650dSSadaf Ebrahimi #define LOONGARCH_CFG2_LAMCAS (1 << 28)
373*22dc650dSSadaf Ebrahimi
374*22dc650dSSadaf Ebrahimi static sljit_u32 cfg2_feature_list = 0;
375*22dc650dSSadaf Ebrahimi
376*22dc650dSSadaf Ebrahimi /* According to Software Development and Build Convention for LoongArch Architectures,
377*22dc650dSSadaf Ebrahimi + the status of LSX and LASX extension must be checked through HWCAP */
378*22dc650dSSadaf Ebrahimi #include <sys/auxv.h>
379*22dc650dSSadaf Ebrahimi
380*22dc650dSSadaf Ebrahimi #define LOONGARCH_HWCAP_LSX (1 << 4)
381*22dc650dSSadaf Ebrahimi #define LOONGARCH_HWCAP_LASX (1 << 5)
382*22dc650dSSadaf Ebrahimi
383*22dc650dSSadaf Ebrahimi static sljit_u32 hwcap_feature_list = 0;
384*22dc650dSSadaf Ebrahimi
385*22dc650dSSadaf Ebrahimi /* Feature type */
386*22dc650dSSadaf Ebrahimi #define GET_CFG2 0
387*22dc650dSSadaf Ebrahimi #define GET_HWCAP 1
388*22dc650dSSadaf Ebrahimi
get_cpu_features(sljit_u32 feature_type)389*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_u32 get_cpu_features(sljit_u32 feature_type)
390*22dc650dSSadaf Ebrahimi {
391*22dc650dSSadaf Ebrahimi if (cfg2_feature_list == 0)
392*22dc650dSSadaf Ebrahimi __asm__ ("cpucfg %0, %1" : "+&r"(cfg2_feature_list) : "r"(LOONGARCH_CFG2));
393*22dc650dSSadaf Ebrahimi if (hwcap_feature_list == 0)
394*22dc650dSSadaf Ebrahimi hwcap_feature_list = (sljit_u32)getauxval(AT_HWCAP);
395*22dc650dSSadaf Ebrahimi
396*22dc650dSSadaf Ebrahimi return feature_type ? hwcap_feature_list : cfg2_feature_list;
397*22dc650dSSadaf Ebrahimi }
398*22dc650dSSadaf Ebrahimi
push_inst(struct sljit_compiler * compiler,sljit_ins ins)399*22dc650dSSadaf Ebrahimi static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
400*22dc650dSSadaf Ebrahimi {
401*22dc650dSSadaf Ebrahimi sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
402*22dc650dSSadaf Ebrahimi FAIL_IF(!ptr);
403*22dc650dSSadaf Ebrahimi *ptr = ins;
404*22dc650dSSadaf Ebrahimi compiler->size++;
405*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
406*22dc650dSSadaf Ebrahimi }
407*22dc650dSSadaf Ebrahimi
detect_jump_type(struct sljit_jump * jump,sljit_ins * code,sljit_sw executable_offset)408*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset)
409*22dc650dSSadaf Ebrahimi {
410*22dc650dSSadaf Ebrahimi sljit_sw diff;
411*22dc650dSSadaf Ebrahimi sljit_uw target_addr;
412*22dc650dSSadaf Ebrahimi sljit_ins *inst;
413*22dc650dSSadaf Ebrahimi
414*22dc650dSSadaf Ebrahimi inst = (sljit_ins *)jump->addr;
415*22dc650dSSadaf Ebrahimi
416*22dc650dSSadaf Ebrahimi if (jump->flags & SLJIT_REWRITABLE_JUMP)
417*22dc650dSSadaf Ebrahimi goto exit;
418*22dc650dSSadaf Ebrahimi
419*22dc650dSSadaf Ebrahimi if (jump->flags & JUMP_ADDR)
420*22dc650dSSadaf Ebrahimi target_addr = jump->u.target;
421*22dc650dSSadaf Ebrahimi else {
422*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(jump->u.label != NULL);
423*22dc650dSSadaf Ebrahimi target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
424*22dc650dSSadaf Ebrahimi }
425*22dc650dSSadaf Ebrahimi
426*22dc650dSSadaf Ebrahimi diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset;
427*22dc650dSSadaf Ebrahimi
428*22dc650dSSadaf Ebrahimi if (jump->flags & IS_COND) {
429*22dc650dSSadaf Ebrahimi diff += SSIZE_OF(ins);
430*22dc650dSSadaf Ebrahimi
431*22dc650dSSadaf Ebrahimi if (diff >= BRANCH16_MIN && diff <= BRANCH16_MAX) {
432*22dc650dSSadaf Ebrahimi inst--;
433*22dc650dSSadaf Ebrahimi inst[0] = (inst[0] & 0xfc0003ff) ^ 0x4000000;
434*22dc650dSSadaf Ebrahimi jump->flags |= PATCH_B;
435*22dc650dSSadaf Ebrahimi jump->addr = (sljit_uw)inst;
436*22dc650dSSadaf Ebrahimi return inst;
437*22dc650dSSadaf Ebrahimi }
438*22dc650dSSadaf Ebrahimi
439*22dc650dSSadaf Ebrahimi diff -= SSIZE_OF(ins);
440*22dc650dSSadaf Ebrahimi }
441*22dc650dSSadaf Ebrahimi
442*22dc650dSSadaf Ebrahimi if (diff >= JUMP_MIN && diff <= JUMP_MAX) {
443*22dc650dSSadaf Ebrahimi if (jump->flags & IS_COND) {
444*22dc650dSSadaf Ebrahimi inst[-1] |= (sljit_ins)IMM_I16(2);
445*22dc650dSSadaf Ebrahimi }
446*22dc650dSSadaf Ebrahimi
447*22dc650dSSadaf Ebrahimi jump->flags |= PATCH_J;
448*22dc650dSSadaf Ebrahimi return inst;
449*22dc650dSSadaf Ebrahimi }
450*22dc650dSSadaf Ebrahimi
451*22dc650dSSadaf Ebrahimi if (diff >= S32_MIN && diff <= S32_MAX) {
452*22dc650dSSadaf Ebrahimi if (jump->flags & IS_COND)
453*22dc650dSSadaf Ebrahimi inst[-1] |= (sljit_ins)IMM_I16(3);
454*22dc650dSSadaf Ebrahimi
455*22dc650dSSadaf Ebrahimi jump->flags |= PATCH_REL32;
456*22dc650dSSadaf Ebrahimi inst[1] = inst[0];
457*22dc650dSSadaf Ebrahimi return inst + 1;
458*22dc650dSSadaf Ebrahimi }
459*22dc650dSSadaf Ebrahimi
460*22dc650dSSadaf Ebrahimi if (target_addr <= (sljit_uw)S32_MAX) {
461*22dc650dSSadaf Ebrahimi if (jump->flags & IS_COND)
462*22dc650dSSadaf Ebrahimi inst[-1] |= (sljit_ins)IMM_I16(3);
463*22dc650dSSadaf Ebrahimi
464*22dc650dSSadaf Ebrahimi jump->flags |= PATCH_ABS32;
465*22dc650dSSadaf Ebrahimi inst[1] = inst[0];
466*22dc650dSSadaf Ebrahimi return inst + 1;
467*22dc650dSSadaf Ebrahimi }
468*22dc650dSSadaf Ebrahimi
469*22dc650dSSadaf Ebrahimi if (target_addr <= S52_MAX) {
470*22dc650dSSadaf Ebrahimi if (jump->flags & IS_COND)
471*22dc650dSSadaf Ebrahimi inst[-1] |= (sljit_ins)IMM_I16(4);
472*22dc650dSSadaf Ebrahimi
473*22dc650dSSadaf Ebrahimi jump->flags |= PATCH_ABS52;
474*22dc650dSSadaf Ebrahimi inst[2] = inst[0];
475*22dc650dSSadaf Ebrahimi return inst + 2;
476*22dc650dSSadaf Ebrahimi }
477*22dc650dSSadaf Ebrahimi
478*22dc650dSSadaf Ebrahimi exit:
479*22dc650dSSadaf Ebrahimi if (jump->flags & IS_COND)
480*22dc650dSSadaf Ebrahimi inst[-1] |= (sljit_ins)IMM_I16(5);
481*22dc650dSSadaf Ebrahimi inst[3] = inst[0];
482*22dc650dSSadaf Ebrahimi return inst + 3;
483*22dc650dSSadaf Ebrahimi }
484*22dc650dSSadaf Ebrahimi
mov_addr_get_length(struct sljit_jump * jump,sljit_ins * code_ptr,sljit_ins * code,sljit_sw executable_offset)485*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
486*22dc650dSSadaf Ebrahimi {
487*22dc650dSSadaf Ebrahimi sljit_uw addr;
488*22dc650dSSadaf Ebrahimi sljit_sw diff;
489*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(executable_offset);
490*22dc650dSSadaf Ebrahimi
491*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT));
492*22dc650dSSadaf Ebrahimi if (jump->flags & JUMP_ADDR)
493*22dc650dSSadaf Ebrahimi addr = jump->u.target;
494*22dc650dSSadaf Ebrahimi else
495*22dc650dSSadaf Ebrahimi addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);
496*22dc650dSSadaf Ebrahimi
497*22dc650dSSadaf Ebrahimi diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
498*22dc650dSSadaf Ebrahimi
499*22dc650dSSadaf Ebrahimi if (diff >= S32_MIN && diff <= S32_MAX) {
500*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
501*22dc650dSSadaf Ebrahimi jump->flags |= PATCH_REL32;
502*22dc650dSSadaf Ebrahimi return 1;
503*22dc650dSSadaf Ebrahimi }
504*22dc650dSSadaf Ebrahimi
505*22dc650dSSadaf Ebrahimi if (addr <= S32_MAX) {
506*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
507*22dc650dSSadaf Ebrahimi jump->flags |= PATCH_ABS32;
508*22dc650dSSadaf Ebrahimi return 1;
509*22dc650dSSadaf Ebrahimi }
510*22dc650dSSadaf Ebrahimi
511*22dc650dSSadaf Ebrahimi if (addr <= S52_MAX) {
512*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(jump->flags >= ((sljit_uw)2 << JUMP_SIZE_SHIFT));
513*22dc650dSSadaf Ebrahimi jump->flags |= PATCH_ABS52;
514*22dc650dSSadaf Ebrahimi return 2;
515*22dc650dSSadaf Ebrahimi }
516*22dc650dSSadaf Ebrahimi
517*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT));
518*22dc650dSSadaf Ebrahimi return 3;
519*22dc650dSSadaf Ebrahimi }
520*22dc650dSSadaf Ebrahimi
load_addr_to_reg(struct sljit_jump * jump,sljit_sw executable_offset)521*22dc650dSSadaf Ebrahimi static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump, sljit_sw executable_offset)
522*22dc650dSSadaf Ebrahimi {
523*22dc650dSSadaf Ebrahimi sljit_uw flags = jump->flags;
524*22dc650dSSadaf Ebrahimi sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
525*22dc650dSSadaf Ebrahimi sljit_ins *ins = (sljit_ins*)jump->addr;
526*22dc650dSSadaf Ebrahimi sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : TMP_REG1;
527*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(executable_offset);
528*22dc650dSSadaf Ebrahimi
529*22dc650dSSadaf Ebrahimi if (flags & PATCH_REL32) {
530*22dc650dSSadaf Ebrahimi addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset);
531*22dc650dSSadaf Ebrahimi
532*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX);
533*22dc650dSSadaf Ebrahimi
534*22dc650dSSadaf Ebrahimi if ((addr & 0x800) != 0)
535*22dc650dSSadaf Ebrahimi addr += 0x1000;
536*22dc650dSSadaf Ebrahimi
537*22dc650dSSadaf Ebrahimi ins[0] = PCADDU12I | RD(reg) | IMM_I20(addr);
538*22dc650dSSadaf Ebrahimi
539*22dc650dSSadaf Ebrahimi if (!(flags & JUMP_MOV_ADDR)) {
540*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL);
541*22dc650dSSadaf Ebrahimi ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);
542*22dc650dSSadaf Ebrahimi } else
543*22dc650dSSadaf Ebrahimi ins[1] = ADDI_D | RD(reg) | RJ(reg) | IMM_I12(addr);
544*22dc650dSSadaf Ebrahimi return;
545*22dc650dSSadaf Ebrahimi }
546*22dc650dSSadaf Ebrahimi
547*22dc650dSSadaf Ebrahimi if (flags & PATCH_ABS32) {
548*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(addr <= S32_MAX);
549*22dc650dSSadaf Ebrahimi ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
550*22dc650dSSadaf Ebrahimi } else if (flags & PATCH_ABS52) {
551*22dc650dSSadaf Ebrahimi ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
552*22dc650dSSadaf Ebrahimi ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);
553*22dc650dSSadaf Ebrahimi ins += 1;
554*22dc650dSSadaf Ebrahimi } else {
555*22dc650dSSadaf Ebrahimi ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
556*22dc650dSSadaf Ebrahimi ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);
557*22dc650dSSadaf Ebrahimi ins[2] = LU52I_D | RD(reg) | RJ(reg) | IMM_I12(addr >> 52);
558*22dc650dSSadaf Ebrahimi ins += 2;
559*22dc650dSSadaf Ebrahimi }
560*22dc650dSSadaf Ebrahimi
561*22dc650dSSadaf Ebrahimi if (!(flags & JUMP_MOV_ADDR)) {
562*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL);
563*22dc650dSSadaf Ebrahimi ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);
564*22dc650dSSadaf Ebrahimi } else
565*22dc650dSSadaf Ebrahimi ins[1] = ORI | RD(reg) | RJ(reg) | IMM_I12(addr);
566*22dc650dSSadaf Ebrahimi }
567*22dc650dSSadaf Ebrahimi
reduce_code_size(struct sljit_compiler * compiler)568*22dc650dSSadaf Ebrahimi static void reduce_code_size(struct sljit_compiler *compiler)
569*22dc650dSSadaf Ebrahimi {
570*22dc650dSSadaf Ebrahimi struct sljit_label *label;
571*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
572*22dc650dSSadaf Ebrahimi struct sljit_const *const_;
573*22dc650dSSadaf Ebrahimi SLJIT_NEXT_DEFINE_TYPES;
574*22dc650dSSadaf Ebrahimi sljit_uw total_size;
575*22dc650dSSadaf Ebrahimi sljit_uw size_reduce = 0;
576*22dc650dSSadaf Ebrahimi sljit_sw diff;
577*22dc650dSSadaf Ebrahimi
578*22dc650dSSadaf Ebrahimi label = compiler->labels;
579*22dc650dSSadaf Ebrahimi jump = compiler->jumps;
580*22dc650dSSadaf Ebrahimi const_ = compiler->consts;
581*22dc650dSSadaf Ebrahimi
582*22dc650dSSadaf Ebrahimi SLJIT_NEXT_INIT_TYPES();
583*22dc650dSSadaf Ebrahimi
584*22dc650dSSadaf Ebrahimi while (1) {
585*22dc650dSSadaf Ebrahimi SLJIT_GET_NEXT_MIN();
586*22dc650dSSadaf Ebrahimi
587*22dc650dSSadaf Ebrahimi if (next_min_addr == SLJIT_MAX_ADDRESS)
588*22dc650dSSadaf Ebrahimi break;
589*22dc650dSSadaf Ebrahimi
590*22dc650dSSadaf Ebrahimi if (next_min_addr == next_label_size) {
591*22dc650dSSadaf Ebrahimi label->size -= size_reduce;
592*22dc650dSSadaf Ebrahimi
593*22dc650dSSadaf Ebrahimi label = label->next;
594*22dc650dSSadaf Ebrahimi next_label_size = SLJIT_GET_NEXT_SIZE(label);
595*22dc650dSSadaf Ebrahimi }
596*22dc650dSSadaf Ebrahimi
597*22dc650dSSadaf Ebrahimi if (next_min_addr == next_const_addr) {
598*22dc650dSSadaf Ebrahimi const_->addr -= size_reduce;
599*22dc650dSSadaf Ebrahimi const_ = const_->next;
600*22dc650dSSadaf Ebrahimi next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
601*22dc650dSSadaf Ebrahimi continue;
602*22dc650dSSadaf Ebrahimi }
603*22dc650dSSadaf Ebrahimi
604*22dc650dSSadaf Ebrahimi if (next_min_addr != next_jump_addr)
605*22dc650dSSadaf Ebrahimi continue;
606*22dc650dSSadaf Ebrahimi
607*22dc650dSSadaf Ebrahimi jump->addr -= size_reduce;
608*22dc650dSSadaf Ebrahimi if (!(jump->flags & JUMP_MOV_ADDR)) {
609*22dc650dSSadaf Ebrahimi total_size = JUMP_MAX_SIZE;
610*22dc650dSSadaf Ebrahimi
611*22dc650dSSadaf Ebrahimi if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
612*22dc650dSSadaf Ebrahimi if (jump->flags & JUMP_ADDR) {
613*22dc650dSSadaf Ebrahimi if (jump->u.target <= S32_MAX)
614*22dc650dSSadaf Ebrahimi total_size = 2;
615*22dc650dSSadaf Ebrahimi else if (jump->u.target <= S52_MAX)
616*22dc650dSSadaf Ebrahimi total_size = 3;
617*22dc650dSSadaf Ebrahimi } else {
618*22dc650dSSadaf Ebrahimi /* Unit size: instruction. */
619*22dc650dSSadaf Ebrahimi diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
620*22dc650dSSadaf Ebrahimi
621*22dc650dSSadaf Ebrahimi if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH16_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH16_MIN / SSIZE_OF(ins)))
622*22dc650dSSadaf Ebrahimi total_size = 0;
623*22dc650dSSadaf Ebrahimi else if (diff >= (JUMP_MIN / SSIZE_OF(ins)) && diff <= (JUMP_MAX / SSIZE_OF(ins)))
624*22dc650dSSadaf Ebrahimi total_size = 1;
625*22dc650dSSadaf Ebrahimi else if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
626*22dc650dSSadaf Ebrahimi total_size = 2;
627*22dc650dSSadaf Ebrahimi }
628*22dc650dSSadaf Ebrahimi }
629*22dc650dSSadaf Ebrahimi
630*22dc650dSSadaf Ebrahimi size_reduce += JUMP_MAX_SIZE - total_size;
631*22dc650dSSadaf Ebrahimi jump->flags |= total_size << JUMP_SIZE_SHIFT;
632*22dc650dSSadaf Ebrahimi } else {
633*22dc650dSSadaf Ebrahimi total_size = 3;
634*22dc650dSSadaf Ebrahimi
635*22dc650dSSadaf Ebrahimi if (!(jump->flags & JUMP_ADDR)) {
636*22dc650dSSadaf Ebrahimi /* Real size minus 1. Unit size: instruction. */
637*22dc650dSSadaf Ebrahimi diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
638*22dc650dSSadaf Ebrahimi
639*22dc650dSSadaf Ebrahimi if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
640*22dc650dSSadaf Ebrahimi total_size = 1;
641*22dc650dSSadaf Ebrahimi } else if (jump->u.target < S32_MAX)
642*22dc650dSSadaf Ebrahimi total_size = 1;
643*22dc650dSSadaf Ebrahimi else if (jump->u.target <= S52_MAX)
644*22dc650dSSadaf Ebrahimi total_size = 2;
645*22dc650dSSadaf Ebrahimi
646*22dc650dSSadaf Ebrahimi size_reduce += 3 - total_size;
647*22dc650dSSadaf Ebrahimi jump->flags |= total_size << JUMP_SIZE_SHIFT;
648*22dc650dSSadaf Ebrahimi }
649*22dc650dSSadaf Ebrahimi
650*22dc650dSSadaf Ebrahimi jump = jump->next;
651*22dc650dSSadaf Ebrahimi next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
652*22dc650dSSadaf Ebrahimi }
653*22dc650dSSadaf Ebrahimi
654*22dc650dSSadaf Ebrahimi compiler->size -= size_reduce;
655*22dc650dSSadaf Ebrahimi }
656*22dc650dSSadaf Ebrahimi
sljit_generate_code(struct sljit_compiler * compiler,sljit_s32 options,void * exec_allocator_data)657*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
658*22dc650dSSadaf Ebrahimi {
659*22dc650dSSadaf Ebrahimi struct sljit_memory_fragment *buf;
660*22dc650dSSadaf Ebrahimi sljit_ins *code;
661*22dc650dSSadaf Ebrahimi sljit_ins *code_ptr;
662*22dc650dSSadaf Ebrahimi sljit_ins *buf_ptr;
663*22dc650dSSadaf Ebrahimi sljit_ins *buf_end;
664*22dc650dSSadaf Ebrahimi sljit_uw word_count;
665*22dc650dSSadaf Ebrahimi SLJIT_NEXT_DEFINE_TYPES;
666*22dc650dSSadaf Ebrahimi sljit_sw executable_offset;
667*22dc650dSSadaf Ebrahimi sljit_uw addr;
668*22dc650dSSadaf Ebrahimi
669*22dc650dSSadaf Ebrahimi struct sljit_label *label;
670*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
671*22dc650dSSadaf Ebrahimi struct sljit_const *const_;
672*22dc650dSSadaf Ebrahimi
673*22dc650dSSadaf Ebrahimi CHECK_ERROR_PTR();
674*22dc650dSSadaf Ebrahimi CHECK_PTR(check_sljit_generate_code(compiler));
675*22dc650dSSadaf Ebrahimi
676*22dc650dSSadaf Ebrahimi reduce_code_size(compiler);
677*22dc650dSSadaf Ebrahimi
678*22dc650dSSadaf Ebrahimi code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset);
679*22dc650dSSadaf Ebrahimi PTR_FAIL_WITH_EXEC_IF(code);
680*22dc650dSSadaf Ebrahimi
681*22dc650dSSadaf Ebrahimi reverse_buf(compiler);
682*22dc650dSSadaf Ebrahimi buf = compiler->buf;
683*22dc650dSSadaf Ebrahimi
684*22dc650dSSadaf Ebrahimi code_ptr = code;
685*22dc650dSSadaf Ebrahimi word_count = 0;
686*22dc650dSSadaf Ebrahimi label = compiler->labels;
687*22dc650dSSadaf Ebrahimi jump = compiler->jumps;
688*22dc650dSSadaf Ebrahimi const_ = compiler->consts;
689*22dc650dSSadaf Ebrahimi SLJIT_NEXT_INIT_TYPES();
690*22dc650dSSadaf Ebrahimi SLJIT_GET_NEXT_MIN();
691*22dc650dSSadaf Ebrahimi
692*22dc650dSSadaf Ebrahimi do {
693*22dc650dSSadaf Ebrahimi buf_ptr = (sljit_ins*)buf->memory;
694*22dc650dSSadaf Ebrahimi buf_end = buf_ptr + (buf->used_size >> 2);
695*22dc650dSSadaf Ebrahimi do {
696*22dc650dSSadaf Ebrahimi *code_ptr = *buf_ptr++;
697*22dc650dSSadaf Ebrahimi if (next_min_addr == word_count) {
698*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!label || label->size >= word_count);
699*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!jump || jump->addr >= word_count);
700*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!const_ || const_->addr >= word_count);
701*22dc650dSSadaf Ebrahimi
702*22dc650dSSadaf Ebrahimi /* These structures are ordered by their address. */
703*22dc650dSSadaf Ebrahimi if (next_min_addr == next_label_size) {
704*22dc650dSSadaf Ebrahimi label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
705*22dc650dSSadaf Ebrahimi label->size = (sljit_uw)(code_ptr - code);
706*22dc650dSSadaf Ebrahimi label = label->next;
707*22dc650dSSadaf Ebrahimi next_label_size = SLJIT_GET_NEXT_SIZE(label);
708*22dc650dSSadaf Ebrahimi }
709*22dc650dSSadaf Ebrahimi
710*22dc650dSSadaf Ebrahimi if (next_min_addr == next_jump_addr) {
711*22dc650dSSadaf Ebrahimi if (!(jump->flags & JUMP_MOV_ADDR)) {
712*22dc650dSSadaf Ebrahimi word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);
713*22dc650dSSadaf Ebrahimi jump->addr = (sljit_uw)code_ptr;
714*22dc650dSSadaf Ebrahimi code_ptr = detect_jump_type(jump, code, executable_offset);
715*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins)));
716*22dc650dSSadaf Ebrahimi } else {
717*22dc650dSSadaf Ebrahimi word_count += jump->flags >> JUMP_SIZE_SHIFT;
718*22dc650dSSadaf Ebrahimi addr = (sljit_uw)code_ptr;
719*22dc650dSSadaf Ebrahimi code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
720*22dc650dSSadaf Ebrahimi jump->addr = addr;
721*22dc650dSSadaf Ebrahimi }
722*22dc650dSSadaf Ebrahimi jump = jump->next;
723*22dc650dSSadaf Ebrahimi next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
724*22dc650dSSadaf Ebrahimi } else if (next_min_addr == next_const_addr) {
725*22dc650dSSadaf Ebrahimi const_->addr = (sljit_uw)code_ptr;
726*22dc650dSSadaf Ebrahimi const_ = const_->next;
727*22dc650dSSadaf Ebrahimi next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
728*22dc650dSSadaf Ebrahimi }
729*22dc650dSSadaf Ebrahimi
730*22dc650dSSadaf Ebrahimi SLJIT_GET_NEXT_MIN();
731*22dc650dSSadaf Ebrahimi }
732*22dc650dSSadaf Ebrahimi code_ptr++;
733*22dc650dSSadaf Ebrahimi word_count++;
734*22dc650dSSadaf Ebrahimi } while (buf_ptr < buf_end);
735*22dc650dSSadaf Ebrahimi
736*22dc650dSSadaf Ebrahimi buf = buf->next;
737*22dc650dSSadaf Ebrahimi } while (buf);
738*22dc650dSSadaf Ebrahimi
739*22dc650dSSadaf Ebrahimi if (label && label->size == word_count) {
740*22dc650dSSadaf Ebrahimi label->u.addr = (sljit_uw)code_ptr;
741*22dc650dSSadaf Ebrahimi label->size = (sljit_uw)(code_ptr - code);
742*22dc650dSSadaf Ebrahimi label = label->next;
743*22dc650dSSadaf Ebrahimi }
744*22dc650dSSadaf Ebrahimi
745*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!label);
746*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!jump);
747*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!const_);
748*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
749*22dc650dSSadaf Ebrahimi
750*22dc650dSSadaf Ebrahimi jump = compiler->jumps;
751*22dc650dSSadaf Ebrahimi while (jump) {
752*22dc650dSSadaf Ebrahimi do {
753*22dc650dSSadaf Ebrahimi if (!(jump->flags & (PATCH_B | PATCH_J)) || (jump->flags & JUMP_MOV_ADDR)) {
754*22dc650dSSadaf Ebrahimi load_addr_to_reg(jump, executable_offset);
755*22dc650dSSadaf Ebrahimi break;
756*22dc650dSSadaf Ebrahimi }
757*22dc650dSSadaf Ebrahimi
758*22dc650dSSadaf Ebrahimi addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
759*22dc650dSSadaf Ebrahimi buf_ptr = (sljit_ins *)jump->addr;
760*22dc650dSSadaf Ebrahimi addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
761*22dc650dSSadaf Ebrahimi
762*22dc650dSSadaf Ebrahimi if (jump->flags & PATCH_B) {
763*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((sljit_sw)addr >= BRANCH16_MIN && (sljit_sw)addr <= BRANCH16_MAX);
764*22dc650dSSadaf Ebrahimi buf_ptr[0] |= (sljit_ins)IMM_I16(addr >> 2);
765*22dc650dSSadaf Ebrahimi break;
766*22dc650dSSadaf Ebrahimi }
767*22dc650dSSadaf Ebrahimi
768*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX);
769*22dc650dSSadaf Ebrahimi if (jump->flags & IS_CALL)
770*22dc650dSSadaf Ebrahimi buf_ptr[0] = BL | (sljit_ins)IMM_I26(addr >> 2);
771*22dc650dSSadaf Ebrahimi else
772*22dc650dSSadaf Ebrahimi buf_ptr[0] = B | (sljit_ins)IMM_I26(addr >> 2);
773*22dc650dSSadaf Ebrahimi } while (0);
774*22dc650dSSadaf Ebrahimi jump = jump->next;
775*22dc650dSSadaf Ebrahimi }
776*22dc650dSSadaf Ebrahimi
777*22dc650dSSadaf Ebrahimi compiler->error = SLJIT_ERR_COMPILED;
778*22dc650dSSadaf Ebrahimi compiler->executable_offset = executable_offset;
779*22dc650dSSadaf Ebrahimi compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
780*22dc650dSSadaf Ebrahimi
781*22dc650dSSadaf Ebrahimi code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
782*22dc650dSSadaf Ebrahimi code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
783*22dc650dSSadaf Ebrahimi
784*22dc650dSSadaf Ebrahimi SLJIT_CACHE_FLUSH(code, code_ptr);
785*22dc650dSSadaf Ebrahimi SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
786*22dc650dSSadaf Ebrahimi return code;
787*22dc650dSSadaf Ebrahimi }
788*22dc650dSSadaf Ebrahimi
sljit_has_cpu_feature(sljit_s32 feature_type)789*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
790*22dc650dSSadaf Ebrahimi {
791*22dc650dSSadaf Ebrahimi switch (feature_type)
792*22dc650dSSadaf Ebrahimi {
793*22dc650dSSadaf Ebrahimi case SLJIT_HAS_FPU:
794*22dc650dSSadaf Ebrahimi #ifdef SLJIT_IS_FPU_AVAILABLE
795*22dc650dSSadaf Ebrahimi return (SLJIT_IS_FPU_AVAILABLE) != 0;
796*22dc650dSSadaf Ebrahimi #else
797*22dc650dSSadaf Ebrahimi /* Available by default. */
798*22dc650dSSadaf Ebrahimi return 1;
799*22dc650dSSadaf Ebrahimi #endif
800*22dc650dSSadaf Ebrahimi
801*22dc650dSSadaf Ebrahimi case SLJIT_HAS_LASX:
802*22dc650dSSadaf Ebrahimi return (LOONGARCH_HWCAP_LASX & get_cpu_features(GET_HWCAP));
803*22dc650dSSadaf Ebrahimi
804*22dc650dSSadaf Ebrahimi case SLJIT_HAS_SIMD:
805*22dc650dSSadaf Ebrahimi return (LOONGARCH_HWCAP_LSX & get_cpu_features(GET_HWCAP));
806*22dc650dSSadaf Ebrahimi
807*22dc650dSSadaf Ebrahimi case SLJIT_HAS_ATOMIC:
808*22dc650dSSadaf Ebrahimi return (LOONGARCH_CFG2_LAMCAS & get_cpu_features(GET_CFG2));
809*22dc650dSSadaf Ebrahimi
810*22dc650dSSadaf Ebrahimi case SLJIT_HAS_CLZ:
811*22dc650dSSadaf Ebrahimi case SLJIT_HAS_CTZ:
812*22dc650dSSadaf Ebrahimi case SLJIT_HAS_REV:
813*22dc650dSSadaf Ebrahimi case SLJIT_HAS_ROT:
814*22dc650dSSadaf Ebrahimi case SLJIT_HAS_PREFETCH:
815*22dc650dSSadaf Ebrahimi case SLJIT_HAS_COPY_F32:
816*22dc650dSSadaf Ebrahimi case SLJIT_HAS_COPY_F64:
817*22dc650dSSadaf Ebrahimi return 1;
818*22dc650dSSadaf Ebrahimi
819*22dc650dSSadaf Ebrahimi default:
820*22dc650dSSadaf Ebrahimi return 0;
821*22dc650dSSadaf Ebrahimi }
822*22dc650dSSadaf Ebrahimi }
823*22dc650dSSadaf Ebrahimi
sljit_cmp_info(sljit_s32 type)824*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
825*22dc650dSSadaf Ebrahimi {
826*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(type);
827*22dc650dSSadaf Ebrahimi
828*22dc650dSSadaf Ebrahimi return 0;
829*22dc650dSSadaf Ebrahimi }
830*22dc650dSSadaf Ebrahimi
831*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
832*22dc650dSSadaf Ebrahimi /* Entry, exit */
833*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
834*22dc650dSSadaf Ebrahimi
/* The flags below create an index in the data_transfer_insts array.
   Bit 0 selects load (odd index) or store (even index), bits 1-2 the
   access size and bit 3 the signed variant. */
#define LOAD_DATA	0x01
#define WORD_DATA	0x00
#define BYTE_DATA	0x02
#define HALF_DATA	0x04
#define INT_DATA	0x06
#define SIGNED_DATA	0x08
/* Separates integer and floating point registers */
#define GPR_REG		0x0f
/* Floating point entries follow the 16 integer entries. */
#define DOUBLE_DATA	0x10
#define SINGLE_DATA	0x12

/* Covers every index bit defined above. */
#define MEM_MASK	0x1f

/* Additional operation flags (stored above MEM_MASK). */
#define ARG_TEST	0x00020
#define ALT_KEEP_CACHE	0x00040
#define CUMULATIVE_OP	0x00080
#define IMM_OP		0x00100
#define MOVE_OP		0x00200
#define SRC2_IMM	0x00400

#define UNUSED_DEST	0x00800
#define REG_DEST	0x01000
#define REG1_SOURCE	0x02000
#define REG2_SOURCE	0x04000
#define SLOW_SRC1	0x08000
#define SLOW_SRC2	0x10000
#define SLOW_DEST	0x20000
#define MEM_USE_TMP2	0x40000

/* Word sized instructions used for saving / restoring stack slots. */
#define STACK_STORE	ST_D
#define STACK_LOAD	LD_D
867*22dc650dSSadaf Ebrahimi
/*
 * Emits the shortest instruction sequence that loads the constant imm
 * into register dst_r:
 *   - 12 bit signed value:   addi.d
 *   - 32 bit signed value:   lu12i.w, ori
 *   - 52 bit signed value:   lu12i.w, ori, lu32i.d
 *   - anything else:         lu12i.w, ori, lu32i.d, lu52i.d
 * Returns the result of the last push_inst() call, or the compiler
 * error when an earlier instruction cannot be stored.
 */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm)
{
	sljit_ins load_bits_12_31;
	sljit_ins load_bits_0_11;
	sljit_ins load_bits_32_51;

	if (imm >= I12_MIN && imm <= I12_MAX)
		return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(TMP_ZERO) | IMM_I12(imm));

	/* Longer forms share the same leading instructions. */
	load_bits_12_31 = LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5);
	load_bits_0_11 = ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm);
	load_bits_32_51 = LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5);

	FAIL_IF(push_inst(compiler, load_bits_12_31));

	if (imm >= -0x80000000l && imm <= 0x7fffffffl)
		return push_inst(compiler, load_bits_0_11);

	FAIL_IF(push_inst(compiler, load_bits_0_11));

	if (imm >= -0x8000000000000l && imm <= 0x7ffffffffffffl)
		return push_inst(compiler, load_bits_32_51);

	FAIL_IF(push_inst(compiler, load_bits_32_51));
	return push_inst(compiler, LU52I_D | RD(dst_r) | RJ(dst_r) | IMM_I12(imm >> 52));
}
886*22dc650dSSadaf Ebrahimi
887*22dc650dSSadaf Ebrahimi #define STACK_MAX_DISTANCE (-I12_MIN)
888*22dc650dSSadaf Ebrahimi
889*22dc650dSSadaf Ebrahimi static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw);
890*22dc650dSSadaf Ebrahimi
sljit_emit_enter(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)891*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
892*22dc650dSSadaf Ebrahimi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
893*22dc650dSSadaf Ebrahimi sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
894*22dc650dSSadaf Ebrahimi {
895*22dc650dSSadaf Ebrahimi sljit_s32 i, tmp, offset;
896*22dc650dSSadaf Ebrahimi sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
897*22dc650dSSadaf Ebrahimi
898*22dc650dSSadaf Ebrahimi CHECK_ERROR();
899*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
900*22dc650dSSadaf Ebrahimi set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
901*22dc650dSSadaf Ebrahimi
902*22dc650dSSadaf Ebrahimi local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
903*22dc650dSSadaf Ebrahimi local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
904*22dc650dSSadaf Ebrahimi
905*22dc650dSSadaf Ebrahimi local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
906*22dc650dSSadaf Ebrahimi compiler->local_size = local_size;
907*22dc650dSSadaf Ebrahimi
908*22dc650dSSadaf Ebrahimi if (local_size <= STACK_MAX_DISTANCE) {
909*22dc650dSSadaf Ebrahimi /* Frequent case. */
910*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));
911*22dc650dSSadaf Ebrahimi offset = local_size - SSIZE_OF(sw);
912*22dc650dSSadaf Ebrahimi local_size = 0;
913*22dc650dSSadaf Ebrahimi } else {
914*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(STACK_MAX_DISTANCE)));
915*22dc650dSSadaf Ebrahimi local_size -= STACK_MAX_DISTANCE;
916*22dc650dSSadaf Ebrahimi
917*22dc650dSSadaf Ebrahimi if (local_size > STACK_MAX_DISTANCE)
918*22dc650dSSadaf Ebrahimi FAIL_IF(load_immediate(compiler, TMP_REG1, local_size));
919*22dc650dSSadaf Ebrahimi offset = STACK_MAX_DISTANCE - SSIZE_OF(sw);
920*22dc650dSSadaf Ebrahimi }
921*22dc650dSSadaf Ebrahimi
922*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, STACK_STORE | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));
923*22dc650dSSadaf Ebrahimi
924*22dc650dSSadaf Ebrahimi tmp = SLJIT_S0 - saveds;
925*22dc650dSSadaf Ebrahimi for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
926*22dc650dSSadaf Ebrahimi offset -= SSIZE_OF(sw);
927*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
928*22dc650dSSadaf Ebrahimi }
929*22dc650dSSadaf Ebrahimi
930*22dc650dSSadaf Ebrahimi for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
931*22dc650dSSadaf Ebrahimi offset -= SSIZE_OF(sw);
932*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
933*22dc650dSSadaf Ebrahimi }
934*22dc650dSSadaf Ebrahimi
935*22dc650dSSadaf Ebrahimi tmp = SLJIT_FS0 - fsaveds;
936*22dc650dSSadaf Ebrahimi for (i = SLJIT_FS0; i > tmp; i--) {
937*22dc650dSSadaf Ebrahimi offset -= SSIZE_OF(f64);
938*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
939*22dc650dSSadaf Ebrahimi }
940*22dc650dSSadaf Ebrahimi
941*22dc650dSSadaf Ebrahimi for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
942*22dc650dSSadaf Ebrahimi offset -= SSIZE_OF(f64);
943*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
944*22dc650dSSadaf Ebrahimi }
945*22dc650dSSadaf Ebrahimi
946*22dc650dSSadaf Ebrahimi if (local_size > STACK_MAX_DISTANCE)
947*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SUB_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG1)));
948*22dc650dSSadaf Ebrahimi else if (local_size > 0)
949*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));
950*22dc650dSSadaf Ebrahimi
951*22dc650dSSadaf Ebrahimi if (options & SLJIT_ENTER_REG_ARG)
952*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
953*22dc650dSSadaf Ebrahimi
954*22dc650dSSadaf Ebrahimi arg_types >>= SLJIT_ARG_SHIFT;
955*22dc650dSSadaf Ebrahimi saved_arg_count = 0;
956*22dc650dSSadaf Ebrahimi tmp = SLJIT_R0;
957*22dc650dSSadaf Ebrahimi
958*22dc650dSSadaf Ebrahimi while (arg_types > 0) {
959*22dc650dSSadaf Ebrahimi if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
960*22dc650dSSadaf Ebrahimi if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
961*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_S0 - saved_arg_count) | RJ(tmp) | IMM_I12(0)));
962*22dc650dSSadaf Ebrahimi saved_arg_count++;
963*22dc650dSSadaf Ebrahimi }
964*22dc650dSSadaf Ebrahimi tmp++;
965*22dc650dSSadaf Ebrahimi }
966*22dc650dSSadaf Ebrahimi
967*22dc650dSSadaf Ebrahimi arg_types >>= SLJIT_ARG_SHIFT;
968*22dc650dSSadaf Ebrahimi }
969*22dc650dSSadaf Ebrahimi
970*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
971*22dc650dSSadaf Ebrahimi }
972*22dc650dSSadaf Ebrahimi
973*22dc650dSSadaf Ebrahimi #undef STACK_MAX_DISTANCE
974*22dc650dSSadaf Ebrahimi
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)975*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
976*22dc650dSSadaf Ebrahimi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
977*22dc650dSSadaf Ebrahimi sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
978*22dc650dSSadaf Ebrahimi {
979*22dc650dSSadaf Ebrahimi CHECK_ERROR();
980*22dc650dSSadaf Ebrahimi CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
981*22dc650dSSadaf Ebrahimi set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
982*22dc650dSSadaf Ebrahimi
983*22dc650dSSadaf Ebrahimi local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
984*22dc650dSSadaf Ebrahimi local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
985*22dc650dSSadaf Ebrahimi
986*22dc650dSSadaf Ebrahimi compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
987*22dc650dSSadaf Ebrahimi
988*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
989*22dc650dSSadaf Ebrahimi }
990*22dc650dSSadaf Ebrahimi
991*22dc650dSSadaf Ebrahimi #define STACK_MAX_DISTANCE (-I12_MIN - 16)
992*22dc650dSSadaf Ebrahimi
emit_stack_frame_release(struct sljit_compiler * compiler,sljit_s32 is_return_to)993*22dc650dSSadaf Ebrahimi static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
994*22dc650dSSadaf Ebrahimi {
995*22dc650dSSadaf Ebrahimi sljit_s32 i, tmp, offset;
996*22dc650dSSadaf Ebrahimi sljit_s32 local_size = compiler->local_size;
997*22dc650dSSadaf Ebrahimi
998*22dc650dSSadaf Ebrahimi if (local_size > STACK_MAX_DISTANCE) {
999*22dc650dSSadaf Ebrahimi local_size -= STACK_MAX_DISTANCE;
1000*22dc650dSSadaf Ebrahimi
1001*22dc650dSSadaf Ebrahimi if (local_size > STACK_MAX_DISTANCE) {
1002*22dc650dSSadaf Ebrahimi FAIL_IF(load_immediate(compiler, TMP_REG2, local_size));
1003*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADD_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG2)));
1004*22dc650dSSadaf Ebrahimi } else
1005*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size)));
1006*22dc650dSSadaf Ebrahimi
1007*22dc650dSSadaf Ebrahimi local_size = STACK_MAX_DISTANCE;
1008*22dc650dSSadaf Ebrahimi }
1009*22dc650dSSadaf Ebrahimi
1010*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(local_size > 0);
1011*22dc650dSSadaf Ebrahimi
1012*22dc650dSSadaf Ebrahimi offset = local_size - SSIZE_OF(sw);
1013*22dc650dSSadaf Ebrahimi if (!is_return_to)
1014*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, STACK_LOAD | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));
1015*22dc650dSSadaf Ebrahimi
1016*22dc650dSSadaf Ebrahimi tmp = SLJIT_S0 - compiler->saveds;
1017*22dc650dSSadaf Ebrahimi for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
1018*22dc650dSSadaf Ebrahimi offset -= SSIZE_OF(sw);
1019*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1020*22dc650dSSadaf Ebrahimi }
1021*22dc650dSSadaf Ebrahimi
1022*22dc650dSSadaf Ebrahimi for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
1023*22dc650dSSadaf Ebrahimi offset -= SSIZE_OF(sw);
1024*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1025*22dc650dSSadaf Ebrahimi }
1026*22dc650dSSadaf Ebrahimi
1027*22dc650dSSadaf Ebrahimi tmp = SLJIT_FS0 - compiler->fsaveds;
1028*22dc650dSSadaf Ebrahimi for (i = SLJIT_FS0; i > tmp; i--) {
1029*22dc650dSSadaf Ebrahimi offset -= SSIZE_OF(f64);
1030*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1031*22dc650dSSadaf Ebrahimi }
1032*22dc650dSSadaf Ebrahimi
1033*22dc650dSSadaf Ebrahimi for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1034*22dc650dSSadaf Ebrahimi offset -= SSIZE_OF(f64);
1035*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1036*22dc650dSSadaf Ebrahimi }
1037*22dc650dSSadaf Ebrahimi
1038*22dc650dSSadaf Ebrahimi return push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size));
1039*22dc650dSSadaf Ebrahimi }
1040*22dc650dSSadaf Ebrahimi
1041*22dc650dSSadaf Ebrahimi #undef STACK_MAX_DISTANCE
1042*22dc650dSSadaf Ebrahimi
sljit_emit_return_void(struct sljit_compiler * compiler)1043*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1044*22dc650dSSadaf Ebrahimi {
1045*22dc650dSSadaf Ebrahimi CHECK_ERROR();
1046*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_return_void(compiler));
1047*22dc650dSSadaf Ebrahimi
1048*22dc650dSSadaf Ebrahimi FAIL_IF(emit_stack_frame_release(compiler, 0));
1049*22dc650dSSadaf Ebrahimi return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
1050*22dc650dSSadaf Ebrahimi }
1051*22dc650dSSadaf Ebrahimi
sljit_emit_return_to(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)1052*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1053*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
1054*22dc650dSSadaf Ebrahimi {
1055*22dc650dSSadaf Ebrahimi CHECK_ERROR();
1056*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1057*22dc650dSSadaf Ebrahimi
1058*22dc650dSSadaf Ebrahimi if (src & SLJIT_MEM) {
1059*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src, srcw);
1060*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
1061*22dc650dSSadaf Ebrahimi src = TMP_REG1;
1062*22dc650dSSadaf Ebrahimi srcw = 0;
1063*22dc650dSSadaf Ebrahimi } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1064*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
1065*22dc650dSSadaf Ebrahimi src = TMP_REG1;
1066*22dc650dSSadaf Ebrahimi srcw = 0;
1067*22dc650dSSadaf Ebrahimi }
1068*22dc650dSSadaf Ebrahimi
1069*22dc650dSSadaf Ebrahimi FAIL_IF(emit_stack_frame_release(compiler, 1));
1070*22dc650dSSadaf Ebrahimi
1071*22dc650dSSadaf Ebrahimi SLJIT_SKIP_CHECKS(compiler);
1072*22dc650dSSadaf Ebrahimi return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1073*22dc650dSSadaf Ebrahimi }
1074*22dc650dSSadaf Ebrahimi
1075*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
1076*22dc650dSSadaf Ebrahimi /* Operators */
1077*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
1078*22dc650dSSadaf Ebrahimi
1079*22dc650dSSadaf Ebrahimi static const sljit_ins data_transfer_insts[16 + 4] = {
1080*22dc650dSSadaf Ebrahimi /* u w s */ ST_D /* st.d */,
1081*22dc650dSSadaf Ebrahimi /* u w l */ LD_D /* ld.d */,
1082*22dc650dSSadaf Ebrahimi /* u b s */ ST_B /* st.b */,
1083*22dc650dSSadaf Ebrahimi /* u b l */ LD_BU /* ld.bu */,
1084*22dc650dSSadaf Ebrahimi /* u h s */ ST_H /* st.h */,
1085*22dc650dSSadaf Ebrahimi /* u h l */ LD_HU /* ld.hu */,
1086*22dc650dSSadaf Ebrahimi /* u i s */ ST_W /* st.w */,
1087*22dc650dSSadaf Ebrahimi /* u i l */ LD_WU /* ld.wu */,
1088*22dc650dSSadaf Ebrahimi
1089*22dc650dSSadaf Ebrahimi /* s w s */ ST_D /* st.d */,
1090*22dc650dSSadaf Ebrahimi /* s w l */ LD_D /* ld.d */,
1091*22dc650dSSadaf Ebrahimi /* s b s */ ST_B /* st.b */,
1092*22dc650dSSadaf Ebrahimi /* s b l */ LD_B /* ld.b */,
1093*22dc650dSSadaf Ebrahimi /* s h s */ ST_H /* st.h */,
1094*22dc650dSSadaf Ebrahimi /* s h l */ LD_H /* ld.h */,
1095*22dc650dSSadaf Ebrahimi /* s i s */ ST_W /* st.w */,
1096*22dc650dSSadaf Ebrahimi /* s i l */ LD_W /* ld.w */,
1097*22dc650dSSadaf Ebrahimi
1098*22dc650dSSadaf Ebrahimi /* d s */ FST_D /* fst.d */,
1099*22dc650dSSadaf Ebrahimi /* d l */ FLD_D /* fld.d */,
1100*22dc650dSSadaf Ebrahimi /* s s */ FST_S /* fst.s */,
1101*22dc650dSSadaf Ebrahimi /* s l */ FLD_S /* fld.s */,
1102*22dc650dSSadaf Ebrahimi };
1103*22dc650dSSadaf Ebrahimi
1104*22dc650dSSadaf Ebrahimi static const sljit_ins data_transfer_insts_x[16 + 4] = {
1105*22dc650dSSadaf Ebrahimi /* u w s */ STX_D /* stx.d */,
1106*22dc650dSSadaf Ebrahimi /* u w l */ LDX_D /* ldx.d */,
1107*22dc650dSSadaf Ebrahimi /* u b s */ STX_B /* stx.b */,
1108*22dc650dSSadaf Ebrahimi /* u b l */ LDX_BU /* ldx.bu */,
1109*22dc650dSSadaf Ebrahimi /* u h s */ STX_H /* stx.h */,
1110*22dc650dSSadaf Ebrahimi /* u h l */ LDX_HU /* ldx.hu */,
1111*22dc650dSSadaf Ebrahimi /* u i s */ STX_W /* stx.w */,
1112*22dc650dSSadaf Ebrahimi /* u i l */ LDX_WU /* ldx.wu */,
1113*22dc650dSSadaf Ebrahimi
1114*22dc650dSSadaf Ebrahimi /* s w s */ STX_D /* stx.d */,
1115*22dc650dSSadaf Ebrahimi /* s w l */ LDX_D /* ldx.d */,
1116*22dc650dSSadaf Ebrahimi /* s b s */ STX_B /* stx.b */,
1117*22dc650dSSadaf Ebrahimi /* s b l */ LDX_B /* ldx.b */,
1118*22dc650dSSadaf Ebrahimi /* s h s */ STX_H /* stx.h */,
1119*22dc650dSSadaf Ebrahimi /* s h l */ LDX_H /* ldx.h */,
1120*22dc650dSSadaf Ebrahimi /* s i s */ STX_W /* stx.w */,
1121*22dc650dSSadaf Ebrahimi /* s i l */ LDX_W /* ldx.w */,
1122*22dc650dSSadaf Ebrahimi
1123*22dc650dSSadaf Ebrahimi /* d s */ FSTX_D /* fstx.d */,
1124*22dc650dSSadaf Ebrahimi /* d l */ FLDX_D /* fldx.d */,
1125*22dc650dSSadaf Ebrahimi /* s s */ FSTX_S /* fstx.s */,
1126*22dc650dSSadaf Ebrahimi /* s l */ FLDX_S /* fldx.s */,
1127*22dc650dSSadaf Ebrahimi };
1128*22dc650dSSadaf Ebrahimi
/* Encodes and pushes exactly one load/store instruction.

   flags: (flags & MEM_MASK) selects the opcode; values up to GPR_REG
          use the RD encoding for reg, larger ones use FRD.
   reg:   register that is loaded or stored.
   arg:   memory operand; must have SLJIT_MEM set. If it names an offset
          register the indexed opcode table is used and argw must be 0,
          otherwise the immediate opcode table is used.
   argw:  displacement for the immediate form.

   Returns the result of push_inst. */
static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_s32 mem_index = flags & MEM_MASK;
	sljit_s32 base_reg = arg & REG_MASK;
	sljit_s32 index_reg;
	sljit_ins reg_field;

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* The transferred register is encoded the same way in both forms. */
	if (mem_index <= GPR_REG)
		reg_field = RD(reg);
	else
		reg_field = FRD(reg);

	if (!(arg & OFFS_REG_MASK)) {
		/* The upper bound is 0xfff rather than I12_MAX: getput_arg passes
		   (argw & 0xfff) as displacement. NOTE(review): this relies on
		   IMM_I12 keeping only the low 12 bits - confirm. */
		SLJIT_ASSERT(argw <= 0xfff && argw >= I12_MIN);

		return push_inst(compiler, data_transfer_insts[mem_index] | reg_field | RJ(base_reg) | IMM_I12(argw));
	}

	index_reg = OFFS_REG(arg);

	SLJIT_ASSERT(!argw);
	return push_inst(compiler, data_transfer_insts_x[mem_index] | reg_field | RJ(base_reg) | RK(index_reg));
}
1152*22dc650dSSadaf Ebrahimi
1153*22dc650dSSadaf Ebrahimi /* Can perform an operation using at most 1 instruction. */
getput_arg_fast(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw)1154*22dc650dSSadaf Ebrahimi static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1155*22dc650dSSadaf Ebrahimi {
1156*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(arg & SLJIT_MEM);
1157*22dc650dSSadaf Ebrahimi
1158*22dc650dSSadaf Ebrahimi /* argw == 0 (ldx/stx rd, rj, rk) can be used.
1159*22dc650dSSadaf Ebrahimi * argw in [-2048, 2047] (ld/st rd, rj, imm) can be used. */
1160*22dc650dSSadaf Ebrahimi if (!argw || (!(arg & OFFS_REG_MASK) && (argw <= I12_MAX && argw >= I12_MIN))) {
1161*22dc650dSSadaf Ebrahimi /* Works for both absolute and relative addresses. */
1162*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(flags & ARG_TEST))
1163*22dc650dSSadaf Ebrahimi return 1;
1164*22dc650dSSadaf Ebrahimi
1165*22dc650dSSadaf Ebrahimi FAIL_IF(push_mem_inst(compiler, flags, reg, arg, argw));
1166*22dc650dSSadaf Ebrahimi return -1;
1167*22dc650dSSadaf Ebrahimi }
1168*22dc650dSSadaf Ebrahimi return 0;
1169*22dc650dSSadaf Ebrahimi }
1170*22dc650dSSadaf Ebrahimi
/* Rounds argw to the nearest multiple of 0x1000 (ties go upwards), so that
   argw - TO_ARGW_HI(argw) always fits into a signed 12 bit immediate. */
#define TO_ARGW_HI(argw) (((argw) + 0x800) & ~0xfff)
1172*22dc650dSSadaf Ebrahimi
1173*22dc650dSSadaf Ebrahimi /* See getput_arg below.
1174*22dc650dSSadaf Ebrahimi Note: can_cache is called only for binary operators. */
can_cache(sljit_s32 arg,sljit_sw argw,sljit_s32 next_arg,sljit_sw next_argw)1175*22dc650dSSadaf Ebrahimi static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1176*22dc650dSSadaf Ebrahimi {
1177*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
1178*22dc650dSSadaf Ebrahimi
1179*22dc650dSSadaf Ebrahimi if (arg & OFFS_REG_MASK)
1180*22dc650dSSadaf Ebrahimi return 0;
1181*22dc650dSSadaf Ebrahimi
1182*22dc650dSSadaf Ebrahimi if (arg == next_arg) {
1183*22dc650dSSadaf Ebrahimi if (((next_argw - argw) <= I12_MAX && (next_argw - argw) >= I12_MIN)
1184*22dc650dSSadaf Ebrahimi || TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw))
1185*22dc650dSSadaf Ebrahimi return 1;
1186*22dc650dSSadaf Ebrahimi return 0;
1187*22dc650dSSadaf Ebrahimi }
1188*22dc650dSSadaf Ebrahimi
1189*22dc650dSSadaf Ebrahimi return 0;
1190*22dc650dSSadaf Ebrahimi }
1191*22dc650dSSadaf Ebrahimi
1192*22dc650dSSadaf Ebrahimi /* Emit the necessary instructions. See can_cache above. */
getput_arg(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw,sljit_s32 next_arg,sljit_sw next_argw)1193*22dc650dSSadaf Ebrahimi static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1194*22dc650dSSadaf Ebrahimi {
1195*22dc650dSSadaf Ebrahimi sljit_s32 base = arg & REG_MASK;
1196*22dc650dSSadaf Ebrahimi sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? TMP_REG2 : TMP_REG1;
1197*22dc650dSSadaf Ebrahimi sljit_sw offset;
1198*22dc650dSSadaf Ebrahimi
1199*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(arg & SLJIT_MEM);
1200*22dc650dSSadaf Ebrahimi if (!(next_arg & SLJIT_MEM)) {
1201*22dc650dSSadaf Ebrahimi next_arg = 0;
1202*22dc650dSSadaf Ebrahimi next_argw = 0;
1203*22dc650dSSadaf Ebrahimi }
1204*22dc650dSSadaf Ebrahimi
1205*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1206*22dc650dSSadaf Ebrahimi argw &= 0x3;
1207*22dc650dSSadaf Ebrahimi
1208*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(argw))
1209*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
1210*22dc650dSSadaf Ebrahimi return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);
1211*22dc650dSSadaf Ebrahimi }
1212*22dc650dSSadaf Ebrahimi
1213*22dc650dSSadaf Ebrahimi if (compiler->cache_arg == arg && argw - compiler->cache_argw <= I12_MAX && argw - compiler->cache_argw >= I12_MIN)
1214*22dc650dSSadaf Ebrahimi return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), argw - compiler->cache_argw);
1215*22dc650dSSadaf Ebrahimi
1216*22dc650dSSadaf Ebrahimi if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= I12_MAX) && (argw - compiler->cache_argw >= I12_MIN)) {
1217*22dc650dSSadaf Ebrahimi offset = argw - compiler->cache_argw;
1218*22dc650dSSadaf Ebrahimi } else {
1219*22dc650dSSadaf Ebrahimi sljit_sw argw_hi=TO_ARGW_HI(argw);
1220*22dc650dSSadaf Ebrahimi compiler->cache_arg = SLJIT_MEM;
1221*22dc650dSSadaf Ebrahimi
1222*22dc650dSSadaf Ebrahimi if (next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN && argw_hi != TO_ARGW_HI(next_argw)) {
1223*22dc650dSSadaf Ebrahimi FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1224*22dc650dSSadaf Ebrahimi compiler->cache_argw = argw;
1225*22dc650dSSadaf Ebrahimi offset = 0;
1226*22dc650dSSadaf Ebrahimi } else {
1227*22dc650dSSadaf Ebrahimi FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi));
1228*22dc650dSSadaf Ebrahimi compiler->cache_argw = argw_hi;
1229*22dc650dSSadaf Ebrahimi offset = argw & 0xfff;
1230*22dc650dSSadaf Ebrahimi argw = argw_hi;
1231*22dc650dSSadaf Ebrahimi }
1232*22dc650dSSadaf Ebrahimi }
1233*22dc650dSSadaf Ebrahimi
1234*22dc650dSSadaf Ebrahimi if (!base)
1235*22dc650dSSadaf Ebrahimi return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
1236*22dc650dSSadaf Ebrahimi
1237*22dc650dSSadaf Ebrahimi if (arg == next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN) {
1238*22dc650dSSadaf Ebrahimi compiler->cache_arg = arg;
1239*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base)));
1240*22dc650dSSadaf Ebrahimi return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
1241*22dc650dSSadaf Ebrahimi }
1242*22dc650dSSadaf Ebrahimi
1243*22dc650dSSadaf Ebrahimi if (!offset)
1244*22dc650dSSadaf Ebrahimi return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);
1245*22dc650dSSadaf Ebrahimi
1246*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADD_D | RD(tmp_r) | RJ(TMP_REG3) | RK(base)));
1247*22dc650dSSadaf Ebrahimi return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), offset);
1248*22dc650dSSadaf Ebrahimi }
1249*22dc650dSSadaf Ebrahimi
/* Emits a load or store of reg for the memory operand (arg, argw) without
   using the address cache.

   The single instruction form is tried first (getput_arg_fast). Otherwise
   the shifted index or the full displacement is computed into a scratch
   register and an indexed (base + scratch) or plain (scratch + 0) access
   is emitted.

   flags: access type, passed on to push_mem_inst.
   reg:   register that is loaded or stored.
   arg:   memory operand.
   argw:  displacement, or the shift amount (low two bits) when arg has
          an offset register.

   Returns compiler->error when getput_arg_fast reports a non-zero result,
   otherwise the result of the final push_mem_inst (or leaves early through
   FAIL_IF when emitting an instruction fails). */
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_s32 base = arg & REG_MASK;
	sljit_s32 tmp_r = TMP_REG1;

	if (getput_arg_fast(compiler, flags, reg, arg, argw))
		return compiler->error;

	/* A general purpose load overwrites reg anyway, so reg can hold the
	   address part as well. This is not possible when reg is also the base:
	   the base would be destroyed before the indexed access reads it. */
	if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA) && reg != base)
		tmp_r = reg;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		argw &= 0x3;

		if (SLJIT_UNLIKELY(argw))
			FAIL_IF(push_inst(compiler, SLLI_D | RD(tmp_r) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);
	}

	FAIL_IF(load_immediate(compiler, tmp_r, argw));

	/* Without a base register tmp_r is the absolute address. */
	if (base != 0)
		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);
	return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), 0);
}
1275*22dc650dSSadaf Ebrahimi
emit_op_mem2(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg1,sljit_sw arg1w,sljit_s32 arg2,sljit_sw arg2w)1276*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
1277*22dc650dSSadaf Ebrahimi {
1278*22dc650dSSadaf Ebrahimi if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1279*22dc650dSSadaf Ebrahimi return compiler->error;
1280*22dc650dSSadaf Ebrahimi return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1281*22dc650dSSadaf Ebrahimi }
1282*22dc650dSSadaf Ebrahimi
/* Immediate field for a bit position v: used unchanged when op has SLJIT_32
   set, increased by 32 otherwise. Reads `op` from the enclosing scope. */
#define IMM_EXTEND(v) (IMM_I12((v) + ((op & SLJIT_32) ? 0 : 32)))
1284*22dc650dSSadaf Ebrahimi
/* Emits a bitwise operation (and/or/xor family).

   andi/ori/xori are zero-extended, so the immediate opcode (op_imm) is
   never used: an immediate src2 is first materialized with ADDI_D from
   TMP_ZERO and the register form (op_reg) is applied to it.

   The macro reads compiler, op, flags, dst, src1 and src2 from the
   enclosing scope:
    - SLJIT_SET_Z in op:     the result is also computed into EQUAL_FLAG
    - UNUSED_DEST in flags:  dst is not written
    - SRC2_IMM in flags:     src2 is an immediate, otherwise a register
   With an immediate src2 and dst == src1 the constant goes into TMP_REG1,
   so src1 is not overwritten before it is read.

   The body is wrapped in do { } while (0), hence the macro is a single
   statement and is safe as the body of an unbraced if / else. FAIL_IF may
   still leave the enclosing function. */
#define EMIT_LOGICAL(op_imm, op_reg) \
	do { \
		if (flags & SRC2_IMM) { \
			if (op & SLJIT_SET_Z) { \
				FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(src2))); \
				FAIL_IF(push_inst(compiler, (op_reg) | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); \
			} \
			if (!(flags & UNUSED_DEST)) { \
				if (dst == src1) { \
					FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(TMP_ZERO) | IMM_I12(src2))); \
					FAIL_IF(push_inst(compiler, (op_reg) | RD(dst) | RJ(src1) | RK(TMP_REG1))); \
				} else { \
					FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(TMP_ZERO) | IMM_I12(src2))); \
					FAIL_IF(push_inst(compiler, (op_reg) | RD(dst) | RJ(src1) | RK(dst))); \
				} \
			} \
		} else { \
			if (op & SLJIT_SET_Z) \
				FAIL_IF(push_inst(compiler, (op_reg) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); \
			if (!(flags & UNUSED_DEST)) \
				FAIL_IF(push_inst(compiler, (op_reg) | RD(dst) | RJ(src1) | RK(src2))); \
		} \
	} while (0)
1308*22dc650dSSadaf Ebrahimi
/* Selects the opcodes of a shift: stores the immediate form into op_imm and
   the register form into op_reg (both are variables of the enclosing scope).
   Wrapped in do { } while (0), so both assignments stay together when the
   macro is the body of an unbraced if / else. */
#define EMIT_SHIFT(imm, reg) \
	do { \
		op_imm = (imm); \
		op_reg = (reg); \
	} while (0)
1312*22dc650dSSadaf Ebrahimi
emit_single_op(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 flags,sljit_s32 dst,sljit_s32 src1,sljit_sw src2)1313*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
1314*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
1315*22dc650dSSadaf Ebrahimi {
1316*22dc650dSSadaf Ebrahimi sljit_s32 is_overflow, is_carry, carry_src_r, is_handled, reg;
1317*22dc650dSSadaf Ebrahimi sljit_ins op_imm, op_reg;
1318*22dc650dSSadaf Ebrahimi sljit_ins word_size = ((op & SLJIT_32) ? 32 : 64);
1319*22dc650dSSadaf Ebrahimi
1320*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op)) {
1321*22dc650dSSadaf Ebrahimi case SLJIT_MOV:
1322*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1323*22dc650dSSadaf Ebrahimi if (dst != src2)
1324*22dc650dSSadaf Ebrahimi return push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src2) | IMM_I12(0));
1325*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1326*22dc650dSSadaf Ebrahimi
1327*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U8:
1328*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1329*22dc650dSSadaf Ebrahimi if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1330*22dc650dSSadaf Ebrahimi return push_inst(compiler, ANDI | RD(dst) | RJ(src2) | IMM_I12(0xff));
1331*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(dst == src2);
1332*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1333*22dc650dSSadaf Ebrahimi
1334*22dc650dSSadaf Ebrahimi case SLJIT_MOV_S8:
1335*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1336*22dc650dSSadaf Ebrahimi if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1337*22dc650dSSadaf Ebrahimi return push_inst(compiler, EXT_W_B | RD(dst) | RJ(src2));
1338*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(dst == src2);
1339*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1340*22dc650dSSadaf Ebrahimi
1341*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U16:
1342*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1343*22dc650dSSadaf Ebrahimi if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1344*22dc650dSSadaf Ebrahimi return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(src2) | (15 << 16));
1345*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(dst == src2);
1346*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1347*22dc650dSSadaf Ebrahimi
1348*22dc650dSSadaf Ebrahimi case SLJIT_MOV_S16:
1349*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1350*22dc650dSSadaf Ebrahimi if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1351*22dc650dSSadaf Ebrahimi return push_inst(compiler, EXT_W_H | RD(dst) | RJ(src2));
1352*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(dst == src2);
1353*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1354*22dc650dSSadaf Ebrahimi
1355*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U32:
1356*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1357*22dc650dSSadaf Ebrahimi if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1358*22dc650dSSadaf Ebrahimi return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(src2) | (31 << 16));
1359*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(dst == src2);
1360*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1361*22dc650dSSadaf Ebrahimi
1362*22dc650dSSadaf Ebrahimi case SLJIT_MOV_S32:
1363*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1364*22dc650dSSadaf Ebrahimi if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1365*22dc650dSSadaf Ebrahimi return push_inst(compiler, SLLI_W | RD(dst) | RJ(src2) | IMM_I12(0));
1366*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(dst == src2);
1367*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1368*22dc650dSSadaf Ebrahimi
1369*22dc650dSSadaf Ebrahimi case SLJIT_CLZ:
1370*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1371*22dc650dSSadaf Ebrahimi return push_inst(compiler, INST(CLZ, op) | RD(dst) | RJ(src2));
1372*22dc650dSSadaf Ebrahimi
1373*22dc650dSSadaf Ebrahimi case SLJIT_CTZ:
1374*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1375*22dc650dSSadaf Ebrahimi return push_inst(compiler, INST(CTZ, op) | RD(dst) | RJ(src2));
1376*22dc650dSSadaf Ebrahimi
1377*22dc650dSSadaf Ebrahimi case SLJIT_REV:
1378*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1379*22dc650dSSadaf Ebrahimi return push_inst(compiler, ((op & SLJIT_32) ? REVB_2W : REVB_D) | RD(dst) | RJ(src2));
1380*22dc650dSSadaf Ebrahimi
1381*22dc650dSSadaf Ebrahimi case SLJIT_REV_S16:
1382*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1383*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
1384*22dc650dSSadaf Ebrahimi return push_inst(compiler, EXT_W_H | RD(dst) | RJ(dst));
1385*22dc650dSSadaf Ebrahimi
1386*22dc650dSSadaf Ebrahimi case SLJIT_REV_U16:
1387*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1388*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
1389*22dc650dSSadaf Ebrahimi return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(dst) | (15 << 16));
1390*22dc650dSSadaf Ebrahimi
1391*22dc650dSSadaf Ebrahimi case SLJIT_REV_S32:
1392*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);
1393*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
1394*22dc650dSSadaf Ebrahimi return push_inst(compiler, SLLI_W | RD(dst) | RJ(dst) | IMM_I12(0));
1395*22dc650dSSadaf Ebrahimi
1396*22dc650dSSadaf Ebrahimi case SLJIT_REV_U32:
1397*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);
1398*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
1399*22dc650dSSadaf Ebrahimi return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(dst) | (31 << 16));
1400*22dc650dSSadaf Ebrahimi
1401*22dc650dSSadaf Ebrahimi case SLJIT_ADD:
1402*22dc650dSSadaf Ebrahimi /* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */
1403*22dc650dSSadaf Ebrahimi is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
1404*22dc650dSSadaf Ebrahimi carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1405*22dc650dSSadaf Ebrahimi
1406*22dc650dSSadaf Ebrahimi if (flags & SRC2_IMM) {
1407*22dc650dSSadaf Ebrahimi if (is_overflow) {
1408*22dc650dSSadaf Ebrahimi if (src2 >= 0)
1409*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1410*22dc650dSSadaf Ebrahimi else {
1411*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(-1)));
1412*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));
1413*22dc650dSSadaf Ebrahimi }
1414*22dc650dSSadaf Ebrahimi } else if (op & SLJIT_SET_Z)
1415*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1416*22dc650dSSadaf Ebrahimi
1417*22dc650dSSadaf Ebrahimi /* Only the zero flag is needed. */
1418*22dc650dSSadaf Ebrahimi if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1419*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(src2)));
1420*22dc650dSSadaf Ebrahimi } else {
1421*22dc650dSSadaf Ebrahimi if (is_overflow)
1422*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1423*22dc650dSSadaf Ebrahimi else if (op & SLJIT_SET_Z)
1424*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1425*22dc650dSSadaf Ebrahimi
1426*22dc650dSSadaf Ebrahimi if (is_overflow || carry_src_r != 0) {
1427*22dc650dSSadaf Ebrahimi if (src1 != dst)
1428*22dc650dSSadaf Ebrahimi carry_src_r = (sljit_s32)src1;
1429*22dc650dSSadaf Ebrahimi else if (src2 != dst)
1430*22dc650dSSadaf Ebrahimi carry_src_r = (sljit_s32)src2;
1431*22dc650dSSadaf Ebrahimi else {
1432*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(0)));
1433*22dc650dSSadaf Ebrahimi carry_src_r = OTHER_FLAG;
1434*22dc650dSSadaf Ebrahimi }
1435*22dc650dSSadaf Ebrahimi }
1436*22dc650dSSadaf Ebrahimi
1437*22dc650dSSadaf Ebrahimi /* Only the zero flag is needed. */
1438*22dc650dSSadaf Ebrahimi if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1439*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src1) | RK(src2)));
1440*22dc650dSSadaf Ebrahimi }
1441*22dc650dSSadaf Ebrahimi
1442*22dc650dSSadaf Ebrahimi /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
1443*22dc650dSSadaf Ebrahimi if (is_overflow || carry_src_r != 0) {
1444*22dc650dSSadaf Ebrahimi if (flags & SRC2_IMM)
1445*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(dst) | IMM_I12(src2)));
1446*22dc650dSSadaf Ebrahimi else
1447*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(carry_src_r)));
1448*22dc650dSSadaf Ebrahimi }
1449*22dc650dSSadaf Ebrahimi
1450*22dc650dSSadaf Ebrahimi if (!is_overflow)
1451*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1452*22dc650dSSadaf Ebrahimi
1453*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));
1454*22dc650dSSadaf Ebrahimi if (op & SLJIT_SET_Z)
1455*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));
1456*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));
1457*22dc650dSSadaf Ebrahimi return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));
1458*22dc650dSSadaf Ebrahimi
1459*22dc650dSSadaf Ebrahimi case SLJIT_ADDC:
1460*22dc650dSSadaf Ebrahimi carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1461*22dc650dSSadaf Ebrahimi
1462*22dc650dSSadaf Ebrahimi if (flags & SRC2_IMM) {
1463*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(src1) | IMM_I12(src2)));
1464*22dc650dSSadaf Ebrahimi } else {
1465*22dc650dSSadaf Ebrahimi if (carry_src_r != 0) {
1466*22dc650dSSadaf Ebrahimi if (src1 != dst)
1467*22dc650dSSadaf Ebrahimi carry_src_r = (sljit_s32)src1;
1468*22dc650dSSadaf Ebrahimi else if (src2 != dst)
1469*22dc650dSSadaf Ebrahimi carry_src_r = (sljit_s32)src2;
1470*22dc650dSSadaf Ebrahimi else {
1471*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1472*22dc650dSSadaf Ebrahimi carry_src_r = EQUAL_FLAG;
1473*22dc650dSSadaf Ebrahimi }
1474*22dc650dSSadaf Ebrahimi }
1475*22dc650dSSadaf Ebrahimi
1476*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(src1) | RK(src2)));
1477*22dc650dSSadaf Ebrahimi }
1478*22dc650dSSadaf Ebrahimi
1479*22dc650dSSadaf Ebrahimi /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
1480*22dc650dSSadaf Ebrahimi if (carry_src_r != 0) {
1481*22dc650dSSadaf Ebrahimi if (flags & SRC2_IMM)
1482*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(src2)));
1483*22dc650dSSadaf Ebrahimi else
1484*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(dst) | RK(carry_src_r)));
1485*22dc650dSSadaf Ebrahimi }
1486*22dc650dSSadaf Ebrahimi
1487*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));
1488*22dc650dSSadaf Ebrahimi
1489*22dc650dSSadaf Ebrahimi if (carry_src_r == 0)
1490*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1491*22dc650dSSadaf Ebrahimi
1492*22dc650dSSadaf Ebrahimi /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
1493*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG)));
1494*22dc650dSSadaf Ebrahimi /* Set carry flag. */
1495*22dc650dSSadaf Ebrahimi return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(EQUAL_FLAG));
1496*22dc650dSSadaf Ebrahimi
1497*22dc650dSSadaf Ebrahimi case SLJIT_SUB:
1498*22dc650dSSadaf Ebrahimi if ((flags & SRC2_IMM) && src2 == I12_MIN) {
1499*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
1500*22dc650dSSadaf Ebrahimi src2 = TMP_REG2;
1501*22dc650dSSadaf Ebrahimi flags &= ~SRC2_IMM;
1502*22dc650dSSadaf Ebrahimi }
1503*22dc650dSSadaf Ebrahimi
1504*22dc650dSSadaf Ebrahimi is_handled = 0;
1505*22dc650dSSadaf Ebrahimi
1506*22dc650dSSadaf Ebrahimi if (flags & SRC2_IMM) {
1507*22dc650dSSadaf Ebrahimi if (GET_FLAG_TYPE(op) == SLJIT_LESS) {
1508*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1509*22dc650dSSadaf Ebrahimi is_handled = 1;
1510*22dc650dSSadaf Ebrahimi } else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) {
1511*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1512*22dc650dSSadaf Ebrahimi is_handled = 1;
1513*22dc650dSSadaf Ebrahimi }
1514*22dc650dSSadaf Ebrahimi }
1515*22dc650dSSadaf Ebrahimi
1516*22dc650dSSadaf Ebrahimi if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
1517*22dc650dSSadaf Ebrahimi is_handled = 1;
1518*22dc650dSSadaf Ebrahimi
1519*22dc650dSSadaf Ebrahimi if (flags & SRC2_IMM) {
1520*22dc650dSSadaf Ebrahimi reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
1521*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(reg) | RJ(TMP_ZERO) | IMM_I12(src2)));
1522*22dc650dSSadaf Ebrahimi src2 = reg;
1523*22dc650dSSadaf Ebrahimi flags &= ~SRC2_IMM;
1524*22dc650dSSadaf Ebrahimi }
1525*22dc650dSSadaf Ebrahimi
1526*22dc650dSSadaf Ebrahimi switch (GET_FLAG_TYPE(op)) {
1527*22dc650dSSadaf Ebrahimi case SLJIT_LESS:
1528*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1529*22dc650dSSadaf Ebrahimi break;
1530*22dc650dSSadaf Ebrahimi case SLJIT_GREATER:
1531*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));
1532*22dc650dSSadaf Ebrahimi break;
1533*22dc650dSSadaf Ebrahimi case SLJIT_SIG_LESS:
1534*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1535*22dc650dSSadaf Ebrahimi break;
1536*22dc650dSSadaf Ebrahimi case SLJIT_SIG_GREATER:
1537*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));
1538*22dc650dSSadaf Ebrahimi break;
1539*22dc650dSSadaf Ebrahimi }
1540*22dc650dSSadaf Ebrahimi }
1541*22dc650dSSadaf Ebrahimi
1542*22dc650dSSadaf Ebrahimi if (is_handled) {
1543*22dc650dSSadaf Ebrahimi if (flags & SRC2_IMM) {
1544*22dc650dSSadaf Ebrahimi if (op & SLJIT_SET_Z)
1545*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));
1546*22dc650dSSadaf Ebrahimi if (!(flags & UNUSED_DEST))
1547*22dc650dSSadaf Ebrahimi return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2));
1548*22dc650dSSadaf Ebrahimi } else {
1549*22dc650dSSadaf Ebrahimi if (op & SLJIT_SET_Z)
1550*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1551*22dc650dSSadaf Ebrahimi if (!(flags & UNUSED_DEST))
1552*22dc650dSSadaf Ebrahimi return push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2));
1553*22dc650dSSadaf Ebrahimi }
1554*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1555*22dc650dSSadaf Ebrahimi }
1556*22dc650dSSadaf Ebrahimi
1557*22dc650dSSadaf Ebrahimi is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
1558*22dc650dSSadaf Ebrahimi is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1559*22dc650dSSadaf Ebrahimi
1560*22dc650dSSadaf Ebrahimi if (flags & SRC2_IMM) {
1561*22dc650dSSadaf Ebrahimi if (is_overflow) {
1562*22dc650dSSadaf Ebrahimi if (src2 >= 0)
1563*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1564*22dc650dSSadaf Ebrahimi else {
1565*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-1)));
1566*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));
1567*22dc650dSSadaf Ebrahimi }
1568*22dc650dSSadaf Ebrahimi } else if (op & SLJIT_SET_Z)
1569*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));
1570*22dc650dSSadaf Ebrahimi
1571*22dc650dSSadaf Ebrahimi if (is_overflow || is_carry)
1572*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1573*22dc650dSSadaf Ebrahimi
1574*22dc650dSSadaf Ebrahimi /* Only the zero flag is needed. */
1575*22dc650dSSadaf Ebrahimi if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1576*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));
1577*22dc650dSSadaf Ebrahimi } else {
1578*22dc650dSSadaf Ebrahimi if (is_overflow)
1579*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1580*22dc650dSSadaf Ebrahimi else if (op & SLJIT_SET_Z)
1581*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1582*22dc650dSSadaf Ebrahimi
1583*22dc650dSSadaf Ebrahimi if (is_overflow || is_carry)
1584*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1585*22dc650dSSadaf Ebrahimi
1586*22dc650dSSadaf Ebrahimi /* Only the zero flag is needed. */
1587*22dc650dSSadaf Ebrahimi if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1588*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));
1589*22dc650dSSadaf Ebrahimi }
1590*22dc650dSSadaf Ebrahimi
1591*22dc650dSSadaf Ebrahimi if (!is_overflow)
1592*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1593*22dc650dSSadaf Ebrahimi
1594*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));
1595*22dc650dSSadaf Ebrahimi if (op & SLJIT_SET_Z)
1596*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));
1597*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));
1598*22dc650dSSadaf Ebrahimi return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));
1599*22dc650dSSadaf Ebrahimi
1600*22dc650dSSadaf Ebrahimi case SLJIT_SUBC:
1601*22dc650dSSadaf Ebrahimi if ((flags & SRC2_IMM) && src2 == I12_MIN) {
1602*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
1603*22dc650dSSadaf Ebrahimi src2 = TMP_REG2;
1604*22dc650dSSadaf Ebrahimi flags &= ~SRC2_IMM;
1605*22dc650dSSadaf Ebrahimi }
1606*22dc650dSSadaf Ebrahimi
1607*22dc650dSSadaf Ebrahimi is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1608*22dc650dSSadaf Ebrahimi
1609*22dc650dSSadaf Ebrahimi if (flags & SRC2_IMM) {
1610*22dc650dSSadaf Ebrahimi if (is_carry)
1611*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1612*22dc650dSSadaf Ebrahimi
1613*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));
1614*22dc650dSSadaf Ebrahimi } else {
1615*22dc650dSSadaf Ebrahimi if (is_carry)
1616*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1617*22dc650dSSadaf Ebrahimi
1618*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));
1619*22dc650dSSadaf Ebrahimi }
1620*22dc650dSSadaf Ebrahimi
1621*22dc650dSSadaf Ebrahimi if (is_carry)
1622*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RJ(dst) | RK(OTHER_FLAG)));
1623*22dc650dSSadaf Ebrahimi
1624*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));
1625*22dc650dSSadaf Ebrahimi
1626*22dc650dSSadaf Ebrahimi if (!is_carry)
1627*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1628*22dc650dSSadaf Ebrahimi
1629*22dc650dSSadaf Ebrahimi return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(TMP_REG1));
1630*22dc650dSSadaf Ebrahimi
1631*22dc650dSSadaf Ebrahimi case SLJIT_MUL:
1632*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(!(flags & SRC2_IMM));
1633*22dc650dSSadaf Ebrahimi
1634*22dc650dSSadaf Ebrahimi if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW)
1635*22dc650dSSadaf Ebrahimi return push_inst(compiler, INST(MUL, op) | RD(dst) | RJ(src1) | RK(src2));
1636*22dc650dSSadaf Ebrahimi
1637*22dc650dSSadaf Ebrahimi if (op & SLJIT_32) {
1638*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, MUL_D | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1639*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, MUL_W | RD(dst) | RJ(src1) | RK(src2)));
1640*22dc650dSSadaf Ebrahimi return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG));
1641*22dc650dSSadaf Ebrahimi }
1642*22dc650dSSadaf Ebrahimi
1643*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, MULH_D | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1644*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, MUL_D | RD(dst) | RJ(src1) | RK(src2)));
1645*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SRAI_D | RD(OTHER_FLAG) | RJ(dst) | IMM_I12((63))));
1646*22dc650dSSadaf Ebrahimi return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(OTHER_FLAG));
1647*22dc650dSSadaf Ebrahimi
1648*22dc650dSSadaf Ebrahimi case SLJIT_AND:
1649*22dc650dSSadaf Ebrahimi EMIT_LOGICAL(ANDI, AND);
1650*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1651*22dc650dSSadaf Ebrahimi
1652*22dc650dSSadaf Ebrahimi case SLJIT_OR:
1653*22dc650dSSadaf Ebrahimi EMIT_LOGICAL(ORI, OR);
1654*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1655*22dc650dSSadaf Ebrahimi
1656*22dc650dSSadaf Ebrahimi case SLJIT_XOR:
1657*22dc650dSSadaf Ebrahimi EMIT_LOGICAL(XORI, XOR);
1658*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1659*22dc650dSSadaf Ebrahimi
1660*22dc650dSSadaf Ebrahimi case SLJIT_SHL:
1661*22dc650dSSadaf Ebrahimi case SLJIT_MSHL:
1662*22dc650dSSadaf Ebrahimi if (op & SLJIT_32) {
1663*22dc650dSSadaf Ebrahimi EMIT_SHIFT(SLLI_W, SLL_W);
1664*22dc650dSSadaf Ebrahimi } else {
1665*22dc650dSSadaf Ebrahimi EMIT_SHIFT(SLLI_D, SLL_D);
1666*22dc650dSSadaf Ebrahimi }
1667*22dc650dSSadaf Ebrahimi break;
1668*22dc650dSSadaf Ebrahimi
1669*22dc650dSSadaf Ebrahimi case SLJIT_LSHR:
1670*22dc650dSSadaf Ebrahimi case SLJIT_MLSHR:
1671*22dc650dSSadaf Ebrahimi if (op & SLJIT_32) {
1672*22dc650dSSadaf Ebrahimi EMIT_SHIFT(SRLI_W, SRL_W);
1673*22dc650dSSadaf Ebrahimi } else {
1674*22dc650dSSadaf Ebrahimi EMIT_SHIFT(SRLI_D, SRL_D);
1675*22dc650dSSadaf Ebrahimi }
1676*22dc650dSSadaf Ebrahimi break;
1677*22dc650dSSadaf Ebrahimi
1678*22dc650dSSadaf Ebrahimi case SLJIT_ASHR:
1679*22dc650dSSadaf Ebrahimi case SLJIT_MASHR:
1680*22dc650dSSadaf Ebrahimi if (op & SLJIT_32) {
1681*22dc650dSSadaf Ebrahimi EMIT_SHIFT(SRAI_W, SRA_W);
1682*22dc650dSSadaf Ebrahimi } else {
1683*22dc650dSSadaf Ebrahimi EMIT_SHIFT(SRAI_D, SRA_D);
1684*22dc650dSSadaf Ebrahimi }
1685*22dc650dSSadaf Ebrahimi break;
1686*22dc650dSSadaf Ebrahimi
1687*22dc650dSSadaf Ebrahimi case SLJIT_ROTL:
1688*22dc650dSSadaf Ebrahimi case SLJIT_ROTR:
1689*22dc650dSSadaf Ebrahimi if (flags & SRC2_IMM) {
1690*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(src2 != 0);
1691*22dc650dSSadaf Ebrahimi
1692*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) == SLJIT_ROTL)
1693*22dc650dSSadaf Ebrahimi src2 = word_size - src2;
1694*22dc650dSSadaf Ebrahimi return push_inst(compiler, INST(ROTRI, op) | RD(dst) | RJ(src1) | IMM_I12(src2));
1695*22dc650dSSadaf Ebrahimi }
1696*22dc650dSSadaf Ebrahimi
1697*22dc650dSSadaf Ebrahimi if (src2 == TMP_ZERO) {
1698*22dc650dSSadaf Ebrahimi if (dst != src1)
1699*22dc650dSSadaf Ebrahimi return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(0));
1700*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1701*22dc650dSSadaf Ebrahimi }
1702*22dc650dSSadaf Ebrahimi
1703*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) == SLJIT_ROTL) {
1704*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(SUB, op)| RD(OTHER_FLAG) | RJ(TMP_ZERO) | RK(src2)));
1705*22dc650dSSadaf Ebrahimi src2 = OTHER_FLAG;
1706*22dc650dSSadaf Ebrahimi }
1707*22dc650dSSadaf Ebrahimi return push_inst(compiler, INST(ROTR, op) | RD(dst) | RJ(src1) | RK(src2));
1708*22dc650dSSadaf Ebrahimi
1709*22dc650dSSadaf Ebrahimi default:
1710*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
1711*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1712*22dc650dSSadaf Ebrahimi }
1713*22dc650dSSadaf Ebrahimi
1714*22dc650dSSadaf Ebrahimi if (flags & SRC2_IMM) {
1715*22dc650dSSadaf Ebrahimi if (op & SLJIT_SET_Z)
1716*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1717*22dc650dSSadaf Ebrahimi
1718*22dc650dSSadaf Ebrahimi if (flags & UNUSED_DEST)
1719*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1720*22dc650dSSadaf Ebrahimi return push_inst(compiler, op_imm | RD(dst) | RJ(src1) | IMM_I12(src2));
1721*22dc650dSSadaf Ebrahimi }
1722*22dc650dSSadaf Ebrahimi
1723*22dc650dSSadaf Ebrahimi if (op & SLJIT_SET_Z)
1724*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1725*22dc650dSSadaf Ebrahimi
1726*22dc650dSSadaf Ebrahimi if (flags & UNUSED_DEST)
1727*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1728*22dc650dSSadaf Ebrahimi return push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2));
1729*22dc650dSSadaf Ebrahimi }
1730*22dc650dSSadaf Ebrahimi
1731*22dc650dSSadaf Ebrahimi #undef IMM_EXTEND
1732*22dc650dSSadaf Ebrahimi
/* Common driver behind the op1/op2 entry points: classifies the destination
   and both sources (register, immediate or memory), brings every operand that
   needs it into a temporary register, calls emit_single_op() and, when the
   destination is memory, stores the result.

   arg1 goes to TMP_REG1 or src reg
   arg2 goes to TMP_REG2, imm or src reg
   TMP_REG3 can be used for caching
   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3.

   A dst of 0 means the result itself is not needed (only flags are); this is
   recorded as UNUSED_DEST. Failures of the helpers are propagated to the
   caller. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 result_reg = TMP_REG2;
	sljit_s32 arg1_reg;
	/* Either a register index or, once SRC2_IMM is set, the immediate value. */
	sljit_sw arg2 = 0;
	/* Temporary that receives the second operand when it has to be loaded. */
	sljit_s32 arg2_scratch = TMP_REG2;
	sljit_s32 slow_sources;

	if (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1))
		arg2_scratch = TMP_REG1;

	if (!(flags & ALT_KEEP_CACHE)) {
		compiler->cache_arg = 0;
		compiler->cache_argw = 0;
	}

	/* Destination. */
	if (dst == 0) {
		SLJIT_ASSERT(HAS_FLAGS(op));
		dst = TMP_REG2;
		flags |= UNUSED_DEST;
	} else if (FAST_IS_REG(dst)) {
		flags |= REG_DEST;
		result_reg = dst;
		/* A move can load its source straight into the destination. */
		if (flags & MOVE_OP)
			arg2_scratch = dst;
	} else if (dst & SLJIT_MEM) {
		if (!getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
			flags |= SLOW_DEST;
	}

	/* Try to encode one operand as a non-zero 12 bit signed immediate. */
	if (flags & IMM_OP) {
		if (src2 == SLJIT_IMM && src2w != 0 && src2w >= I12_MIN && src2w <= I12_MAX) {
			arg2 = src2w;
			flags |= SRC2_IMM;
		} else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM
				&& src1w != 0 && src1w >= I12_MIN && src1w <= I12_MAX) {
			arg2 = src1w;
			flags |= SRC2_IMM;

			/* The operation is commutative: exchange the operands so the
			   immediate becomes the second one. src2w is intentionally left
			   alone, since arg2 already carries the value. */
			src1 = src2;
			src1w = src2w;
			src2 = SLJIT_IMM;
		}
	}

	/* Source 1. */
	if (FAST_IS_REG(src1)) {
		flags |= REG1_SOURCE;
		arg1_reg = src1;
	} else if (src1 == SLJIT_IMM) {
		if (src1w != 0) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
			arg1_reg = TMP_REG1;
		} else {
			arg1_reg = TMP_ZERO;
		}
	} else {
		if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
			FAIL_IF(compiler->error);
		} else {
			flags |= SLOW_SRC1;
		}
		arg1_reg = TMP_REG1;
	}

	/* Source 2. */
	if (FAST_IS_REG(src2)) {
		arg2 = src2;
		flags |= REG2_SOURCE;
		/* Move into a non-register destination: use the source register as
		   the result register. */
		if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
			result_reg = src2;
	} else if (src2 == SLJIT_IMM) {
		if (!(flags & SRC2_IMM)) {
			if (src2w != 0) {
				FAIL_IF(load_immediate(compiler, arg2_scratch, src2w));
				arg2 = arg2_scratch;
			} else {
				arg2 = TMP_ZERO;
				if (flags & MOVE_OP) {
					/* Moving zero: a memory destination gets register 0 as
					   the value to store (presumably the zero register -
					   confirm against TMP_ZERO); otherwise the move is
					   downgraded to a plain SLJIT_MOV. */
					if (dst & SLJIT_MEM)
						result_reg = 0;
					else
						op = SLJIT_MOV;
				}
			}
		}
	} else {
		if (getput_arg_fast(compiler, flags | LOAD_DATA, arg2_scratch, src2, src2w)) {
			FAIL_IF(compiler->error);
		} else {
			flags |= SLOW_SRC2;
		}
		arg2 = arg2_scratch;
	}

	/* Load the operands that could not be handled by the fast path. The
	   argument following each address is passed to getput_arg() as the next
	   access, and the order of the loads depends on can_cache(). */
	slow_sources = flags & (SLOW_SRC1 | SLOW_SRC2);

	if (slow_sources == (SLOW_SRC1 | SLOW_SRC2)) {
		SLJIT_ASSERT(arg2 == TMP_REG2);

		if ((flags & SLOW_DEST) && !can_cache(src2, src2w, src1, src1w) && can_cache(src2, src2w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | MEM_USE_TMP2, TMP_REG2, src2, src2w, dst, dstw));
		} else {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		}
	} else if (slow_sources & SLOW_SRC1) {
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
	} else if (slow_sources & SLOW_SRC2) {
		/* TMP_REG1 already holds the first operand, so ask for TMP_REG2. */
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | ((arg1_reg == TMP_REG1) ? MEM_USE_TMP2 : 0),
			arg2_scratch, src2, src2w, dst, dstw));
	}

	FAIL_IF(emit_single_op(compiler, op, flags, result_reg, arg1_reg, arg2));

	/* Write the result back when the destination is memory. */
	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	if (flags & SLOW_DEST)
		return getput_arg(compiler, flags, result_reg, dst, dstw, 0, 0);

	getput_arg_fast(compiler, flags, result_reg, dst, dstw);
	return compiler->error;
}
1856*22dc650dSSadaf Ebrahimi
sljit_emit_op0(struct sljit_compiler * compiler,sljit_s32 op)1857*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1858*22dc650dSSadaf Ebrahimi {
1859*22dc650dSSadaf Ebrahimi CHECK_ERROR();
1860*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op0(compiler, op));
1861*22dc650dSSadaf Ebrahimi
1862*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op)) {
1863*22dc650dSSadaf Ebrahimi case SLJIT_BREAKPOINT:
1864*22dc650dSSadaf Ebrahimi return push_inst(compiler, BREAK);
1865*22dc650dSSadaf Ebrahimi case SLJIT_NOP:
1866*22dc650dSSadaf Ebrahimi return push_inst(compiler, ANDI | RD(TMP_ZERO) | RJ(TMP_ZERO) | IMM_I12(0));
1867*22dc650dSSadaf Ebrahimi case SLJIT_LMUL_UW:
1868*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));
1869*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, MULH_DU | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1870*22dc650dSSadaf Ebrahimi return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
1871*22dc650dSSadaf Ebrahimi case SLJIT_LMUL_SW:
1872*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));
1873*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, MULH_D | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1874*22dc650dSSadaf Ebrahimi return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
1875*22dc650dSSadaf Ebrahimi case SLJIT_DIVMOD_UW:
1876*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));
1877*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1878*22dc650dSSadaf Ebrahimi return push_inst(compiler, ((op & SLJIT_32)? MOD_WU: MOD_DU) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
1879*22dc650dSSadaf Ebrahimi case SLJIT_DIVMOD_SW:
1880*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));
1881*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1882*22dc650dSSadaf Ebrahimi return push_inst(compiler, INST(MOD, op) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
1883*22dc650dSSadaf Ebrahimi case SLJIT_DIV_UW:
1884*22dc650dSSadaf Ebrahimi return push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
1885*22dc650dSSadaf Ebrahimi case SLJIT_DIV_SW:
1886*22dc650dSSadaf Ebrahimi return push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
1887*22dc650dSSadaf Ebrahimi case SLJIT_ENDBR:
1888*22dc650dSSadaf Ebrahimi case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1889*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1890*22dc650dSSadaf Ebrahimi }
1891*22dc650dSSadaf Ebrahimi
1892*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
1893*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
1894*22dc650dSSadaf Ebrahimi }
1895*22dc650dSSadaf Ebrahimi
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1896*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1897*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
1898*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
1899*22dc650dSSadaf Ebrahimi {
1900*22dc650dSSadaf Ebrahimi sljit_s32 flags = 0;
1901*22dc650dSSadaf Ebrahimi
1902*22dc650dSSadaf Ebrahimi CHECK_ERROR();
1903*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1904*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
1905*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src, srcw);
1906*22dc650dSSadaf Ebrahimi
1907*22dc650dSSadaf Ebrahimi if (op & SLJIT_32)
1908*22dc650dSSadaf Ebrahimi flags = INT_DATA | SIGNED_DATA;
1909*22dc650dSSadaf Ebrahimi
1910*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op)) {
1911*22dc650dSSadaf Ebrahimi case SLJIT_MOV:
1912*22dc650dSSadaf Ebrahimi case SLJIT_MOV_P:
1913*22dc650dSSadaf Ebrahimi return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw);
1914*22dc650dSSadaf Ebrahimi
1915*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U32:
1916*22dc650dSSadaf Ebrahimi return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);
1917*22dc650dSSadaf Ebrahimi
1918*22dc650dSSadaf Ebrahimi case SLJIT_MOV_S32:
1919*22dc650dSSadaf Ebrahimi /* Logical operators have no W variant, so sign extended input is necessary for them. */
1920*22dc650dSSadaf Ebrahimi case SLJIT_MOV32:
1921*22dc650dSSadaf Ebrahimi return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw);
1922*22dc650dSSadaf Ebrahimi
1923*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U8:
1924*22dc650dSSadaf Ebrahimi return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
1925*22dc650dSSadaf Ebrahimi
1926*22dc650dSSadaf Ebrahimi case SLJIT_MOV_S8:
1927*22dc650dSSadaf Ebrahimi return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
1928*22dc650dSSadaf Ebrahimi
1929*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U16:
1930*22dc650dSSadaf Ebrahimi return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
1931*22dc650dSSadaf Ebrahimi
1932*22dc650dSSadaf Ebrahimi case SLJIT_MOV_S16:
1933*22dc650dSSadaf Ebrahimi return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
1934*22dc650dSSadaf Ebrahimi
1935*22dc650dSSadaf Ebrahimi case SLJIT_CLZ:
1936*22dc650dSSadaf Ebrahimi case SLJIT_CTZ:
1937*22dc650dSSadaf Ebrahimi case SLJIT_REV:
1938*22dc650dSSadaf Ebrahimi return emit_op(compiler, op, flags, dst, dstw, TMP_ZERO, 0, src, srcw);
1939*22dc650dSSadaf Ebrahimi
1940*22dc650dSSadaf Ebrahimi case SLJIT_REV_U16:
1941*22dc650dSSadaf Ebrahimi case SLJIT_REV_S16:
1942*22dc650dSSadaf Ebrahimi return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);
1943*22dc650dSSadaf Ebrahimi
1944*22dc650dSSadaf Ebrahimi case SLJIT_REV_U32:
1945*22dc650dSSadaf Ebrahimi case SLJIT_REV_S32:
1946*22dc650dSSadaf Ebrahimi return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);
1947*22dc650dSSadaf Ebrahimi }
1948*22dc650dSSadaf Ebrahimi
1949*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
1950*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
1951*22dc650dSSadaf Ebrahimi }
1952*22dc650dSSadaf Ebrahimi
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1953*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1954*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
1955*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
1956*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
1957*22dc650dSSadaf Ebrahimi {
1958*22dc650dSSadaf Ebrahimi sljit_s32 flags = 0;
1959*22dc650dSSadaf Ebrahimi
1960*22dc650dSSadaf Ebrahimi CHECK_ERROR();
1961*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
1962*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
1963*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src1, src1w);
1964*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src2, src2w);
1965*22dc650dSSadaf Ebrahimi
1966*22dc650dSSadaf Ebrahimi if (op & SLJIT_32) {
1967*22dc650dSSadaf Ebrahimi flags |= INT_DATA | SIGNED_DATA;
1968*22dc650dSSadaf Ebrahimi if (src1 == SLJIT_IMM)
1969*22dc650dSSadaf Ebrahimi src1w = (sljit_s32)src1w;
1970*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM)
1971*22dc650dSSadaf Ebrahimi src2w = (sljit_s32)src2w;
1972*22dc650dSSadaf Ebrahimi }
1973*22dc650dSSadaf Ebrahimi
1974*22dc650dSSadaf Ebrahimi
1975*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op)) {
1976*22dc650dSSadaf Ebrahimi case SLJIT_ADD:
1977*22dc650dSSadaf Ebrahimi case SLJIT_ADDC:
1978*22dc650dSSadaf Ebrahimi compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
1979*22dc650dSSadaf Ebrahimi return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1980*22dc650dSSadaf Ebrahimi
1981*22dc650dSSadaf Ebrahimi case SLJIT_SUB:
1982*22dc650dSSadaf Ebrahimi case SLJIT_SUBC:
1983*22dc650dSSadaf Ebrahimi compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
1984*22dc650dSSadaf Ebrahimi return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1985*22dc650dSSadaf Ebrahimi
1986*22dc650dSSadaf Ebrahimi case SLJIT_MUL:
1987*22dc650dSSadaf Ebrahimi compiler->status_flags_state = 0;
1988*22dc650dSSadaf Ebrahimi return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
1989*22dc650dSSadaf Ebrahimi
1990*22dc650dSSadaf Ebrahimi case SLJIT_AND:
1991*22dc650dSSadaf Ebrahimi case SLJIT_OR:
1992*22dc650dSSadaf Ebrahimi case SLJIT_XOR:
1993*22dc650dSSadaf Ebrahimi return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1994*22dc650dSSadaf Ebrahimi
1995*22dc650dSSadaf Ebrahimi case SLJIT_SHL:
1996*22dc650dSSadaf Ebrahimi case SLJIT_MSHL:
1997*22dc650dSSadaf Ebrahimi case SLJIT_LSHR:
1998*22dc650dSSadaf Ebrahimi case SLJIT_MLSHR:
1999*22dc650dSSadaf Ebrahimi case SLJIT_ASHR:
2000*22dc650dSSadaf Ebrahimi case SLJIT_MASHR:
2001*22dc650dSSadaf Ebrahimi case SLJIT_ROTL:
2002*22dc650dSSadaf Ebrahimi case SLJIT_ROTR:
2003*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
2004*22dc650dSSadaf Ebrahimi if (op & SLJIT_32)
2005*22dc650dSSadaf Ebrahimi src2w &= 0x1f;
2006*22dc650dSSadaf Ebrahimi else
2007*22dc650dSSadaf Ebrahimi src2w &= 0x3f;
2008*22dc650dSSadaf Ebrahimi }
2009*22dc650dSSadaf Ebrahimi
2010*22dc650dSSadaf Ebrahimi return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2011*22dc650dSSadaf Ebrahimi }
2012*22dc650dSSadaf Ebrahimi
2013*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
2014*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2015*22dc650dSSadaf Ebrahimi }
2016*22dc650dSSadaf Ebrahimi
sljit_emit_op2u(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2017*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2018*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2019*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2020*22dc650dSSadaf Ebrahimi {
2021*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2022*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2023*22dc650dSSadaf Ebrahimi
2024*22dc650dSSadaf Ebrahimi SLJIT_SKIP_CHECKS(compiler);
2025*22dc650dSSadaf Ebrahimi return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w);
2026*22dc650dSSadaf Ebrahimi }
2027*22dc650dSSadaf Ebrahimi
sljit_emit_op2r(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2028*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2029*22dc650dSSadaf Ebrahimi sljit_s32 dst_reg,
2030*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2031*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2032*22dc650dSSadaf Ebrahimi {
2033*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2034*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2035*22dc650dSSadaf Ebrahimi
2036*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op)) {
2037*22dc650dSSadaf Ebrahimi case SLJIT_MULADD:
2038*22dc650dSSadaf Ebrahimi SLJIT_SKIP_CHECKS(compiler);
2039*22dc650dSSadaf Ebrahimi FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w));
2040*22dc650dSSadaf Ebrahimi return push_inst(compiler, ADD_D | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG2));
2041*22dc650dSSadaf Ebrahimi }
2042*22dc650dSSadaf Ebrahimi
2043*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2044*22dc650dSSadaf Ebrahimi }
2045*22dc650dSSadaf Ebrahimi
sljit_emit_shift_into(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 src1_reg,sljit_s32 src2_reg,sljit_s32 src3,sljit_sw src3w)2046*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2047*22dc650dSSadaf Ebrahimi sljit_s32 dst_reg,
2048*22dc650dSSadaf Ebrahimi sljit_s32 src1_reg,
2049*22dc650dSSadaf Ebrahimi sljit_s32 src2_reg,
2050*22dc650dSSadaf Ebrahimi sljit_s32 src3, sljit_sw src3w)
2051*22dc650dSSadaf Ebrahimi {
2052*22dc650dSSadaf Ebrahimi sljit_s32 is_left;
2053*22dc650dSSadaf Ebrahimi sljit_ins ins1, ins2, ins3;
2054*22dc650dSSadaf Ebrahimi sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
2055*22dc650dSSadaf Ebrahimi sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
2056*22dc650dSSadaf Ebrahimi
2057*22dc650dSSadaf Ebrahimi
2058*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2059*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
2060*22dc650dSSadaf Ebrahimi
2061*22dc650dSSadaf Ebrahimi is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
2062*22dc650dSSadaf Ebrahimi
2063*22dc650dSSadaf Ebrahimi if (src1_reg == src2_reg) {
2064*22dc650dSSadaf Ebrahimi SLJIT_SKIP_CHECKS(compiler);
2065*22dc650dSSadaf Ebrahimi return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
2066*22dc650dSSadaf Ebrahimi }
2067*22dc650dSSadaf Ebrahimi
2068*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src3, src3w);
2069*22dc650dSSadaf Ebrahimi
2070*22dc650dSSadaf Ebrahimi if (src3 == SLJIT_IMM) {
2071*22dc650dSSadaf Ebrahimi src3w &= bit_length - 1;
2072*22dc650dSSadaf Ebrahimi
2073*22dc650dSSadaf Ebrahimi if (src3w == 0)
2074*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2075*22dc650dSSadaf Ebrahimi
2076*22dc650dSSadaf Ebrahimi if (is_left) {
2077*22dc650dSSadaf Ebrahimi ins1 = INST(SLLI, op) | IMM_I12(src3w);
2078*22dc650dSSadaf Ebrahimi src3w = bit_length - src3w;
2079*22dc650dSSadaf Ebrahimi ins2 = INST(SRLI, op) | IMM_I12(src3w);
2080*22dc650dSSadaf Ebrahimi } else {
2081*22dc650dSSadaf Ebrahimi ins1 = INST(SRLI, op) | IMM_I12(src3w);
2082*22dc650dSSadaf Ebrahimi src3w = bit_length - src3w;
2083*22dc650dSSadaf Ebrahimi ins2 = INST(SLLI, op) | IMM_I12(src3w);
2084*22dc650dSSadaf Ebrahimi }
2085*22dc650dSSadaf Ebrahimi
2086*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg)));
2087*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg)));
2088*22dc650dSSadaf Ebrahimi return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));
2089*22dc650dSSadaf Ebrahimi }
2090*22dc650dSSadaf Ebrahimi
2091*22dc650dSSadaf Ebrahimi if (src3 & SLJIT_MEM) {
2092*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w));
2093*22dc650dSSadaf Ebrahimi src3 = TMP_REG2;
2094*22dc650dSSadaf Ebrahimi } else if (dst_reg == src3) {
2095*22dc650dSSadaf Ebrahimi push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(src3) | IMM_I12(0));
2096*22dc650dSSadaf Ebrahimi src3 = TMP_REG2;
2097*22dc650dSSadaf Ebrahimi }
2098*22dc650dSSadaf Ebrahimi
2099*22dc650dSSadaf Ebrahimi if (is_left) {
2100*22dc650dSSadaf Ebrahimi ins1 = INST(SLL, op);
2101*22dc650dSSadaf Ebrahimi ins2 = INST(SRLI, op);
2102*22dc650dSSadaf Ebrahimi ins3 = INST(SRL, op);
2103*22dc650dSSadaf Ebrahimi } else {
2104*22dc650dSSadaf Ebrahimi ins1 = INST(SRL, op);
2105*22dc650dSSadaf Ebrahimi ins2 = INST(SLLI, op);
2106*22dc650dSSadaf Ebrahimi ins3 = INST(SLL, op);
2107*22dc650dSSadaf Ebrahimi }
2108*22dc650dSSadaf Ebrahimi
2109*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg) | RK(src3)));
2110*22dc650dSSadaf Ebrahimi
2111*22dc650dSSadaf Ebrahimi if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
2112*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg) | IMM_I12(1)));
2113*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RJ(src3) | IMM_I12((sljit_ins)bit_length - 1)));
2114*22dc650dSSadaf Ebrahimi src2_reg = TMP_REG1;
2115*22dc650dSSadaf Ebrahimi } else
2116*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | RK(src3)));
2117*22dc650dSSadaf Ebrahimi
2118*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ins3 | RD(TMP_REG1) | RJ(src2_reg) | RK(TMP_REG2)));
2119*22dc650dSSadaf Ebrahimi return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));
2120*22dc650dSSadaf Ebrahimi }
2121*22dc650dSSadaf Ebrahimi
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)2122*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2123*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
2124*22dc650dSSadaf Ebrahimi {
2125*22dc650dSSadaf Ebrahimi sljit_s32 base = src & REG_MASK;
2126*22dc650dSSadaf Ebrahimi
2127*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2128*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2129*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src, srcw);
2130*22dc650dSSadaf Ebrahimi
2131*22dc650dSSadaf Ebrahimi switch (op) {
2132*22dc650dSSadaf Ebrahimi case SLJIT_FAST_RETURN:
2133*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(src))
2134*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(RETURN_ADDR_REG) | RJ(src) | IMM_I12(0)));
2135*22dc650dSSadaf Ebrahimi else
2136*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));
2137*22dc650dSSadaf Ebrahimi
2138*22dc650dSSadaf Ebrahimi return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
2139*22dc650dSSadaf Ebrahimi case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2140*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2141*22dc650dSSadaf Ebrahimi case SLJIT_PREFETCH_L1:
2142*22dc650dSSadaf Ebrahimi case SLJIT_PREFETCH_L2:
2143*22dc650dSSadaf Ebrahimi case SLJIT_PREFETCH_L3:
2144*22dc650dSSadaf Ebrahimi case SLJIT_PREFETCH_ONCE:
2145*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(src & OFFS_REG_MASK)) {
2146*22dc650dSSadaf Ebrahimi srcw &= 0x3;
2147*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(srcw))
2148*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(src)) | IMM_I12(srcw)));
2149*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1)));
2150*22dc650dSSadaf Ebrahimi } else {
2151*22dc650dSSadaf Ebrahimi if (base && srcw <= I12_MAX && srcw >= I12_MIN)
2152*22dc650dSSadaf Ebrahimi return push_inst(compiler,PRELD | RJ(base) | IMM_I12(srcw));
2153*22dc650dSSadaf Ebrahimi
2154*22dc650dSSadaf Ebrahimi FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2155*22dc650dSSadaf Ebrahimi if (base != 0)
2156*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1)));
2157*22dc650dSSadaf Ebrahimi }
2158*22dc650dSSadaf Ebrahimi return push_inst(compiler, PRELD | RD(0) | RJ(TMP_REG1));
2159*22dc650dSSadaf Ebrahimi }
2160*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2161*22dc650dSSadaf Ebrahimi }
2162*22dc650dSSadaf Ebrahimi
sljit_emit_op_dst(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw)2163*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
2164*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw)
2165*22dc650dSSadaf Ebrahimi {
2166*22dc650dSSadaf Ebrahimi sljit_s32 dst_r;
2167*22dc650dSSadaf Ebrahimi
2168*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2169*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
2170*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
2171*22dc650dSSadaf Ebrahimi
2172*22dc650dSSadaf Ebrahimi switch (op) {
2173*22dc650dSSadaf Ebrahimi case SLJIT_FAST_ENTER:
2174*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(dst))
2175*22dc650dSSadaf Ebrahimi return push_inst(compiler, ADDI_D | RD(dst) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
2176*22dc650dSSadaf Ebrahimi
2177*22dc650dSSadaf Ebrahimi SLJIT_ASSERT(RETURN_ADDR_REG == TMP_REG2);
2178*22dc650dSSadaf Ebrahimi break;
2179*22dc650dSSadaf Ebrahimi case SLJIT_GET_RETURN_ADDRESS:
2180*22dc650dSSadaf Ebrahimi dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
2181*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw)));
2182*22dc650dSSadaf Ebrahimi break;
2183*22dc650dSSadaf Ebrahimi }
2184*22dc650dSSadaf Ebrahimi
2185*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM)
2186*22dc650dSSadaf Ebrahimi return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw);
2187*22dc650dSSadaf Ebrahimi
2188*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2189*22dc650dSSadaf Ebrahimi }
2190*22dc650dSSadaf Ebrahimi
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)2191*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2192*22dc650dSSadaf Ebrahimi {
2193*22dc650dSSadaf Ebrahimi CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2194*22dc650dSSadaf Ebrahimi
2195*22dc650dSSadaf Ebrahimi if (type == SLJIT_GP_REGISTER)
2196*22dc650dSSadaf Ebrahimi return reg_map[reg];
2197*22dc650dSSadaf Ebrahimi
2198*22dc650dSSadaf Ebrahimi if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256)
2199*22dc650dSSadaf Ebrahimi return -1;
2200*22dc650dSSadaf Ebrahimi
2201*22dc650dSSadaf Ebrahimi return freg_map[reg];
2202*22dc650dSSadaf Ebrahimi }
2203*22dc650dSSadaf Ebrahimi
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_u32 size)2204*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2205*22dc650dSSadaf Ebrahimi void *instruction, sljit_u32 size)
2206*22dc650dSSadaf Ebrahimi {
2207*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(size);
2208*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2209*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2210*22dc650dSSadaf Ebrahimi
2211*22dc650dSSadaf Ebrahimi return push_inst(compiler, *(sljit_ins*)instruction);
2212*22dc650dSSadaf Ebrahimi }
2213*22dc650dSSadaf Ebrahimi
2214*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
2215*22dc650dSSadaf Ebrahimi /* Floating point operators */
2216*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
/* Places a compare condition selector at bit 15 of the instruction word.
   Every macro argument and every expansion is parenthesized, so compound
   expressions can be passed safely. */
#define SET_COND(cond) ((sljit_ins)((cond) << 15))

#define COND_CUN SET_COND(0x8) /* UN */
#define COND_CEQ SET_COND(0x4) /* EQ */
#define COND_CUEQ SET_COND(0xc) /* UN EQ */
#define COND_CLT SET_COND(0x2) /* LT */
#define COND_CULT SET_COND(0xa) /* UN LT */
#define COND_CLE SET_COND(0x6) /* LT EQ */
#define COND_CULE SET_COND(0xe) /* UN LT EQ */
#define COND_CNE SET_COND(0x10) /* GT LT */
#define COND_CUNE SET_COND(0x18) /* UN GT LT */
#define COND_COR SET_COND(0x14) /* GT LT EQ */

/* Selects the _S variant of an instruction when SLJIT_32 is set in type,
   the _D variant otherwise. */
#define FINST(inst, type) ((sljit_ins)(((type) & SLJIT_32) ? inst##_S : inst##_D))
/* Three bit condition flag register fields at bit 0, bit 5 and bit 15. */
#define FCD(cd) ((sljit_ins)((cd) & 0x7))
#define FCJ(cj) ((sljit_ins)(((cj) & 0x7) << 5))
#define FCA(ca) ((sljit_ins)(((ca) & 0x7) << 15))
/* Index of the condition flag register used by the compare helper. */
#define F_OTHER_FLAG 1

/* Memory access flags of a floating point operand: DOUBLE_DATA with the
   SLJIT_32 bit of op moved down by seven bit positions. */
#define FLOAT_DATA(op) (DOUBLE_DATA | (((op) & SLJIT_32) >> 7))
2237*22dc650dSSadaf Ebrahimi
2238*22dc650dSSadaf Ebrahimi /* convert to inter exact toward zero */
sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2239*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2240*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
2241*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
2242*22dc650dSSadaf Ebrahimi {
2243*22dc650dSSadaf Ebrahimi sljit_ins inst;
2244*22dc650dSSadaf Ebrahimi sljit_u32 word_data = 0;
2245*22dc650dSSadaf Ebrahimi sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
2246*22dc650dSSadaf Ebrahimi
2247*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op))
2248*22dc650dSSadaf Ebrahimi {
2249*22dc650dSSadaf Ebrahimi case SLJIT_CONV_SW_FROM_F64:
2250*22dc650dSSadaf Ebrahimi word_data = 1;
2251*22dc650dSSadaf Ebrahimi inst = FINST(FTINTRZ_L, op);
2252*22dc650dSSadaf Ebrahimi break;
2253*22dc650dSSadaf Ebrahimi case SLJIT_CONV_S32_FROM_F64:
2254*22dc650dSSadaf Ebrahimi inst = FINST(FTINTRZ_W, op);
2255*22dc650dSSadaf Ebrahimi break;
2256*22dc650dSSadaf Ebrahimi default:
2257*22dc650dSSadaf Ebrahimi inst = BREAK;
2258*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
2259*22dc650dSSadaf Ebrahimi }
2260*22dc650dSSadaf Ebrahimi
2261*22dc650dSSadaf Ebrahimi if (src & SLJIT_MEM) {
2262*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
2263*22dc650dSSadaf Ebrahimi src = TMP_FREG1;
2264*22dc650dSSadaf Ebrahimi }
2265*22dc650dSSadaf Ebrahimi
2266*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, inst | FRD(TMP_FREG1) | FRJ(src)));
2267*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(MOVFR2GR, word_data) | RD(dst_r) | FRJ(TMP_FREG1)));
2268*22dc650dSSadaf Ebrahimi
2269*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM)
2270*22dc650dSSadaf Ebrahimi return emit_op_mem2(compiler, word_data ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0);
2271*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2272*22dc650dSSadaf Ebrahimi }
2273*22dc650dSSadaf Ebrahimi
sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2274*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_s32 op,
2275*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
2276*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
2277*22dc650dSSadaf Ebrahimi {
2278*22dc650dSSadaf Ebrahimi sljit_ins inst;
2279*22dc650dSSadaf Ebrahimi sljit_u32 word_data = 0;
2280*22dc650dSSadaf Ebrahimi sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2281*22dc650dSSadaf Ebrahimi
2282*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op))
2283*22dc650dSSadaf Ebrahimi {
2284*22dc650dSSadaf Ebrahimi case SLJIT_CONV_F64_FROM_SW:
2285*22dc650dSSadaf Ebrahimi word_data = 1;
2286*22dc650dSSadaf Ebrahimi inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);
2287*22dc650dSSadaf Ebrahimi break;
2288*22dc650dSSadaf Ebrahimi case SLJIT_CONV_F64_FROM_S32:
2289*22dc650dSSadaf Ebrahimi inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);
2290*22dc650dSSadaf Ebrahimi break;
2291*22dc650dSSadaf Ebrahimi default:
2292*22dc650dSSadaf Ebrahimi inst = BREAK;
2293*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
2294*22dc650dSSadaf Ebrahimi }
2295*22dc650dSSadaf Ebrahimi
2296*22dc650dSSadaf Ebrahimi if (src & SLJIT_MEM) {
2297*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2298*22dc650dSSadaf Ebrahimi src = TMP_REG1;
2299*22dc650dSSadaf Ebrahimi } else if (src == SLJIT_IMM) {
2300*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2301*22dc650dSSadaf Ebrahimi srcw = (sljit_s32)srcw;
2302*22dc650dSSadaf Ebrahimi
2303*22dc650dSSadaf Ebrahimi FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2304*22dc650dSSadaf Ebrahimi src = TMP_REG1;
2305*22dc650dSSadaf Ebrahimi }
2306*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));
2307*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2308*22dc650dSSadaf Ebrahimi
2309*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM)
2310*22dc650dSSadaf Ebrahimi return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
2311*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2312*22dc650dSSadaf Ebrahimi }
2313*22dc650dSSadaf Ebrahimi
sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2314*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2315*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
2316*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
2317*22dc650dSSadaf Ebrahimi {
2318*22dc650dSSadaf Ebrahimi return sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw);
2319*22dc650dSSadaf Ebrahimi }
2320*22dc650dSSadaf Ebrahimi
sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2321*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
2322*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
2323*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
2324*22dc650dSSadaf Ebrahimi {
2325*22dc650dSSadaf Ebrahimi sljit_ins inst;
2326*22dc650dSSadaf Ebrahimi sljit_u32 word_data = 0;
2327*22dc650dSSadaf Ebrahimi sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2328*22dc650dSSadaf Ebrahimi
2329*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op))
2330*22dc650dSSadaf Ebrahimi {
2331*22dc650dSSadaf Ebrahimi case SLJIT_CONV_F64_FROM_UW:
2332*22dc650dSSadaf Ebrahimi word_data = 1;
2333*22dc650dSSadaf Ebrahimi inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);
2334*22dc650dSSadaf Ebrahimi break;
2335*22dc650dSSadaf Ebrahimi case SLJIT_CONV_F64_FROM_U32:
2336*22dc650dSSadaf Ebrahimi inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);
2337*22dc650dSSadaf Ebrahimi break;
2338*22dc650dSSadaf Ebrahimi default:
2339*22dc650dSSadaf Ebrahimi inst = BREAK;
2340*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
2341*22dc650dSSadaf Ebrahimi }
2342*22dc650dSSadaf Ebrahimi
2343*22dc650dSSadaf Ebrahimi if (src & SLJIT_MEM) {
2344*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2345*22dc650dSSadaf Ebrahimi src = TMP_REG1;
2346*22dc650dSSadaf Ebrahimi } else if (src == SLJIT_IMM) {
2347*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
2348*22dc650dSSadaf Ebrahimi srcw = (sljit_u32)srcw;
2349*22dc650dSSadaf Ebrahimi
2350*22dc650dSSadaf Ebrahimi FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2351*22dc650dSSadaf Ebrahimi src = TMP_REG1;
2352*22dc650dSSadaf Ebrahimi }
2353*22dc650dSSadaf Ebrahimi
2354*22dc650dSSadaf Ebrahimi if (!word_data)
2355*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SRLI_W | RD(src) | RJ(src) | IMM_I12(0)));
2356*22dc650dSSadaf Ebrahimi
2357*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, BLT | RJ(src) | RD(TMP_ZERO) | IMM_I16(4)));
2358*22dc650dSSadaf Ebrahimi
2359*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));
2360*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2361*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, B | IMM_I26(7)));
2362*22dc650dSSadaf Ebrahimi
2363*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG2) | RJ(src) | IMM_I12(1)));
2364*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, (word_data ? SRLI_D : SRLI_W) | RD(TMP_REG1) | RJ(src) | IMM_I12(1)));
2365*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, OR | RD(TMP_REG1) | RJ(TMP_REG1) | RK(TMP_REG2)));
2366*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, INST(MOVGR2FR, (!word_data)) | FRD(dst_r) | RJ(TMP_REG1)));
2367*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2368*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(dst_r) | FRK(dst_r)));
2369*22dc650dSSadaf Ebrahimi
2370*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM)
2371*22dc650dSSadaf Ebrahimi return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
2372*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2373*22dc650dSSadaf Ebrahimi }
2374*22dc650dSSadaf Ebrahimi
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2375*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2376*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2377*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2378*22dc650dSSadaf Ebrahimi {
2379*22dc650dSSadaf Ebrahimi if (src1 & SLJIT_MEM) {
2380*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
2381*22dc650dSSadaf Ebrahimi src1 = TMP_FREG1;
2382*22dc650dSSadaf Ebrahimi }
2383*22dc650dSSadaf Ebrahimi
2384*22dc650dSSadaf Ebrahimi if (src2 & SLJIT_MEM) {
2385*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
2386*22dc650dSSadaf Ebrahimi src2 = TMP_FREG2;
2387*22dc650dSSadaf Ebrahimi }
2388*22dc650dSSadaf Ebrahimi
2389*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(OTHER_FLAG)));
2390*22dc650dSSadaf Ebrahimi
2391*22dc650dSSadaf Ebrahimi switch (GET_FLAG_TYPE(op)) {
2392*22dc650dSSadaf Ebrahimi case SLJIT_F_EQUAL:
2393*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_EQUAL:
2394*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2395*22dc650dSSadaf Ebrahimi break;
2396*22dc650dSSadaf Ebrahimi case SLJIT_F_LESS:
2397*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_LESS:
2398*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2399*22dc650dSSadaf Ebrahimi break;
2400*22dc650dSSadaf Ebrahimi case SLJIT_F_GREATER:
2401*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_GREATER:
2402*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));
2403*22dc650dSSadaf Ebrahimi break;
2404*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED_OR_GREATER:
2405*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));
2406*22dc650dSSadaf Ebrahimi break;
2407*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED_OR_LESS:
2408*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2409*22dc650dSSadaf Ebrahimi break;
2410*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED_OR_EQUAL:
2411*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2412*22dc650dSSadaf Ebrahimi break;
2413*22dc650dSSadaf Ebrahimi default: /* SLJIT_UNORDERED */
2414*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUN | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2415*22dc650dSSadaf Ebrahimi }
2416*22dc650dSSadaf Ebrahimi return push_inst(compiler, MOVCF2GR | RD(OTHER_FLAG) | FCJ(F_OTHER_FLAG));
2417*22dc650dSSadaf Ebrahimi }
2418*22dc650dSSadaf Ebrahimi
sljit_emit_fop1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2419*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2420*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
2421*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
2422*22dc650dSSadaf Ebrahimi {
2423*22dc650dSSadaf Ebrahimi sljit_s32 dst_r;
2424*22dc650dSSadaf Ebrahimi
2425*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2426*22dc650dSSadaf Ebrahimi compiler->cache_arg = 0;
2427*22dc650dSSadaf Ebrahimi compiler->cache_argw = 0;
2428*22dc650dSSadaf Ebrahimi
2429*22dc650dSSadaf Ebrahimi SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
2430*22dc650dSSadaf Ebrahimi SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2431*22dc650dSSadaf Ebrahimi
2432*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
2433*22dc650dSSadaf Ebrahimi op ^= SLJIT_32;
2434*22dc650dSSadaf Ebrahimi
2435*22dc650dSSadaf Ebrahimi dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2436*22dc650dSSadaf Ebrahimi
2437*22dc650dSSadaf Ebrahimi if (src & SLJIT_MEM) {
2438*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
2439*22dc650dSSadaf Ebrahimi src = dst_r;
2440*22dc650dSSadaf Ebrahimi }
2441*22dc650dSSadaf Ebrahimi
2442*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op)) {
2443*22dc650dSSadaf Ebrahimi case SLJIT_MOV_F64:
2444*22dc650dSSadaf Ebrahimi if (src != dst_r) {
2445*22dc650dSSadaf Ebrahimi if (!(dst & SLJIT_MEM))
2446*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FMOV, op) | FRD(dst_r) | FRJ(src)));
2447*22dc650dSSadaf Ebrahimi else
2448*22dc650dSSadaf Ebrahimi dst_r = src;
2449*22dc650dSSadaf Ebrahimi }
2450*22dc650dSSadaf Ebrahimi break;
2451*22dc650dSSadaf Ebrahimi case SLJIT_NEG_F64:
2452*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FNEG, op) | FRD(dst_r) | FRJ(src)));
2453*22dc650dSSadaf Ebrahimi break;
2454*22dc650dSSadaf Ebrahimi case SLJIT_ABS_F64:
2455*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FABS, op) | FRD(dst_r) | FRJ(src)));
2456*22dc650dSSadaf Ebrahimi break;
2457*22dc650dSSadaf Ebrahimi case SLJIT_CONV_F64_FROM_F32:
2458*22dc650dSSadaf Ebrahimi /* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */
2459*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? FCVT_D_S : FCVT_S_D) | FRD(dst_r) | FRJ(src)));
2460*22dc650dSSadaf Ebrahimi op ^= SLJIT_32;
2461*22dc650dSSadaf Ebrahimi break;
2462*22dc650dSSadaf Ebrahimi }
2463*22dc650dSSadaf Ebrahimi
2464*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM)
2465*22dc650dSSadaf Ebrahimi return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);
2466*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2467*22dc650dSSadaf Ebrahimi }
2468*22dc650dSSadaf Ebrahimi
sljit_emit_fop2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2469*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2470*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
2471*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2472*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2473*22dc650dSSadaf Ebrahimi {
2474*22dc650dSSadaf Ebrahimi sljit_s32 dst_r, flags = 0;
2475*22dc650dSSadaf Ebrahimi
2476*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2477*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2478*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
2479*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src1, src1w);
2480*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src2, src2w);
2481*22dc650dSSadaf Ebrahimi
2482*22dc650dSSadaf Ebrahimi compiler->cache_arg = 0;
2483*22dc650dSSadaf Ebrahimi compiler->cache_argw = 0;
2484*22dc650dSSadaf Ebrahimi
2485*22dc650dSSadaf Ebrahimi dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
2486*22dc650dSSadaf Ebrahimi
2487*22dc650dSSadaf Ebrahimi if (src1 & SLJIT_MEM) {
2488*22dc650dSSadaf Ebrahimi if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
2489*22dc650dSSadaf Ebrahimi FAIL_IF(compiler->error);
2490*22dc650dSSadaf Ebrahimi src1 = TMP_FREG1;
2491*22dc650dSSadaf Ebrahimi } else
2492*22dc650dSSadaf Ebrahimi flags |= SLOW_SRC1;
2493*22dc650dSSadaf Ebrahimi }
2494*22dc650dSSadaf Ebrahimi
2495*22dc650dSSadaf Ebrahimi if (src2 & SLJIT_MEM) {
2496*22dc650dSSadaf Ebrahimi if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
2497*22dc650dSSadaf Ebrahimi FAIL_IF(compiler->error);
2498*22dc650dSSadaf Ebrahimi src2 = TMP_FREG2;
2499*22dc650dSSadaf Ebrahimi } else
2500*22dc650dSSadaf Ebrahimi flags |= SLOW_SRC2;
2501*22dc650dSSadaf Ebrahimi }
2502*22dc650dSSadaf Ebrahimi
2503*22dc650dSSadaf Ebrahimi if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
2504*22dc650dSSadaf Ebrahimi if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
2505*22dc650dSSadaf Ebrahimi FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
2506*22dc650dSSadaf Ebrahimi FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
2507*22dc650dSSadaf Ebrahimi } else {
2508*22dc650dSSadaf Ebrahimi FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
2509*22dc650dSSadaf Ebrahimi FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
2510*22dc650dSSadaf Ebrahimi }
2511*22dc650dSSadaf Ebrahimi }
2512*22dc650dSSadaf Ebrahimi else if (flags & SLOW_SRC1)
2513*22dc650dSSadaf Ebrahimi FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
2514*22dc650dSSadaf Ebrahimi else if (flags & SLOW_SRC2)
2515*22dc650dSSadaf Ebrahimi FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
2516*22dc650dSSadaf Ebrahimi
2517*22dc650dSSadaf Ebrahimi if (flags & SLOW_SRC1)
2518*22dc650dSSadaf Ebrahimi src1 = TMP_FREG1;
2519*22dc650dSSadaf Ebrahimi if (flags & SLOW_SRC2)
2520*22dc650dSSadaf Ebrahimi src2 = TMP_FREG2;
2521*22dc650dSSadaf Ebrahimi
2522*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op)) {
2523*22dc650dSSadaf Ebrahimi case SLJIT_ADD_F64:
2524*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2525*22dc650dSSadaf Ebrahimi break;
2526*22dc650dSSadaf Ebrahimi case SLJIT_SUB_F64:
2527*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FSUB, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2528*22dc650dSSadaf Ebrahimi break;
2529*22dc650dSSadaf Ebrahimi case SLJIT_MUL_F64:
2530*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FMUL, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2531*22dc650dSSadaf Ebrahimi break;
2532*22dc650dSSadaf Ebrahimi case SLJIT_DIV_F64:
2533*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, FINST(FDIV, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2534*22dc650dSSadaf Ebrahimi break;
2535*22dc650dSSadaf Ebrahimi }
2536*22dc650dSSadaf Ebrahimi
2537*22dc650dSSadaf Ebrahimi if (dst_r != dst)
2538*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
2539*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2540*22dc650dSSadaf Ebrahimi }
2541*22dc650dSSadaf Ebrahimi
sljit_emit_fop2r(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_freg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2542*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
2543*22dc650dSSadaf Ebrahimi sljit_s32 dst_freg,
2544*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2545*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2546*22dc650dSSadaf Ebrahimi {
2547*22dc650dSSadaf Ebrahimi sljit_s32 reg;
2548*22dc650dSSadaf Ebrahimi
2549*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2550*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
2551*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src1, src1w);
2552*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src2, src2w);
2553*22dc650dSSadaf Ebrahimi
2554*22dc650dSSadaf Ebrahimi if (src2 & SLJIT_MEM) {
2555*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src2, src2w, 0, 0));
2556*22dc650dSSadaf Ebrahimi src2 = TMP_FREG1;
2557*22dc650dSSadaf Ebrahimi }
2558*22dc650dSSadaf Ebrahimi
2559*22dc650dSSadaf Ebrahimi if (src1 & SLJIT_MEM) {
2560*22dc650dSSadaf Ebrahimi reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
2561*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, reg, src1, src1w, 0, 0));
2562*22dc650dSSadaf Ebrahimi src1 = reg;
2563*22dc650dSSadaf Ebrahimi }
2564*22dc650dSSadaf Ebrahimi
2565*22dc650dSSadaf Ebrahimi return push_inst(compiler, FINST(FCOPYSIGN, op) | FRD(dst_freg) | FRJ(src1) | FRK(src2));
2566*22dc650dSSadaf Ebrahimi }
2567*22dc650dSSadaf Ebrahimi
sljit_emit_fset32(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f32 value)2568*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
2569*22dc650dSSadaf Ebrahimi sljit_s32 freg, sljit_f32 value)
2570*22dc650dSSadaf Ebrahimi {
2571*22dc650dSSadaf Ebrahimi union {
2572*22dc650dSSadaf Ebrahimi sljit_s32 imm;
2573*22dc650dSSadaf Ebrahimi sljit_f32 value;
2574*22dc650dSSadaf Ebrahimi } u;
2575*22dc650dSSadaf Ebrahimi
2576*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2577*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_fset32(compiler, freg, value));
2578*22dc650dSSadaf Ebrahimi
2579*22dc650dSSadaf Ebrahimi u.value = value;
2580*22dc650dSSadaf Ebrahimi
2581*22dc650dSSadaf Ebrahimi if (u.imm == 0)
2582*22dc650dSSadaf Ebrahimi return push_inst(compiler, MOVGR2FR_W | RJ(TMP_ZERO) | FRD(freg));
2583*22dc650dSSadaf Ebrahimi
2584*22dc650dSSadaf Ebrahimi FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
2585*22dc650dSSadaf Ebrahimi return push_inst(compiler, MOVGR2FR_W | RJ(TMP_REG1) | FRD(freg));
2586*22dc650dSSadaf Ebrahimi }
2587*22dc650dSSadaf Ebrahimi
sljit_emit_fset64(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f64 value)2588*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
2589*22dc650dSSadaf Ebrahimi sljit_s32 freg, sljit_f64 value)
2590*22dc650dSSadaf Ebrahimi {
2591*22dc650dSSadaf Ebrahimi union {
2592*22dc650dSSadaf Ebrahimi sljit_sw imm;
2593*22dc650dSSadaf Ebrahimi sljit_f64 value;
2594*22dc650dSSadaf Ebrahimi } u;
2595*22dc650dSSadaf Ebrahimi
2596*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2597*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_fset64(compiler, freg, value));
2598*22dc650dSSadaf Ebrahimi
2599*22dc650dSSadaf Ebrahimi u.value = value;
2600*22dc650dSSadaf Ebrahimi
2601*22dc650dSSadaf Ebrahimi if (u.imm == 0)
2602*22dc650dSSadaf Ebrahimi return push_inst(compiler, MOVGR2FR_D | RJ(TMP_ZERO) | FRD(freg));
2603*22dc650dSSadaf Ebrahimi
2604*22dc650dSSadaf Ebrahimi FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
2605*22dc650dSSadaf Ebrahimi return push_inst(compiler, MOVGR2FR_D | RJ(TMP_REG1) | FRD(freg));
2606*22dc650dSSadaf Ebrahimi }
2607*22dc650dSSadaf Ebrahimi
sljit_emit_fcopy(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 freg,sljit_s32 reg)2608*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
2609*22dc650dSSadaf Ebrahimi sljit_s32 freg, sljit_s32 reg)
2610*22dc650dSSadaf Ebrahimi {
2611*22dc650dSSadaf Ebrahimi sljit_ins inst;
2612*22dc650dSSadaf Ebrahimi
2613*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2614*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
2615*22dc650dSSadaf Ebrahimi
2616*22dc650dSSadaf Ebrahimi if (GET_OPCODE(op) == SLJIT_COPY_TO_F64)
2617*22dc650dSSadaf Ebrahimi inst = ((op & SLJIT_32) ? MOVGR2FR_W : MOVGR2FR_D) | FRD(freg) | RJ(reg);
2618*22dc650dSSadaf Ebrahimi else
2619*22dc650dSSadaf Ebrahimi inst = ((op & SLJIT_32) ? MOVFR2GR_S : MOVFR2GR_D) | RD(reg) | FRJ(freg);
2620*22dc650dSSadaf Ebrahimi return push_inst(compiler, inst);
2621*22dc650dSSadaf Ebrahimi }
2622*22dc650dSSadaf Ebrahimi
2623*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
2624*22dc650dSSadaf Ebrahimi /* Conditional instructions */
2625*22dc650dSSadaf Ebrahimi /* --------------------------------------------------------------------- */
2626*22dc650dSSadaf Ebrahimi
sljit_emit_label(struct sljit_compiler * compiler)2627*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2628*22dc650dSSadaf Ebrahimi {
2629*22dc650dSSadaf Ebrahimi struct sljit_label *label;
2630*22dc650dSSadaf Ebrahimi
2631*22dc650dSSadaf Ebrahimi CHECK_ERROR_PTR();
2632*22dc650dSSadaf Ebrahimi CHECK_PTR(check_sljit_emit_label(compiler));
2633*22dc650dSSadaf Ebrahimi
2634*22dc650dSSadaf Ebrahimi if (compiler->last_label && compiler->last_label->size == compiler->size)
2635*22dc650dSSadaf Ebrahimi return compiler->last_label;
2636*22dc650dSSadaf Ebrahimi
2637*22dc650dSSadaf Ebrahimi label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2638*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(!label);
2639*22dc650dSSadaf Ebrahimi set_label(label, compiler);
2640*22dc650dSSadaf Ebrahimi return label;
2641*22dc650dSSadaf Ebrahimi }
2642*22dc650dSSadaf Ebrahimi
get_jump_instruction(sljit_s32 type)2643*22dc650dSSadaf Ebrahimi static sljit_ins get_jump_instruction(sljit_s32 type)
2644*22dc650dSSadaf Ebrahimi {
2645*22dc650dSSadaf Ebrahimi switch (type) {
2646*22dc650dSSadaf Ebrahimi case SLJIT_EQUAL:
2647*22dc650dSSadaf Ebrahimi case SLJIT_ATOMIC_NOT_STORED:
2648*22dc650dSSadaf Ebrahimi return BNE | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
2649*22dc650dSSadaf Ebrahimi case SLJIT_NOT_EQUAL:
2650*22dc650dSSadaf Ebrahimi case SLJIT_ATOMIC_STORED:
2651*22dc650dSSadaf Ebrahimi return BEQ | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
2652*22dc650dSSadaf Ebrahimi case SLJIT_LESS:
2653*22dc650dSSadaf Ebrahimi case SLJIT_GREATER:
2654*22dc650dSSadaf Ebrahimi case SLJIT_SIG_LESS:
2655*22dc650dSSadaf Ebrahimi case SLJIT_SIG_GREATER:
2656*22dc650dSSadaf Ebrahimi case SLJIT_OVERFLOW:
2657*22dc650dSSadaf Ebrahimi case SLJIT_CARRY:
2658*22dc650dSSadaf Ebrahimi return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2659*22dc650dSSadaf Ebrahimi case SLJIT_GREATER_EQUAL:
2660*22dc650dSSadaf Ebrahimi case SLJIT_LESS_EQUAL:
2661*22dc650dSSadaf Ebrahimi case SLJIT_SIG_GREATER_EQUAL:
2662*22dc650dSSadaf Ebrahimi case SLJIT_SIG_LESS_EQUAL:
2663*22dc650dSSadaf Ebrahimi case SLJIT_NOT_OVERFLOW:
2664*22dc650dSSadaf Ebrahimi case SLJIT_NOT_CARRY:
2665*22dc650dSSadaf Ebrahimi return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2666*22dc650dSSadaf Ebrahimi case SLJIT_F_EQUAL:
2667*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_EQUAL:
2668*22dc650dSSadaf Ebrahimi case SLJIT_F_LESS:
2669*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_LESS:
2670*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_GREATER:
2671*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED_OR_GREATER:
2672*22dc650dSSadaf Ebrahimi case SLJIT_F_GREATER:
2673*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED_OR_LESS:
2674*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED_OR_EQUAL:
2675*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED:
2676*22dc650dSSadaf Ebrahimi return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2677*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_NOT_EQUAL:
2678*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_LESS_EQUAL:
2679*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_GREATER_EQUAL:
2680*22dc650dSSadaf Ebrahimi case SLJIT_F_NOT_EQUAL:
2681*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED_OR_NOT_EQUAL:
2682*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2683*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED_OR_LESS_EQUAL:
2684*22dc650dSSadaf Ebrahimi case SLJIT_F_LESS_EQUAL:
2685*22dc650dSSadaf Ebrahimi case SLJIT_F_GREATER_EQUAL:
2686*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED:
2687*22dc650dSSadaf Ebrahimi return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2688*22dc650dSSadaf Ebrahimi default:
2689*22dc650dSSadaf Ebrahimi /* Not conditional branch. */
2690*22dc650dSSadaf Ebrahimi return 0;
2691*22dc650dSSadaf Ebrahimi }
2692*22dc650dSSadaf Ebrahimi }
2693*22dc650dSSadaf Ebrahimi
sljit_emit_jump(struct sljit_compiler * compiler,sljit_s32 type)2694*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2695*22dc650dSSadaf Ebrahimi {
2696*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
2697*22dc650dSSadaf Ebrahimi sljit_ins inst;
2698*22dc650dSSadaf Ebrahimi
2699*22dc650dSSadaf Ebrahimi CHECK_ERROR_PTR();
2700*22dc650dSSadaf Ebrahimi CHECK_PTR(check_sljit_emit_jump(compiler, type));
2701*22dc650dSSadaf Ebrahimi
2702*22dc650dSSadaf Ebrahimi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2703*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(!jump);
2704*22dc650dSSadaf Ebrahimi set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2705*22dc650dSSadaf Ebrahimi type &= 0xff;
2706*22dc650dSSadaf Ebrahimi
2707*22dc650dSSadaf Ebrahimi inst = get_jump_instruction(type);
2708*22dc650dSSadaf Ebrahimi
2709*22dc650dSSadaf Ebrahimi if (inst != 0) {
2710*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(push_inst(compiler, inst));
2711*22dc650dSSadaf Ebrahimi jump->flags |= IS_COND;
2712*22dc650dSSadaf Ebrahimi }
2713*22dc650dSSadaf Ebrahimi
2714*22dc650dSSadaf Ebrahimi jump->addr = compiler->size;
2715*22dc650dSSadaf Ebrahimi inst = JIRL | RJ(TMP_REG1) | IMM_I16(0);
2716*22dc650dSSadaf Ebrahimi
2717*22dc650dSSadaf Ebrahimi if (type >= SLJIT_FAST_CALL) {
2718*22dc650dSSadaf Ebrahimi jump->flags |= IS_CALL;
2719*22dc650dSSadaf Ebrahimi inst |= RD(RETURN_ADDR_REG);
2720*22dc650dSSadaf Ebrahimi }
2721*22dc650dSSadaf Ebrahimi
2722*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(push_inst(compiler, inst));
2723*22dc650dSSadaf Ebrahimi
2724*22dc650dSSadaf Ebrahimi /* Maximum number of instructions required for generating a constant. */
2725*22dc650dSSadaf Ebrahimi compiler->size += JUMP_MAX_SIZE - 1;
2726*22dc650dSSadaf Ebrahimi return jump;
2727*22dc650dSSadaf Ebrahimi }
2728*22dc650dSSadaf Ebrahimi
sljit_emit_call(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types)2729*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
2730*22dc650dSSadaf Ebrahimi sljit_s32 arg_types)
2731*22dc650dSSadaf Ebrahimi {
2732*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(arg_types);
2733*22dc650dSSadaf Ebrahimi CHECK_ERROR_PTR();
2734*22dc650dSSadaf Ebrahimi CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
2735*22dc650dSSadaf Ebrahimi
2736*22dc650dSSadaf Ebrahimi if (type & SLJIT_CALL_RETURN) {
2737*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
2738*22dc650dSSadaf Ebrahimi type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
2739*22dc650dSSadaf Ebrahimi }
2740*22dc650dSSadaf Ebrahimi
2741*22dc650dSSadaf Ebrahimi SLJIT_SKIP_CHECKS(compiler);
2742*22dc650dSSadaf Ebrahimi return sljit_emit_jump(compiler, type);
2743*22dc650dSSadaf Ebrahimi }
2744*22dc650dSSadaf Ebrahimi
sljit_emit_cmp(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2745*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
2746*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2747*22dc650dSSadaf Ebrahimi sljit_s32 src2, sljit_sw src2w)
2748*22dc650dSSadaf Ebrahimi {
2749*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
2750*22dc650dSSadaf Ebrahimi sljit_s32 flags;
2751*22dc650dSSadaf Ebrahimi sljit_ins inst;
2752*22dc650dSSadaf Ebrahimi sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
2753*22dc650dSSadaf Ebrahimi
2754*22dc650dSSadaf Ebrahimi CHECK_ERROR_PTR();
2755*22dc650dSSadaf Ebrahimi CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));
2756*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src1, src1w);
2757*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src2, src2w);
2758*22dc650dSSadaf Ebrahimi
2759*22dc650dSSadaf Ebrahimi compiler->cache_arg = 0;
2760*22dc650dSSadaf Ebrahimi compiler->cache_argw = 0;
2761*22dc650dSSadaf Ebrahimi
2762*22dc650dSSadaf Ebrahimi flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
2763*22dc650dSSadaf Ebrahimi
2764*22dc650dSSadaf Ebrahimi if (src1 & SLJIT_MEM) {
2765*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w));
2766*22dc650dSSadaf Ebrahimi src1 = TMP_REG1;
2767*22dc650dSSadaf Ebrahimi }
2768*22dc650dSSadaf Ebrahimi
2769*22dc650dSSadaf Ebrahimi if (src2 & SLJIT_MEM) {
2770*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(emit_op_mem2(compiler, flags, src2_tmp_reg, src2, src2w, 0, 0));
2771*22dc650dSSadaf Ebrahimi src2 = src2_tmp_reg;
2772*22dc650dSSadaf Ebrahimi }
2773*22dc650dSSadaf Ebrahimi
2774*22dc650dSSadaf Ebrahimi if (src1 == SLJIT_IMM) {
2775*22dc650dSSadaf Ebrahimi if (src1w != 0) {
2776*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
2777*22dc650dSSadaf Ebrahimi src1 = TMP_REG1;
2778*22dc650dSSadaf Ebrahimi }
2779*22dc650dSSadaf Ebrahimi else
2780*22dc650dSSadaf Ebrahimi src1 = TMP_ZERO;
2781*22dc650dSSadaf Ebrahimi }
2782*22dc650dSSadaf Ebrahimi
2783*22dc650dSSadaf Ebrahimi if (src2 == SLJIT_IMM) {
2784*22dc650dSSadaf Ebrahimi if (src2w != 0) {
2785*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w));
2786*22dc650dSSadaf Ebrahimi src2 = src2_tmp_reg;
2787*22dc650dSSadaf Ebrahimi }
2788*22dc650dSSadaf Ebrahimi else
2789*22dc650dSSadaf Ebrahimi src2 = TMP_ZERO;
2790*22dc650dSSadaf Ebrahimi }
2791*22dc650dSSadaf Ebrahimi
2792*22dc650dSSadaf Ebrahimi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2793*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(!jump);
2794*22dc650dSSadaf Ebrahimi set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND));
2795*22dc650dSSadaf Ebrahimi type &= 0xff;
2796*22dc650dSSadaf Ebrahimi
2797*22dc650dSSadaf Ebrahimi switch (type) {
2798*22dc650dSSadaf Ebrahimi case SLJIT_EQUAL:
2799*22dc650dSSadaf Ebrahimi inst = BNE | RJ(src1) | RD(src2);
2800*22dc650dSSadaf Ebrahimi break;
2801*22dc650dSSadaf Ebrahimi case SLJIT_NOT_EQUAL:
2802*22dc650dSSadaf Ebrahimi inst = BEQ | RJ(src1) | RD(src2);
2803*22dc650dSSadaf Ebrahimi break;
2804*22dc650dSSadaf Ebrahimi case SLJIT_LESS:
2805*22dc650dSSadaf Ebrahimi inst = BGEU | RJ(src1) | RD(src2);
2806*22dc650dSSadaf Ebrahimi break;
2807*22dc650dSSadaf Ebrahimi case SLJIT_GREATER_EQUAL:
2808*22dc650dSSadaf Ebrahimi inst = BLTU | RJ(src1) | RD(src2);
2809*22dc650dSSadaf Ebrahimi break;
2810*22dc650dSSadaf Ebrahimi case SLJIT_GREATER:
2811*22dc650dSSadaf Ebrahimi inst = BGEU | RJ(src2) | RD(src1);
2812*22dc650dSSadaf Ebrahimi break;
2813*22dc650dSSadaf Ebrahimi case SLJIT_LESS_EQUAL:
2814*22dc650dSSadaf Ebrahimi inst = BLTU | RJ(src2) | RD(src1);
2815*22dc650dSSadaf Ebrahimi break;
2816*22dc650dSSadaf Ebrahimi case SLJIT_SIG_LESS:
2817*22dc650dSSadaf Ebrahimi inst = BGE | RJ(src1) | RD(src2);
2818*22dc650dSSadaf Ebrahimi break;
2819*22dc650dSSadaf Ebrahimi case SLJIT_SIG_GREATER_EQUAL:
2820*22dc650dSSadaf Ebrahimi inst = BLT | RJ(src1) | RD(src2);
2821*22dc650dSSadaf Ebrahimi break;
2822*22dc650dSSadaf Ebrahimi case SLJIT_SIG_GREATER:
2823*22dc650dSSadaf Ebrahimi inst = BGE | RJ(src2) | RD(src1);
2824*22dc650dSSadaf Ebrahimi break;
2825*22dc650dSSadaf Ebrahimi case SLJIT_SIG_LESS_EQUAL:
2826*22dc650dSSadaf Ebrahimi inst = BLT | RJ(src2) | RD(src1);
2827*22dc650dSSadaf Ebrahimi break;
2828*22dc650dSSadaf Ebrahimi default:
2829*22dc650dSSadaf Ebrahimi inst = BREAK;
2830*22dc650dSSadaf Ebrahimi SLJIT_UNREACHABLE();
2831*22dc650dSSadaf Ebrahimi }
2832*22dc650dSSadaf Ebrahimi
2833*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(push_inst(compiler, inst));
2834*22dc650dSSadaf Ebrahimi
2835*22dc650dSSadaf Ebrahimi jump->addr = compiler->size;
2836*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));
2837*22dc650dSSadaf Ebrahimi
2838*22dc650dSSadaf Ebrahimi /* Maximum number of instructions required for generating a constant. */
2839*22dc650dSSadaf Ebrahimi compiler->size += JUMP_MAX_SIZE - 1;
2840*22dc650dSSadaf Ebrahimi
2841*22dc650dSSadaf Ebrahimi return jump;
2842*22dc650dSSadaf Ebrahimi }
2843*22dc650dSSadaf Ebrahimi
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)2844*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2845*22dc650dSSadaf Ebrahimi {
2846*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
2847*22dc650dSSadaf Ebrahimi
2848*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2849*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2850*22dc650dSSadaf Ebrahimi
2851*22dc650dSSadaf Ebrahimi if (src != SLJIT_IMM) {
2852*22dc650dSSadaf Ebrahimi if (src & SLJIT_MEM) {
2853*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src, srcw);
2854*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
2855*22dc650dSSadaf Ebrahimi src = TMP_REG1;
2856*22dc650dSSadaf Ebrahimi }
2857*22dc650dSSadaf Ebrahimi return push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(src) | IMM_I12(0));
2858*22dc650dSSadaf Ebrahimi }
2859*22dc650dSSadaf Ebrahimi
2860*22dc650dSSadaf Ebrahimi /* These jumps are converted to jump/call instructions when possible. */
2861*22dc650dSSadaf Ebrahimi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2862*22dc650dSSadaf Ebrahimi FAIL_IF(!jump);
2863*22dc650dSSadaf Ebrahimi set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0));
2864*22dc650dSSadaf Ebrahimi jump->u.target = (sljit_uw)srcw;
2865*22dc650dSSadaf Ebrahimi
2866*22dc650dSSadaf Ebrahimi jump->addr = compiler->size;
2867*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));
2868*22dc650dSSadaf Ebrahimi
2869*22dc650dSSadaf Ebrahimi /* Maximum number of instructions required for generating a constant. */
2870*22dc650dSSadaf Ebrahimi compiler->size += JUMP_MAX_SIZE - 1;
2871*22dc650dSSadaf Ebrahimi
2872*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2873*22dc650dSSadaf Ebrahimi }
2874*22dc650dSSadaf Ebrahimi
sljit_emit_icall(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types,sljit_s32 src,sljit_sw srcw)2875*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
2876*22dc650dSSadaf Ebrahimi sljit_s32 arg_types,
2877*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
2878*22dc650dSSadaf Ebrahimi {
2879*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(arg_types);
2880*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2881*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
2882*22dc650dSSadaf Ebrahimi
2883*22dc650dSSadaf Ebrahimi if (src & SLJIT_MEM) {
2884*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src, srcw);
2885*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
2886*22dc650dSSadaf Ebrahimi src = TMP_REG1;
2887*22dc650dSSadaf Ebrahimi }
2888*22dc650dSSadaf Ebrahimi
2889*22dc650dSSadaf Ebrahimi if (type & SLJIT_CALL_RETURN) {
2890*22dc650dSSadaf Ebrahimi if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
2891*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
2892*22dc650dSSadaf Ebrahimi src = TMP_REG1;
2893*22dc650dSSadaf Ebrahimi }
2894*22dc650dSSadaf Ebrahimi
2895*22dc650dSSadaf Ebrahimi FAIL_IF(emit_stack_frame_release(compiler, 0));
2896*22dc650dSSadaf Ebrahimi type = SLJIT_JUMP;
2897*22dc650dSSadaf Ebrahimi }
2898*22dc650dSSadaf Ebrahimi
2899*22dc650dSSadaf Ebrahimi SLJIT_SKIP_CHECKS(compiler);
2900*22dc650dSSadaf Ebrahimi return sljit_emit_ijump(compiler, type, src, srcw);
2901*22dc650dSSadaf Ebrahimi }
2902*22dc650dSSadaf Ebrahimi
sljit_emit_op_flags(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 type)2903*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2904*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw,
2905*22dc650dSSadaf Ebrahimi sljit_s32 type)
2906*22dc650dSSadaf Ebrahimi {
2907*22dc650dSSadaf Ebrahimi sljit_s32 src_r, dst_r, invert;
2908*22dc650dSSadaf Ebrahimi sljit_s32 saved_op = op;
2909*22dc650dSSadaf Ebrahimi sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
2910*22dc650dSSadaf Ebrahimi
2911*22dc650dSSadaf Ebrahimi CHECK_ERROR();
2912*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2913*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
2914*22dc650dSSadaf Ebrahimi
2915*22dc650dSSadaf Ebrahimi op = GET_OPCODE(op);
2916*22dc650dSSadaf Ebrahimi dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
2917*22dc650dSSadaf Ebrahimi
2918*22dc650dSSadaf Ebrahimi compiler->cache_arg = 0;
2919*22dc650dSSadaf Ebrahimi compiler->cache_argw = 0;
2920*22dc650dSSadaf Ebrahimi
2921*22dc650dSSadaf Ebrahimi if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
2922*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
2923*22dc650dSSadaf Ebrahimi
2924*22dc650dSSadaf Ebrahimi if (type < SLJIT_F_EQUAL) {
2925*22dc650dSSadaf Ebrahimi src_r = OTHER_FLAG;
2926*22dc650dSSadaf Ebrahimi invert = type & 0x1;
2927*22dc650dSSadaf Ebrahimi
2928*22dc650dSSadaf Ebrahimi switch (type) {
2929*22dc650dSSadaf Ebrahimi case SLJIT_EQUAL:
2930*22dc650dSSadaf Ebrahimi case SLJIT_NOT_EQUAL:
2931*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
2932*22dc650dSSadaf Ebrahimi src_r = dst_r;
2933*22dc650dSSadaf Ebrahimi break;
2934*22dc650dSSadaf Ebrahimi case SLJIT_ATOMIC_STORED:
2935*22dc650dSSadaf Ebrahimi case SLJIT_ATOMIC_NOT_STORED:
2936*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
2937*22dc650dSSadaf Ebrahimi src_r = dst_r;
2938*22dc650dSSadaf Ebrahimi invert ^= 0x1;
2939*22dc650dSSadaf Ebrahimi break;
2940*22dc650dSSadaf Ebrahimi case SLJIT_OVERFLOW:
2941*22dc650dSSadaf Ebrahimi case SLJIT_NOT_OVERFLOW:
2942*22dc650dSSadaf Ebrahimi if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) {
2943*22dc650dSSadaf Ebrahimi src_r = OTHER_FLAG;
2944*22dc650dSSadaf Ebrahimi break;
2945*22dc650dSSadaf Ebrahimi }
2946*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(OTHER_FLAG) | IMM_I12(1)));
2947*22dc650dSSadaf Ebrahimi src_r = dst_r;
2948*22dc650dSSadaf Ebrahimi invert ^= 0x1;
2949*22dc650dSSadaf Ebrahimi break;
2950*22dc650dSSadaf Ebrahimi }
2951*22dc650dSSadaf Ebrahimi } else {
2952*22dc650dSSadaf Ebrahimi invert = 0;
2953*22dc650dSSadaf Ebrahimi src_r = OTHER_FLAG;
2954*22dc650dSSadaf Ebrahimi
2955*22dc650dSSadaf Ebrahimi switch (type) {
2956*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_NOT_EQUAL:
2957*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_LESS_EQUAL:
2958*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED_GREATER_EQUAL:
2959*22dc650dSSadaf Ebrahimi case SLJIT_F_NOT_EQUAL:
2960*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED_OR_NOT_EQUAL:
2961*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2962*22dc650dSSadaf Ebrahimi case SLJIT_UNORDERED_OR_LESS_EQUAL:
2963*22dc650dSSadaf Ebrahimi case SLJIT_F_LESS_EQUAL:
2964*22dc650dSSadaf Ebrahimi case SLJIT_F_GREATER_EQUAL:
2965*22dc650dSSadaf Ebrahimi case SLJIT_ORDERED:
2966*22dc650dSSadaf Ebrahimi invert = 1;
2967*22dc650dSSadaf Ebrahimi break;
2968*22dc650dSSadaf Ebrahimi }
2969*22dc650dSSadaf Ebrahimi }
2970*22dc650dSSadaf Ebrahimi
2971*22dc650dSSadaf Ebrahimi if (invert) {
2972*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RJ(src_r) | IMM_I12(1)));
2973*22dc650dSSadaf Ebrahimi src_r = dst_r;
2974*22dc650dSSadaf Ebrahimi }
2975*22dc650dSSadaf Ebrahimi
2976*22dc650dSSadaf Ebrahimi if (op < SLJIT_ADD) {
2977*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM)
2978*22dc650dSSadaf Ebrahimi return emit_op_mem(compiler, mem_type, src_r, dst, dstw);
2979*22dc650dSSadaf Ebrahimi
2980*22dc650dSSadaf Ebrahimi if (src_r != dst_r)
2981*22dc650dSSadaf Ebrahimi return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(src_r) | IMM_I12(0));
2982*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
2983*22dc650dSSadaf Ebrahimi }
2984*22dc650dSSadaf Ebrahimi
2985*22dc650dSSadaf Ebrahimi mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE;
2986*22dc650dSSadaf Ebrahimi
2987*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM)
2988*22dc650dSSadaf Ebrahimi return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0);
2989*22dc650dSSadaf Ebrahimi return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0);
2990*22dc650dSSadaf Ebrahimi }
2991*22dc650dSSadaf Ebrahimi
sljit_emit_select(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_reg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2_reg)2992*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
2993*22dc650dSSadaf Ebrahimi sljit_s32 dst_reg,
2994*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
2995*22dc650dSSadaf Ebrahimi sljit_s32 src2_reg)
2996*22dc650dSSadaf Ebrahimi {
2997*22dc650dSSadaf Ebrahimi sljit_ins *ptr;
2998*22dc650dSSadaf Ebrahimi sljit_uw size;
2999*22dc650dSSadaf Ebrahimi sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
3000*22dc650dSSadaf Ebrahimi
3001*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3002*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
3003*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src1, src1w);
3004*22dc650dSSadaf Ebrahimi
3005*22dc650dSSadaf Ebrahimi if (dst_reg != src2_reg) {
3006*22dc650dSSadaf Ebrahimi if (dst_reg == src1) {
3007*22dc650dSSadaf Ebrahimi src1 = src2_reg;
3008*22dc650dSSadaf Ebrahimi src1w = 0;
3009*22dc650dSSadaf Ebrahimi type ^= 0x1;
3010*22dc650dSSadaf Ebrahimi } else {
3011*22dc650dSSadaf Ebrahimi if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
3012*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(dst_reg) | IMM_I12(0)));
3013*22dc650dSSadaf Ebrahimi
3014*22dc650dSSadaf Ebrahimi if ((src1 & REG_MASK) == dst_reg)
3015*22dc650dSSadaf Ebrahimi src1 = (src1 & ~REG_MASK) | TMP_REG1;
3016*22dc650dSSadaf Ebrahimi
3017*22dc650dSSadaf Ebrahimi if (OFFS_REG(src1) == dst_reg)
3018*22dc650dSSadaf Ebrahimi src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
3019*22dc650dSSadaf Ebrahimi }
3020*22dc650dSSadaf Ebrahimi
3021*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src2_reg) | IMM_I12(0)));
3022*22dc650dSSadaf Ebrahimi }
3023*22dc650dSSadaf Ebrahimi }
3024*22dc650dSSadaf Ebrahimi
3025*22dc650dSSadaf Ebrahimi size = compiler->size;
3026*22dc650dSSadaf Ebrahimi
3027*22dc650dSSadaf Ebrahimi ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
3028*22dc650dSSadaf Ebrahimi FAIL_IF(!ptr);
3029*22dc650dSSadaf Ebrahimi compiler->size++;
3030*22dc650dSSadaf Ebrahimi
3031*22dc650dSSadaf Ebrahimi if (src1 & SLJIT_MEM) {
3032*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w));
3033*22dc650dSSadaf Ebrahimi } else if (src1 == SLJIT_IMM) {
3034*22dc650dSSadaf Ebrahimi if (type & SLJIT_32)
3035*22dc650dSSadaf Ebrahimi src1w = (sljit_s32)src1w;
3036*22dc650dSSadaf Ebrahimi FAIL_IF(load_immediate(compiler, dst_reg, src1w));
3037*22dc650dSSadaf Ebrahimi } else
3038*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src1) | IMM_I12(0)));
3039*22dc650dSSadaf Ebrahimi
3040*22dc650dSSadaf Ebrahimi *ptr = get_jump_instruction(type & ~SLJIT_32) | IMM_I16(compiler->size - size);
3041*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3042*22dc650dSSadaf Ebrahimi }
3043*22dc650dSSadaf Ebrahimi
sljit_emit_fselect(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2_freg)3044*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3045*22dc650dSSadaf Ebrahimi sljit_s32 dst_freg,
3046*22dc650dSSadaf Ebrahimi sljit_s32 src1, sljit_sw src1w,
3047*22dc650dSSadaf Ebrahimi sljit_s32 src2_freg)
3048*22dc650dSSadaf Ebrahimi {
3049*22dc650dSSadaf Ebrahimi sljit_s32 invert = 0;
3050*22dc650dSSadaf Ebrahimi
3051*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3052*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3053*22dc650dSSadaf Ebrahimi
3054*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src1, src1w);
3055*22dc650dSSadaf Ebrahimi
3056*22dc650dSSadaf Ebrahimi if ((type & ~SLJIT_32) == SLJIT_EQUAL || (type & ~SLJIT_32) == SLJIT_NOT_EQUAL) {
3057*22dc650dSSadaf Ebrahimi if ((type & ~SLJIT_32) == SLJIT_EQUAL)
3058*22dc650dSSadaf Ebrahimi invert = 1;
3059*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(EQUAL_FLAG)));
3060*22dc650dSSadaf Ebrahimi } else {
3061*22dc650dSSadaf Ebrahimi if (get_jump_instruction(type & ~SLJIT_32) == (BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO)))
3062*22dc650dSSadaf Ebrahimi invert = 1;
3063*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(OTHER_FLAG)));
3064*22dc650dSSadaf Ebrahimi }
3065*22dc650dSSadaf Ebrahimi
3066*22dc650dSSadaf Ebrahimi if (src1 & SLJIT_MEM) {
3067*22dc650dSSadaf Ebrahimi FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src1, src1w));
3068*22dc650dSSadaf Ebrahimi if (invert)
3069*22dc650dSSadaf Ebrahimi return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(TMP_FREG2) | FRK(src2_freg) | FCA(F_OTHER_FLAG));
3070*22dc650dSSadaf Ebrahimi return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(TMP_FREG2) | FCA(F_OTHER_FLAG));
3071*22dc650dSSadaf Ebrahimi } else {
3072*22dc650dSSadaf Ebrahimi if (invert)
3073*22dc650dSSadaf Ebrahimi return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src1) | FRK(src2_freg) | FCA(F_OTHER_FLAG));
3074*22dc650dSSadaf Ebrahimi return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(src1) | FCA(F_OTHER_FLAG));
3075*22dc650dSSadaf Ebrahimi }
3076*22dc650dSSadaf Ebrahimi }
3077*22dc650dSSadaf Ebrahimi
3078*22dc650dSSadaf Ebrahimi #undef FLOAT_DATA
3079*22dc650dSSadaf Ebrahimi
sljit_emit_mem(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)3080*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
3081*22dc650dSSadaf Ebrahimi sljit_s32 reg,
3082*22dc650dSSadaf Ebrahimi sljit_s32 mem, sljit_sw memw)
3083*22dc650dSSadaf Ebrahimi {
3084*22dc650dSSadaf Ebrahimi sljit_s32 flags;
3085*22dc650dSSadaf Ebrahimi
3086*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3087*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
3088*22dc650dSSadaf Ebrahimi
3089*22dc650dSSadaf Ebrahimi if (!(reg & REG_PAIR_MASK))
3090*22dc650dSSadaf Ebrahimi return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
3091*22dc650dSSadaf Ebrahimi
3092*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
3093*22dc650dSSadaf Ebrahimi memw &= 0x3;
3094*22dc650dSSadaf Ebrahimi
3095*22dc650dSSadaf Ebrahimi if (SLJIT_UNLIKELY(memw != 0)) {
3096*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(mem)) | IMM_I12(memw)));
3097*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));
3098*22dc650dSSadaf Ebrahimi } else
3099*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(mem & REG_MASK) | RK(OFFS_REG(mem))));
3100*22dc650dSSadaf Ebrahimi
3101*22dc650dSSadaf Ebrahimi mem = TMP_REG1;
3102*22dc650dSSadaf Ebrahimi memw = 0;
3103*22dc650dSSadaf Ebrahimi } else if (memw > I12_MAX - SSIZE_OF(sw) || memw < I12_MIN) {
3104*22dc650dSSadaf Ebrahimi if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) {
3105*22dc650dSSadaf Ebrahimi FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw)));
3106*22dc650dSSadaf Ebrahimi memw &= 0xfff;
3107*22dc650dSSadaf Ebrahimi } else {
3108*22dc650dSSadaf Ebrahimi FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
3109*22dc650dSSadaf Ebrahimi memw = 0;
3110*22dc650dSSadaf Ebrahimi }
3111*22dc650dSSadaf Ebrahimi
3112*22dc650dSSadaf Ebrahimi if (mem & REG_MASK)
3113*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));
3114*22dc650dSSadaf Ebrahimi
3115*22dc650dSSadaf Ebrahimi mem = TMP_REG1;
3116*22dc650dSSadaf Ebrahimi } else {
3117*22dc650dSSadaf Ebrahimi mem &= REG_MASK;
3118*22dc650dSSadaf Ebrahimi memw &= 0xfff;
3119*22dc650dSSadaf Ebrahimi }
3120*22dc650dSSadaf Ebrahimi
3121*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((memw >= 0 && memw <= I12_MAX - SSIZE_OF(sw)) || (memw > I12_MAX && memw <= 0xfff));
3122*22dc650dSSadaf Ebrahimi
3123*22dc650dSSadaf Ebrahimi if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) {
3124*22dc650dSSadaf Ebrahimi FAIL_IF(push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff));
3125*22dc650dSSadaf Ebrahimi return push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw);
3126*22dc650dSSadaf Ebrahimi }
3127*22dc650dSSadaf Ebrahimi
3128*22dc650dSSadaf Ebrahimi flags = WORD_DATA | (!(type & SLJIT_MEM_STORE) ? LOAD_DATA : 0);
3129*22dc650dSSadaf Ebrahimi
3130*22dc650dSSadaf Ebrahimi FAIL_IF(push_mem_inst(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw));
3131*22dc650dSSadaf Ebrahimi return push_mem_inst(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff);
3132*22dc650dSSadaf Ebrahimi }
3133*22dc650dSSadaf Ebrahimi
3134*22dc650dSSadaf Ebrahimi #undef TO_ARGW_HI
3135*22dc650dSSadaf Ebrahimi
/* Reduces the memory operand *mem_ptr / memw to a single register holding
   the full address and stores that register back into *mem_ptr.

   - base register only, memw == 0: *mem_ptr becomes the base register
     and nothing is emitted.
   - base + (offset register << (memw & 0x3)): computed into TMP_REG3.
   - no base register: memw is loaded into TMP_REG3.
   - base register with non-zero memw: memw is loaded into TMP_REG3 and
     the base register is added.

   In every case except the first *mem_ptr is set to TMP_REG3 before any
   instruction is emitted. Returns SLJIT_SUCCESS or the result of the
   failing emitter. */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 operand = *mem_ptr;
	sljit_s32 base = operand & REG_MASK;
	sljit_s32 has_index = (operand & OFFS_REG_MASK) != 0;

	/* Plain [base]: the base register can be used as it is. */
	if (!has_index && base != 0 && memw == 0) {
		*mem_ptr = base;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG3;

	if (SLJIT_UNLIKELY(has_index)) {
		FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(operand)) | IMM_I12(memw & 0x3)));
		return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base));
	}

	/* Absolute address. */
	if (base == 0)
		return load_immediate(compiler, TMP_REG3, memw);

	FAIL_IF(load_immediate(compiler, TMP_REG3, memw));
	return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base));
}
3163*22dc650dSSadaf Ebrahimi
sljit_emit_simd_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 srcdst,sljit_sw srcdstw)3164*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3165*22dc650dSSadaf Ebrahimi sljit_s32 freg,
3166*22dc650dSSadaf Ebrahimi sljit_s32 srcdst, sljit_sw srcdstw)
3167*22dc650dSSadaf Ebrahimi {
3168*22dc650dSSadaf Ebrahimi sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3169*22dc650dSSadaf Ebrahimi sljit_ins ins = 0;
3170*22dc650dSSadaf Ebrahimi
3171*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3172*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3173*22dc650dSSadaf Ebrahimi
3174*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3175*22dc650dSSadaf Ebrahimi
3176*22dc650dSSadaf Ebrahimi if (reg_size != 5 && reg_size != 4)
3177*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3178*22dc650dSSadaf Ebrahimi
3179*22dc650dSSadaf Ebrahimi if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3180*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3181*22dc650dSSadaf Ebrahimi
3182*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
3183*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3184*22dc650dSSadaf Ebrahimi
3185*22dc650dSSadaf Ebrahimi if (!(srcdst & SLJIT_MEM)) {
3186*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_STORE)
3187*22dc650dSSadaf Ebrahimi ins = FRD(srcdst) | FRJ(freg) | FRK(freg);
3188*22dc650dSSadaf Ebrahimi else
3189*22dc650dSSadaf Ebrahimi ins = FRD(freg) | FRJ(srcdst) | FRK(srcdst);
3190*22dc650dSSadaf Ebrahimi
3191*22dc650dSSadaf Ebrahimi if (reg_size == 5)
3192*22dc650dSSadaf Ebrahimi ins |= VOR_V | (sljit_ins)1 << 26;
3193*22dc650dSSadaf Ebrahimi else
3194*22dc650dSSadaf Ebrahimi ins |= VOR_V;
3195*22dc650dSSadaf Ebrahimi
3196*22dc650dSSadaf Ebrahimi return push_inst(compiler, ins);
3197*22dc650dSSadaf Ebrahimi }
3198*22dc650dSSadaf Ebrahimi
3199*22dc650dSSadaf Ebrahimi ins = (type & SLJIT_SIMD_STORE) ? VST : VLD;
3200*22dc650dSSadaf Ebrahimi
3201*22dc650dSSadaf Ebrahimi if (reg_size == 5)
3202*22dc650dSSadaf Ebrahimi ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD;
3203*22dc650dSSadaf Ebrahimi
3204*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(srcdst) && srcdst >= 0 && (srcdstw >= I12_MIN && srcdstw <= I12_MAX))
3205*22dc650dSSadaf Ebrahimi return push_inst(compiler, ins | FRD(freg) | RJ((sljit_u8)srcdst) | IMM_I12(srcdstw));
3206*22dc650dSSadaf Ebrahimi else {
3207*22dc650dSSadaf Ebrahimi FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3208*22dc650dSSadaf Ebrahimi return push_inst(compiler, ins | FRD(freg) | RJ(srcdst) | IMM_I12(0));
3209*22dc650dSSadaf Ebrahimi }
3210*22dc650dSSadaf Ebrahimi }
3211*22dc650dSSadaf Ebrahimi
sljit_emit_simd_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)3212*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3213*22dc650dSSadaf Ebrahimi sljit_s32 freg,
3214*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
3215*22dc650dSSadaf Ebrahimi {
3216*22dc650dSSadaf Ebrahimi sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3217*22dc650dSSadaf Ebrahimi sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3218*22dc650dSSadaf Ebrahimi sljit_ins ins = 0;
3219*22dc650dSSadaf Ebrahimi
3220*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3221*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
3222*22dc650dSSadaf Ebrahimi
3223*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src, srcw);
3224*22dc650dSSadaf Ebrahimi
3225*22dc650dSSadaf Ebrahimi if (reg_size != 5 && reg_size != 4)
3226*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3227*22dc650dSSadaf Ebrahimi
3228*22dc650dSSadaf Ebrahimi if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3229*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3230*22dc650dSSadaf Ebrahimi
3231*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
3232*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3233*22dc650dSSadaf Ebrahimi
3234*22dc650dSSadaf Ebrahimi if (src & SLJIT_MEM) {
3235*22dc650dSSadaf Ebrahimi FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
3236*22dc650dSSadaf Ebrahimi
3237*22dc650dSSadaf Ebrahimi if (reg_size == 5)
3238*22dc650dSSadaf Ebrahimi ins = (sljit_ins)1 << 25;
3239*22dc650dSSadaf Ebrahimi
3240*22dc650dSSadaf Ebrahimi return push_inst(compiler, VLDREPL | ins | FRD(freg) | RJ(src) | (sljit_ins)1 << (23 - elem_size));
3241*22dc650dSSadaf Ebrahimi }
3242*22dc650dSSadaf Ebrahimi
3243*22dc650dSSadaf Ebrahimi if (reg_size == 5)
3244*22dc650dSSadaf Ebrahimi ins = (sljit_ins)1 << 26;
3245*22dc650dSSadaf Ebrahimi
3246*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_FLOAT) {
3247*22dc650dSSadaf Ebrahimi if (src == SLJIT_IMM)
3248*22dc650dSSadaf Ebrahimi return push_inst(compiler, VREPLGR2VR | ins | FRD(freg) | RJ(TMP_ZERO) | (sljit_ins)elem_size << 10);
3249*22dc650dSSadaf Ebrahimi
3250*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VREPLVE | ins | FRD(freg) | FRJ(src) | RK(TMP_ZERO) | (sljit_ins)elem_size << 15));
3251*22dc650dSSadaf Ebrahimi
3252*22dc650dSSadaf Ebrahimi if (reg_size == 5) {
3253*22dc650dSSadaf Ebrahimi ins = (sljit_ins)(0x44 << 10);
3254*22dc650dSSadaf Ebrahimi return push_inst(compiler, XVPERMI | ins | FRD(freg) | FRJ(freg));
3255*22dc650dSSadaf Ebrahimi }
3256*22dc650dSSadaf Ebrahimi
3257*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3258*22dc650dSSadaf Ebrahimi }
3259*22dc650dSSadaf Ebrahimi
3260*22dc650dSSadaf Ebrahimi ins |= VREPLGR2VR | (sljit_ins)elem_size << 10;
3261*22dc650dSSadaf Ebrahimi
3262*22dc650dSSadaf Ebrahimi if (src == SLJIT_IMM) {
3263*22dc650dSSadaf Ebrahimi FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
3264*22dc650dSSadaf Ebrahimi src = TMP_REG2;
3265*22dc650dSSadaf Ebrahimi }
3266*22dc650dSSadaf Ebrahimi
3267*22dc650dSSadaf Ebrahimi return push_inst(compiler, ins | FRD(freg) | RJ(src));
3268*22dc650dSSadaf Ebrahimi }
3269*22dc650dSSadaf Ebrahimi
sljit_emit_simd_lane_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 lane_index,sljit_s32 srcdst,sljit_sw srcdstw)3270*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
3271*22dc650dSSadaf Ebrahimi sljit_s32 freg, sljit_s32 lane_index,
3272*22dc650dSSadaf Ebrahimi sljit_s32 srcdst, sljit_sw srcdstw)
3273*22dc650dSSadaf Ebrahimi {
3274*22dc650dSSadaf Ebrahimi sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3275*22dc650dSSadaf Ebrahimi sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3276*22dc650dSSadaf Ebrahimi sljit_ins ins = 0;
3277*22dc650dSSadaf Ebrahimi
3278*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3279*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
3280*22dc650dSSadaf Ebrahimi
3281*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3282*22dc650dSSadaf Ebrahimi
3283*22dc650dSSadaf Ebrahimi if (reg_size != 5 && reg_size != 4)
3284*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3285*22dc650dSSadaf Ebrahimi
3286*22dc650dSSadaf Ebrahimi if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3287*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3288*22dc650dSSadaf Ebrahimi
3289*22dc650dSSadaf Ebrahimi if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3290*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3291*22dc650dSSadaf Ebrahimi
3292*22dc650dSSadaf Ebrahimi if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3293*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3294*22dc650dSSadaf Ebrahimi
3295*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
3296*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3297*22dc650dSSadaf Ebrahimi
3298*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_LANE_ZERO) {
3299*22dc650dSSadaf Ebrahimi ins = (reg_size == 5) ? ((sljit_ins)1 << 26) : 0;
3300*22dc650dSSadaf Ebrahimi
3301*22dc650dSSadaf Ebrahimi if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
3302*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VOR_V | ins | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3303*22dc650dSSadaf Ebrahimi srcdst = TMP_FREG1;
3304*22dc650dSSadaf Ebrahimi srcdstw = 0;
3305*22dc650dSSadaf Ebrahimi }
3306*22dc650dSSadaf Ebrahimi
3307*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VXOR_V | ins | FRD(freg) | FRJ(freg) | FRK(freg)));
3308*22dc650dSSadaf Ebrahimi }
3309*22dc650dSSadaf Ebrahimi
3310*22dc650dSSadaf Ebrahimi if (srcdst & SLJIT_MEM) {
3311*22dc650dSSadaf Ebrahimi FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3312*22dc650dSSadaf Ebrahimi
3313*22dc650dSSadaf Ebrahimi if (reg_size == 5)
3314*22dc650dSSadaf Ebrahimi ins = (sljit_ins)1 << 25;
3315*22dc650dSSadaf Ebrahimi
3316*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_STORE) {
3317*22dc650dSSadaf Ebrahimi ins |= (sljit_ins)lane_index << 18 | (sljit_ins)(1 << (23 - elem_size));
3318*22dc650dSSadaf Ebrahimi return push_inst(compiler, VSTELM | ins | FRD(freg) | RJ(srcdst));
3319*22dc650dSSadaf Ebrahimi } else {
3320*22dc650dSSadaf Ebrahimi emit_op_mem(compiler, (elem_size == 3 ? WORD_DATA : (elem_size == 2 ? INT_DATA : (elem_size == 1 ? HALF_DATA : BYTE_DATA))) | LOAD_DATA, TMP_REG1, srcdst | SLJIT_MEM, 0);
3321*22dc650dSSadaf Ebrahimi srcdst = TMP_REG1;
3322*22dc650dSSadaf Ebrahimi ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3323*22dc650dSSadaf Ebrahimi
3324*22dc650dSSadaf Ebrahimi if (reg_size == 5) {
3325*22dc650dSSadaf Ebrahimi if (elem_size < 2) {
3326*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3327*22dc650dSSadaf Ebrahimi if (lane_index >= (2 << (3 - elem_size))) {
3328*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1)));
3329*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size)))));
3330*22dc650dSSadaf Ebrahimi return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(2));
3331*22dc650dSSadaf Ebrahimi } else {
3332*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index)));
3333*22dc650dSSadaf Ebrahimi return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(18));
3334*22dc650dSSadaf Ebrahimi }
3335*22dc650dSSadaf Ebrahimi } else
3336*22dc650dSSadaf Ebrahimi ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26;
3337*22dc650dSSadaf Ebrahimi }
3338*22dc650dSSadaf Ebrahimi
3339*22dc650dSSadaf Ebrahimi return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index));
3340*22dc650dSSadaf Ebrahimi }
3341*22dc650dSSadaf Ebrahimi }
3342*22dc650dSSadaf Ebrahimi
3343*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_FLOAT) {
3344*22dc650dSSadaf Ebrahimi ins = (reg_size == 5) ? (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26 : (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3345*22dc650dSSadaf Ebrahimi
3346*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_STORE) {
3347*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(freg) | IMM_V(lane_index)));
3348*22dc650dSSadaf Ebrahimi return push_inst(compiler, VINSGR2VR | ins | FRD(srcdst) | RJ(TMP_REG1) | IMM_V(0));
3349*22dc650dSSadaf Ebrahimi } else {
3350*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(srcdst) | IMM_V(0)));
3351*22dc650dSSadaf Ebrahimi return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(TMP_REG1) | IMM_V(lane_index));
3352*22dc650dSSadaf Ebrahimi }
3353*22dc650dSSadaf Ebrahimi }
3354*22dc650dSSadaf Ebrahimi
3355*22dc650dSSadaf Ebrahimi if (srcdst == SLJIT_IMM) {
3356*22dc650dSSadaf Ebrahimi FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw));
3357*22dc650dSSadaf Ebrahimi srcdst = TMP_REG1;
3358*22dc650dSSadaf Ebrahimi }
3359*22dc650dSSadaf Ebrahimi
3360*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_STORE) {
3361*22dc650dSSadaf Ebrahimi ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3362*22dc650dSSadaf Ebrahimi
3363*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_LANE_SIGNED)
3364*22dc650dSSadaf Ebrahimi ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18));
3365*22dc650dSSadaf Ebrahimi else
3366*22dc650dSSadaf Ebrahimi ins |= VPICKVE2GR_U;
3367*22dc650dSSadaf Ebrahimi
3368*22dc650dSSadaf Ebrahimi if (reg_size == 5) {
3369*22dc650dSSadaf Ebrahimi if (elem_size < 2) {
3370*22dc650dSSadaf Ebrahimi if (lane_index >= (2 << (3 - elem_size))) {
3371*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_LANE_SIGNED)
3372*22dc650dSSadaf Ebrahimi ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18));
3373*22dc650dSSadaf Ebrahimi else
3374*22dc650dSSadaf Ebrahimi ins |= VPICKVE2GR_U;
3375*22dc650dSSadaf Ebrahimi
3376*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3377*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1)));
3378*22dc650dSSadaf Ebrahimi return push_inst(compiler, ins | RD(srcdst) | FRJ(TMP_FREG1) | IMM_V(lane_index % (2 << (3 - elem_size))));
3379*22dc650dSSadaf Ebrahimi }
3380*22dc650dSSadaf Ebrahimi } else {
3381*22dc650dSSadaf Ebrahimi ins ^= (sljit_ins)1 << (15 - elem_size);
3382*22dc650dSSadaf Ebrahimi ins |= (sljit_ins)1 << 26;
3383*22dc650dSSadaf Ebrahimi }
3384*22dc650dSSadaf Ebrahimi }
3385*22dc650dSSadaf Ebrahimi
3386*22dc650dSSadaf Ebrahimi return push_inst(compiler, ins | RD(srcdst) | FRJ(freg) | IMM_V(lane_index));
3387*22dc650dSSadaf Ebrahimi } else {
3388*22dc650dSSadaf Ebrahimi ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3389*22dc650dSSadaf Ebrahimi
3390*22dc650dSSadaf Ebrahimi if (reg_size == 5) {
3391*22dc650dSSadaf Ebrahimi if (elem_size < 2) {
3392*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3393*22dc650dSSadaf Ebrahimi if (lane_index >= (2 << (3 - elem_size))) {
3394*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1)));
3395*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size)))));
3396*22dc650dSSadaf Ebrahimi return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(2));
3397*22dc650dSSadaf Ebrahimi } else {
3398*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index)));
3399*22dc650dSSadaf Ebrahimi return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(18));
3400*22dc650dSSadaf Ebrahimi }
3401*22dc650dSSadaf Ebrahimi } else
3402*22dc650dSSadaf Ebrahimi ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26;
3403*22dc650dSSadaf Ebrahimi }
3404*22dc650dSSadaf Ebrahimi
3405*22dc650dSSadaf Ebrahimi return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index));
3406*22dc650dSSadaf Ebrahimi }
3407*22dc650dSSadaf Ebrahimi
3408*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3409*22dc650dSSadaf Ebrahimi }
3410*22dc650dSSadaf Ebrahimi
sljit_emit_simd_lane_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_s32 src_lane_index)3411*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3412*22dc650dSSadaf Ebrahimi sljit_s32 freg,
3413*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_s32 src_lane_index)
3414*22dc650dSSadaf Ebrahimi {
3415*22dc650dSSadaf Ebrahimi sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3416*22dc650dSSadaf Ebrahimi sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3417*22dc650dSSadaf Ebrahimi sljit_ins ins = 0;
3418*22dc650dSSadaf Ebrahimi
3419*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3420*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
3421*22dc650dSSadaf Ebrahimi
3422*22dc650dSSadaf Ebrahimi if (reg_size != 5 && reg_size != 4)
3423*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3424*22dc650dSSadaf Ebrahimi
3425*22dc650dSSadaf Ebrahimi if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3426*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3427*22dc650dSSadaf Ebrahimi
3428*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
3429*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3430*22dc650dSSadaf Ebrahimi
3431*22dc650dSSadaf Ebrahimi ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3432*22dc650dSSadaf Ebrahimi
3433*22dc650dSSadaf Ebrahimi if (reg_size == 5) {
3434*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VREPLVEI | (sljit_ins)1 << 26 | ins | FRD(freg) | FRJ(src) | IMM_V(src_lane_index % (2 << (3 - elem_size)))));
3435*22dc650dSSadaf Ebrahimi
3436*22dc650dSSadaf Ebrahimi ins = (src_lane_index < (2 << (3 - elem_size))) ? (sljit_ins)(0x44 << 10) : (sljit_ins)(0xee << 10);
3437*22dc650dSSadaf Ebrahimi
3438*22dc650dSSadaf Ebrahimi return push_inst(compiler, XVPERMI | ins | FRD(freg) | FRJ(freg));
3439*22dc650dSSadaf Ebrahimi }
3440*22dc650dSSadaf Ebrahimi
3441*22dc650dSSadaf Ebrahimi return push_inst(compiler, VREPLVEI | ins | FRD(freg) | FRJ(src) | IMM_V(src_lane_index));
3442*22dc650dSSadaf Ebrahimi }
3443*22dc650dSSadaf Ebrahimi
sljit_emit_simd_extend(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)3444*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
3445*22dc650dSSadaf Ebrahimi sljit_s32 freg,
3446*22dc650dSSadaf Ebrahimi sljit_s32 src, sljit_sw srcw)
3447*22dc650dSSadaf Ebrahimi {
3448*22dc650dSSadaf Ebrahimi sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3449*22dc650dSSadaf Ebrahimi sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3450*22dc650dSSadaf Ebrahimi sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3451*22dc650dSSadaf Ebrahimi sljit_ins ins = 0;
3452*22dc650dSSadaf Ebrahimi
3453*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3454*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
3455*22dc650dSSadaf Ebrahimi
3456*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(src, srcw);
3457*22dc650dSSadaf Ebrahimi
3458*22dc650dSSadaf Ebrahimi if (reg_size != 5 && reg_size != 4)
3459*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3460*22dc650dSSadaf Ebrahimi
3461*22dc650dSSadaf Ebrahimi if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3462*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3463*22dc650dSSadaf Ebrahimi
3464*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
3465*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3466*22dc650dSSadaf Ebrahimi
3467*22dc650dSSadaf Ebrahimi if (src & SLJIT_MEM) {
3468*22dc650dSSadaf Ebrahimi ins = (type & SLJIT_SIMD_STORE) ? VST : VLD;
3469*22dc650dSSadaf Ebrahimi
3470*22dc650dSSadaf Ebrahimi if (reg_size == 5)
3471*22dc650dSSadaf Ebrahimi ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD;
3472*22dc650dSSadaf Ebrahimi
3473*22dc650dSSadaf Ebrahimi if (FAST_IS_REG(src) && src >= 0 && (srcw >= I12_MIN && srcw <= I12_MAX))
3474*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ins | FRD(freg) | RJ(src) | IMM_I12(srcw)));
3475*22dc650dSSadaf Ebrahimi else {
3476*22dc650dSSadaf Ebrahimi FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
3477*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ins | FRD(freg) | RJ(src) | IMM_I12(0)));
3478*22dc650dSSadaf Ebrahimi }
3479*22dc650dSSadaf Ebrahimi src = freg;
3480*22dc650dSSadaf Ebrahimi }
3481*22dc650dSSadaf Ebrahimi
3482*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_FLOAT) {
3483*22dc650dSSadaf Ebrahimi if (elem_size != 2 || elem2_size != 3)
3484*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3485*22dc650dSSadaf Ebrahimi
3486*22dc650dSSadaf Ebrahimi ins = 0;
3487*22dc650dSSadaf Ebrahimi if (reg_size == 5) {
3488*22dc650dSSadaf Ebrahimi ins = (sljit_ins)1 << 26;
3489*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16)));
3490*22dc650dSSadaf Ebrahimi }
3491*22dc650dSSadaf Ebrahimi
3492*22dc650dSSadaf Ebrahimi return push_inst(compiler, VFCVTL_D_S | ins | FRD(freg) | FRJ(src));
3493*22dc650dSSadaf Ebrahimi }
3494*22dc650dSSadaf Ebrahimi
3495*22dc650dSSadaf Ebrahimi ins = (type & SLJIT_SIMD_EXTEND_SIGNED) ? VSLLWIL : (VSLLWIL | (sljit_ins)1 << 18);
3496*22dc650dSSadaf Ebrahimi
3497*22dc650dSSadaf Ebrahimi if (reg_size == 5)
3498*22dc650dSSadaf Ebrahimi ins |= (sljit_ins)1 << 26;
3499*22dc650dSSadaf Ebrahimi
3500*22dc650dSSadaf Ebrahimi do {
3501*22dc650dSSadaf Ebrahimi if (reg_size == 5)
3502*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16)));
3503*22dc650dSSadaf Ebrahimi
3504*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ins | ((sljit_ins)1 << (13 + elem_size)) | FRD(freg) | FRJ(src)));
3505*22dc650dSSadaf Ebrahimi src = freg;
3506*22dc650dSSadaf Ebrahimi } while (++elem_size < elem2_size);
3507*22dc650dSSadaf Ebrahimi
3508*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3509*22dc650dSSadaf Ebrahimi }
3510*22dc650dSSadaf Ebrahimi
sljit_emit_simd_sign(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 dst,sljit_sw dstw)3511*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
3512*22dc650dSSadaf Ebrahimi sljit_s32 freg,
3513*22dc650dSSadaf Ebrahimi sljit_s32 dst, sljit_sw dstw)
3514*22dc650dSSadaf Ebrahimi {
3515*22dc650dSSadaf Ebrahimi sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3516*22dc650dSSadaf Ebrahimi sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3517*22dc650dSSadaf Ebrahimi sljit_ins ins = 0;
3518*22dc650dSSadaf Ebrahimi sljit_s32 dst_r;
3519*22dc650dSSadaf Ebrahimi
3520*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3521*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
3522*22dc650dSSadaf Ebrahimi
3523*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
3524*22dc650dSSadaf Ebrahimi
3525*22dc650dSSadaf Ebrahimi if (reg_size != 5 && reg_size != 4)
3526*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3527*22dc650dSSadaf Ebrahimi
3528*22dc650dSSadaf Ebrahimi if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3529*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3530*22dc650dSSadaf Ebrahimi
3531*22dc650dSSadaf Ebrahimi if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
3532*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3533*22dc650dSSadaf Ebrahimi
3534*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
3535*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3536*22dc650dSSadaf Ebrahimi
3537*22dc650dSSadaf Ebrahimi dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
3538*22dc650dSSadaf Ebrahimi
3539*22dc650dSSadaf Ebrahimi if (reg_size == 5)
3540*22dc650dSSadaf Ebrahimi ins = (sljit_ins)1 << 26;
3541*22dc650dSSadaf Ebrahimi
3542*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VMSKLTZ | ins | (sljit_ins)(elem_size << 10) | FRD(TMP_FREG1) | FRJ(freg)));
3543*22dc650dSSadaf Ebrahimi
3544*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x3c << 10) | RD(dst_r) | FRJ(TMP_FREG1)));
3545*22dc650dSSadaf Ebrahimi
3546*22dc650dSSadaf Ebrahimi if (reg_size == 5) {
3547*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x38 << 10) | ins | RD(TMP_REG3) | FRJ(TMP_FREG1) | IMM_V(2)));
3548*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, SLLI_W | RD(TMP_REG3) | RJ(TMP_REG3) | IMM_I12(2 << (3 - elem_size))));
3549*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, OR | RD(dst_r) | RJ(dst_r) | RK(TMP_REG3)));
3550*22dc650dSSadaf Ebrahimi }
3551*22dc650dSSadaf Ebrahimi
3552*22dc650dSSadaf Ebrahimi if (dst_r == TMP_REG2)
3553*22dc650dSSadaf Ebrahimi return emit_op_mem(compiler, ((type & SLJIT_32) ? INT_DATA : WORD_DATA), TMP_REG2, dst, dstw);
3554*22dc650dSSadaf Ebrahimi
3555*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3556*22dc650dSSadaf Ebrahimi }
3557*22dc650dSSadaf Ebrahimi
sljit_emit_simd_op2(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1_freg,sljit_s32 src2_freg)3558*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
3559*22dc650dSSadaf Ebrahimi sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
3560*22dc650dSSadaf Ebrahimi {
3561*22dc650dSSadaf Ebrahimi sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3562*22dc650dSSadaf Ebrahimi sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3563*22dc650dSSadaf Ebrahimi sljit_ins ins = 0;
3564*22dc650dSSadaf Ebrahimi
3565*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3566*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
3567*22dc650dSSadaf Ebrahimi
3568*22dc650dSSadaf Ebrahimi if (reg_size != 5 && reg_size != 4)
3569*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3570*22dc650dSSadaf Ebrahimi
3571*22dc650dSSadaf Ebrahimi if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3572*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3573*22dc650dSSadaf Ebrahimi
3574*22dc650dSSadaf Ebrahimi if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3575*22dc650dSSadaf Ebrahimi return SLJIT_ERR_UNSUPPORTED;
3576*22dc650dSSadaf Ebrahimi
3577*22dc650dSSadaf Ebrahimi if (type & SLJIT_SIMD_TEST)
3578*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3579*22dc650dSSadaf Ebrahimi
3580*22dc650dSSadaf Ebrahimi switch (SLJIT_SIMD_GET_OPCODE(type)) {
3581*22dc650dSSadaf Ebrahimi case SLJIT_SIMD_OP2_AND:
3582*22dc650dSSadaf Ebrahimi ins = VAND_V;
3583*22dc650dSSadaf Ebrahimi break;
3584*22dc650dSSadaf Ebrahimi case SLJIT_SIMD_OP2_OR:
3585*22dc650dSSadaf Ebrahimi ins = VOR_V;
3586*22dc650dSSadaf Ebrahimi break;
3587*22dc650dSSadaf Ebrahimi case SLJIT_SIMD_OP2_XOR:
3588*22dc650dSSadaf Ebrahimi ins = VXOR_V;
3589*22dc650dSSadaf Ebrahimi break;
3590*22dc650dSSadaf Ebrahimi }
3591*22dc650dSSadaf Ebrahimi
3592*22dc650dSSadaf Ebrahimi if (reg_size == 5)
3593*22dc650dSSadaf Ebrahimi ins |= (sljit_ins)1 << 26;
3594*22dc650dSSadaf Ebrahimi
3595*22dc650dSSadaf Ebrahimi return push_inst(compiler, ins | FRD(dst_freg) | FRJ(src1_freg) | FRK(src2_freg));
3596*22dc650dSSadaf Ebrahimi }
3597*22dc650dSSadaf Ebrahimi
sljit_emit_atomic_load(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 mem_reg)3598*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler,
3599*22dc650dSSadaf Ebrahimi sljit_s32 op,
3600*22dc650dSSadaf Ebrahimi sljit_s32 dst_reg,
3601*22dc650dSSadaf Ebrahimi sljit_s32 mem_reg)
3602*22dc650dSSadaf Ebrahimi {
3603*22dc650dSSadaf Ebrahimi sljit_ins ins;
3604*22dc650dSSadaf Ebrahimi
3605*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3606*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
3607*22dc650dSSadaf Ebrahimi
3608*22dc650dSSadaf Ebrahimi switch(GET_OPCODE(op)) {
3609*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U8:
3610*22dc650dSSadaf Ebrahimi ins = LD_BU;
3611*22dc650dSSadaf Ebrahimi break;
3612*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U16:
3613*22dc650dSSadaf Ebrahimi ins = LD_HU;
3614*22dc650dSSadaf Ebrahimi break;
3615*22dc650dSSadaf Ebrahimi case SLJIT_MOV32:
3616*22dc650dSSadaf Ebrahimi ins = LD_W;
3617*22dc650dSSadaf Ebrahimi break;
3618*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U32:
3619*22dc650dSSadaf Ebrahimi ins = LD_WU;
3620*22dc650dSSadaf Ebrahimi break;
3621*22dc650dSSadaf Ebrahimi default:
3622*22dc650dSSadaf Ebrahimi ins = LD_D;
3623*22dc650dSSadaf Ebrahimi break;
3624*22dc650dSSadaf Ebrahimi }
3625*22dc650dSSadaf Ebrahimi
3626*22dc650dSSadaf Ebrahimi return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg) | IMM_I12(0));
3627*22dc650dSSadaf Ebrahimi }
3628*22dc650dSSadaf Ebrahimi
sljit_emit_atomic_store(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src_reg,sljit_s32 mem_reg,sljit_s32 temp_reg)3629*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler,
3630*22dc650dSSadaf Ebrahimi sljit_s32 op,
3631*22dc650dSSadaf Ebrahimi sljit_s32 src_reg,
3632*22dc650dSSadaf Ebrahimi sljit_s32 mem_reg,
3633*22dc650dSSadaf Ebrahimi sljit_s32 temp_reg)
3634*22dc650dSSadaf Ebrahimi {
3635*22dc650dSSadaf Ebrahimi sljit_ins ins = 0;
3636*22dc650dSSadaf Ebrahimi sljit_ins unsign = 0;
3637*22dc650dSSadaf Ebrahimi sljit_s32 tmp = temp_reg;
3638*22dc650dSSadaf Ebrahimi
3639*22dc650dSSadaf Ebrahimi CHECK_ERROR();
3640*22dc650dSSadaf Ebrahimi CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
3641*22dc650dSSadaf Ebrahimi
3642*22dc650dSSadaf Ebrahimi switch (GET_OPCODE(op)) {
3643*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U8:
3644*22dc650dSSadaf Ebrahimi ins = AMCAS_B;
3645*22dc650dSSadaf Ebrahimi unsign = BSTRPICK_D | (7 << 16);
3646*22dc650dSSadaf Ebrahimi break;
3647*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U16:
3648*22dc650dSSadaf Ebrahimi ins = AMCAS_H;
3649*22dc650dSSadaf Ebrahimi unsign = BSTRPICK_D | (15 << 16);
3650*22dc650dSSadaf Ebrahimi break;
3651*22dc650dSSadaf Ebrahimi case SLJIT_MOV32:
3652*22dc650dSSadaf Ebrahimi ins = AMCAS_W;
3653*22dc650dSSadaf Ebrahimi break;
3654*22dc650dSSadaf Ebrahimi case SLJIT_MOV_U32:
3655*22dc650dSSadaf Ebrahimi ins = AMCAS_W;
3656*22dc650dSSadaf Ebrahimi unsign = BSTRPICK_D | (31 << 16);
3657*22dc650dSSadaf Ebrahimi break;
3658*22dc650dSSadaf Ebrahimi default:
3659*22dc650dSSadaf Ebrahimi ins = AMCAS_D;
3660*22dc650dSSadaf Ebrahimi break;
3661*22dc650dSSadaf Ebrahimi }
3662*22dc650dSSadaf Ebrahimi
3663*22dc650dSSadaf Ebrahimi if (op & SLJIT_SET_ATOMIC_STORED) {
3664*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(temp_reg) | RK(TMP_ZERO)));
3665*22dc650dSSadaf Ebrahimi tmp = TMP_REG1;
3666*22dc650dSSadaf Ebrahimi }
3667*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, ins | RD(tmp) | RJ(mem_reg) | RK(src_reg)));
3668*22dc650dSSadaf Ebrahimi if (!(op & SLJIT_SET_ATOMIC_STORED))
3669*22dc650dSSadaf Ebrahimi return SLJIT_SUCCESS;
3670*22dc650dSSadaf Ebrahimi
3671*22dc650dSSadaf Ebrahimi if (unsign)
3672*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, unsign | RD(tmp) | RJ(tmp)));
3673*22dc650dSSadaf Ebrahimi
3674*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(tmp) | RK(temp_reg)));
3675*22dc650dSSadaf Ebrahimi return push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(EQUAL_FLAG) | IMM_I12(1));
3676*22dc650dSSadaf Ebrahimi }
3677*22dc650dSSadaf Ebrahimi
emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw init_value,sljit_ins last_ins)3678*22dc650dSSadaf Ebrahimi static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
3679*22dc650dSSadaf Ebrahimi {
3680*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(last_ins);
3681*22dc650dSSadaf Ebrahimi
3682*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, LU12I_W | RD(dst) | (sljit_ins)(((init_value & 0xffffffff) >> 12) << 5)));
3683*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, LU32I_D | RD(dst) | (sljit_ins)(((init_value >> 32) & 0xfffff) << 5)));
3684*22dc650dSSadaf Ebrahimi FAIL_IF(push_inst(compiler, LU52I_D | RD(dst) | RJ(dst) | (sljit_ins)(IMM_I12(init_value >> 52))));
3685*22dc650dSSadaf Ebrahimi return push_inst(compiler, ORI | RD(dst) | RJ(dst) | IMM_I12(init_value));
3686*22dc650dSSadaf Ebrahimi }
3687*22dc650dSSadaf Ebrahimi
sljit_set_jump_addr(sljit_uw addr,sljit_uw new_target,sljit_sw executable_offset)3688*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3689*22dc650dSSadaf Ebrahimi {
3690*22dc650dSSadaf Ebrahimi sljit_ins *inst = (sljit_ins*)addr;
3691*22dc650dSSadaf Ebrahimi SLJIT_UNUSED_ARG(executable_offset);
3692*22dc650dSSadaf Ebrahimi
3693*22dc650dSSadaf Ebrahimi SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
3694*22dc650dSSadaf Ebrahimi
3695*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((inst[0] & OPC_1RI20(0x7f)) == LU12I_W);
3696*22dc650dSSadaf Ebrahimi inst[0] = (inst[0] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target & 0xffffffff) >> 12) << 5);
3697*22dc650dSSadaf Ebrahimi
3698*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((inst[1] & OPC_1RI20(0x7f)) == LU32I_D);
3699*22dc650dSSadaf Ebrahimi inst[1] = (inst[1] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(sljit_ins)(((new_target >> 32) & 0xfffff) << 5);
3700*22dc650dSSadaf Ebrahimi
3701*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((inst[2] & OPC_2RI12(0x3ff)) == LU52I_D);
3702*22dc650dSSadaf Ebrahimi inst[2] = (inst[2] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target >> 52);
3703*22dc650dSSadaf Ebrahimi
3704*22dc650dSSadaf Ebrahimi SLJIT_ASSERT((inst[3] & OPC_2RI12(0x3ff)) == ORI || (inst[3] & OPC_2RI16(0x3f)) == JIRL);
3705*22dc650dSSadaf Ebrahimi if ((inst[3] & OPC_2RI12(0x3ff)) == ORI)
3706*22dc650dSSadaf Ebrahimi inst[3] = (inst[3] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target);
3707*22dc650dSSadaf Ebrahimi else
3708*22dc650dSSadaf Ebrahimi inst[3] = (inst[3] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I12((new_target & 0xfff) >> 2);
3709*22dc650dSSadaf Ebrahimi
3710*22dc650dSSadaf Ebrahimi SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
3711*22dc650dSSadaf Ebrahimi
3712*22dc650dSSadaf Ebrahimi inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
3713*22dc650dSSadaf Ebrahimi SLJIT_CACHE_FLUSH(inst, inst + 4);
3714*22dc650dSSadaf Ebrahimi }
3715*22dc650dSSadaf Ebrahimi
sljit_emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw init_value)3716*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
3717*22dc650dSSadaf Ebrahimi {
3718*22dc650dSSadaf Ebrahimi struct sljit_const *const_;
3719*22dc650dSSadaf Ebrahimi sljit_s32 dst_r;
3720*22dc650dSSadaf Ebrahimi
3721*22dc650dSSadaf Ebrahimi CHECK_ERROR_PTR();
3722*22dc650dSSadaf Ebrahimi CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
3723*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
3724*22dc650dSSadaf Ebrahimi
3725*22dc650dSSadaf Ebrahimi const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
3726*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(!const_);
3727*22dc650dSSadaf Ebrahimi set_const(const_, compiler);
3728*22dc650dSSadaf Ebrahimi
3729*22dc650dSSadaf Ebrahimi dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
3730*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, 0));
3731*22dc650dSSadaf Ebrahimi
3732*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM)
3733*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
3734*22dc650dSSadaf Ebrahimi
3735*22dc650dSSadaf Ebrahimi return const_;
3736*22dc650dSSadaf Ebrahimi }
3737*22dc650dSSadaf Ebrahimi
sljit_emit_mov_addr(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)3738*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3739*22dc650dSSadaf Ebrahimi {
3740*22dc650dSSadaf Ebrahimi struct sljit_jump *jump;
3741*22dc650dSSadaf Ebrahimi sljit_s32 dst_r;
3742*22dc650dSSadaf Ebrahimi
3743*22dc650dSSadaf Ebrahimi CHECK_ERROR_PTR();
3744*22dc650dSSadaf Ebrahimi CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
3745*22dc650dSSadaf Ebrahimi ADJUST_LOCAL_OFFSET(dst, dstw);
3746*22dc650dSSadaf Ebrahimi
3747*22dc650dSSadaf Ebrahimi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3748*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(!jump);
3749*22dc650dSSadaf Ebrahimi set_mov_addr(jump, compiler, 0);
3750*22dc650dSSadaf Ebrahimi
3751*22dc650dSSadaf Ebrahimi dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
3752*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r));
3753*22dc650dSSadaf Ebrahimi
3754*22dc650dSSadaf Ebrahimi compiler->size += JUMP_MAX_SIZE - 1;
3755*22dc650dSSadaf Ebrahimi
3756*22dc650dSSadaf Ebrahimi if (dst & SLJIT_MEM)
3757*22dc650dSSadaf Ebrahimi PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
3758*22dc650dSSadaf Ebrahimi
3759*22dc650dSSadaf Ebrahimi return jump;
3760*22dc650dSSadaf Ebrahimi }
3761*22dc650dSSadaf Ebrahimi
sljit_set_const(sljit_uw addr,sljit_sw new_constant,sljit_sw executable_offset)3762*22dc650dSSadaf Ebrahimi SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
3763*22dc650dSSadaf Ebrahimi {
3764*22dc650dSSadaf Ebrahimi sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
3765*22dc650dSSadaf Ebrahimi }
3766