/* Bra86.c -- Branch converter for X86 code (BCJ)
2023-04-02 : Igor Pavlov : Public domain */
3*f6dc9357SAndroid Build Coastguard Worker
4*f6dc9357SAndroid Build Coastguard Worker #include "Precomp.h"
5*f6dc9357SAndroid Build Coastguard Worker
6*f6dc9357SAndroid Build Coastguard Worker #include "Bra.h"
7*f6dc9357SAndroid Build Coastguard Worker #include "CpuArch.h"
8*f6dc9357SAndroid Build Coastguard Worker
9*f6dc9357SAndroid Build Coastguard Worker
/*
  Program-counter helpers used inside the converter function below.
  They are not self-contained: they read and write the local variables
  (pc), (p), (size) and (lim) of the function that expands them.

  BR_PC_INIT  is expanded once, before scanning starts.
  BR_PC_GET   yields the 32-bit value that is added to / subtracted from
              a branch operand located at the current (p).

  Two equivalent schemes are provided (all arithmetic is modulo 2^32):

    BR_CONV_USE_OPT_PC_PTR defined (pointer size is known to be 4 or 8):
      the low 32 bits of the start pointer are subtracted from (pc) once,
      and the low 32 bits of the current pointer are added back on each use,
      so BR_PC_GET == initial_pc + (p - start).

    otherwise:
      (size) is added to (pc) once, and the remaining distance (lim - p)
      is subtracted on each use, so BR_PC_GET == initial_pc + size - (lim - p).
*/
#if defined(MY_CPU_SIZEOF_POINTER) \
    && ( MY_CPU_SIZEOF_POINTER == 4 \
      || MY_CPU_SIZEOF_POINTER == 8)
  #define BR_CONV_USE_OPT_PC_PTR
#endif

#ifdef BR_CONV_USE_OPT_PC_PTR
#define BR_PC_INIT  pc -= (UInt32)(SizeT)p; // (MY_uintptr_t)
#define BR_PC_GET   (pc + (UInt32)(SizeT)p)
#else
#define BR_PC_INIT  pc += (UInt32)size;
#define BR_PC_GET   (pc - (UInt32)(SizeT)(lim - p))
// #define BR_PC_INIT
// #define BR_PC_GET   (pc + (UInt32)(SizeT)(p - data))
#endif
25*f6dc9357SAndroid Build Coastguard Worker
/*
  BR_CONVERT_VAL(v, c):
    adds (c) to (v) when the enclosing function's (encoding) is non-zero,
    subtracts it otherwise.
    The expansion is a complete if/else statement that already ends with ';',
    so call sites must NOT append another semicolon.
*/
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;

/* Forms the name of the internal (static) converter: z7_BranchConvSt_<name> */
#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name

/*
  Non-zero when the low 8 bits of (b) are 0x00 or 0xFF:
  adding 1 turns those two values into 0x01 / 0x100, whose bits 1..7 are zero.
*/
#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0)
32*f6dc9357SAndroid Build Coastguard Worker
/*
  Scan helpers for one group of 4 bytes.
  Usage pattern in the converter:
      BR86_PREPARE_BCJ_SCAN      // (p) still points to the first byte of the group
      p += 4;
      if (BR86_IS_BCJ_BYTE(n))   // n = 0..3 : byte (n) of the group
  BR86_IS_BCJ_BYTE(n) is non-zero when that byte is 0xE8 or 0xE9,
  i.e. ((byte & 0xfe) == 0xe8).

  MY_CPU_LE_UNALIGN defined:
    the 4 bytes are loaded once with GetUi32() and XORed with 0xe8e8e8e8,
    so a candidate byte shows up as a byte whose upper 7 bits are zero.
  otherwise:
    each byte is read separately through (p), which at that point has
    already been advanced by 4, hence the (n) - 4 index.
*/
#ifdef MY_CPU_LE_UNALIGN
  #define BR86_PREPARE_BCJ_SCAN  const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8;
  #define BR86_IS_BCJ_BYTE(n)    ((v & ((UInt32)0xfe << (n) * 8)) == 0)
#else
  #define BR86_PREPARE_BCJ_SCAN
  // bad for MSVC X86 (partial write to byte reg):
  #define BR86_IS_BCJ_BYTE(n)    ((p[(n) - 4] & 0xfe) == 0xe8)
  // bad for old MSVC (partial write to byte reg):
  // #define BR86_IS_BCJ_BYTE(n)    (((*p ^ 0xe8) & 0xfe) == 0)
#endif
43*f6dc9357SAndroid Build Coastguard Worker
44*f6dc9357SAndroid Build Coastguard Worker static
45*f6dc9357SAndroid Build Coastguard Worker Z7_FORCE_INLINE
46*f6dc9357SAndroid Build Coastguard Worker Z7_ATTRIB_NO_VECTOR
Z7_BRANCH_CONV_ST(X86)47*f6dc9357SAndroid Build Coastguard Worker Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding)
48*f6dc9357SAndroid Build Coastguard Worker {
49*f6dc9357SAndroid Build Coastguard Worker if (size < 5)
50*f6dc9357SAndroid Build Coastguard Worker return p;
51*f6dc9357SAndroid Build Coastguard Worker {
52*f6dc9357SAndroid Build Coastguard Worker // Byte *p = data;
53*f6dc9357SAndroid Build Coastguard Worker const Byte *lim = p + size - 4;
54*f6dc9357SAndroid Build Coastguard Worker unsigned mask = (unsigned)*state; // & 7;
55*f6dc9357SAndroid Build Coastguard Worker #ifdef BR_CONV_USE_OPT_PC_PTR
56*f6dc9357SAndroid Build Coastguard Worker /* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4),
57*f6dc9357SAndroid Build Coastguard Worker because call/jump offset is relative to the next instruction.
58*f6dc9357SAndroid Build Coastguard Worker if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4),
59*f6dc9357SAndroid Build Coastguard Worker because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before.
60*f6dc9357SAndroid Build Coastguard Worker */
61*f6dc9357SAndroid Build Coastguard Worker pc += 4;
62*f6dc9357SAndroid Build Coastguard Worker #endif
63*f6dc9357SAndroid Build Coastguard Worker BR_PC_INIT
64*f6dc9357SAndroid Build Coastguard Worker goto start;
65*f6dc9357SAndroid Build Coastguard Worker
66*f6dc9357SAndroid Build Coastguard Worker for (;; mask |= 4)
67*f6dc9357SAndroid Build Coastguard Worker {
68*f6dc9357SAndroid Build Coastguard Worker // cont: mask |= 4;
69*f6dc9357SAndroid Build Coastguard Worker start:
70*f6dc9357SAndroid Build Coastguard Worker if (p >= lim)
71*f6dc9357SAndroid Build Coastguard Worker goto fin;
72*f6dc9357SAndroid Build Coastguard Worker {
73*f6dc9357SAndroid Build Coastguard Worker BR86_PREPARE_BCJ_SCAN
74*f6dc9357SAndroid Build Coastguard Worker p += 4;
75*f6dc9357SAndroid Build Coastguard Worker if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1;
76*f6dc9357SAndroid Build Coastguard Worker if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1;
77*f6dc9357SAndroid Build Coastguard Worker if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0;
78*f6dc9357SAndroid Build Coastguard Worker if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
79*f6dc9357SAndroid Build Coastguard Worker }
80*f6dc9357SAndroid Build Coastguard Worker goto main_loop;
81*f6dc9357SAndroid Build Coastguard Worker
82*f6dc9357SAndroid Build Coastguard Worker m0: p--;
83*f6dc9357SAndroid Build Coastguard Worker m1: p--;
84*f6dc9357SAndroid Build Coastguard Worker m2: p--;
85*f6dc9357SAndroid Build Coastguard Worker if (mask == 0)
86*f6dc9357SAndroid Build Coastguard Worker goto a3;
87*f6dc9357SAndroid Build Coastguard Worker if (p > lim)
88*f6dc9357SAndroid Build Coastguard Worker goto fin_p;
89*f6dc9357SAndroid Build Coastguard Worker
90*f6dc9357SAndroid Build Coastguard Worker // if (((0x17u >> mask) & 1) == 0)
91*f6dc9357SAndroid Build Coastguard Worker if (mask > 4 || mask == 3)
92*f6dc9357SAndroid Build Coastguard Worker {
93*f6dc9357SAndroid Build Coastguard Worker mask >>= 1;
94*f6dc9357SAndroid Build Coastguard Worker continue; // goto cont;
95*f6dc9357SAndroid Build Coastguard Worker }
96*f6dc9357SAndroid Build Coastguard Worker mask >>= 1;
97*f6dc9357SAndroid Build Coastguard Worker if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask]))
98*f6dc9357SAndroid Build Coastguard Worker continue; // goto cont;
99*f6dc9357SAndroid Build Coastguard Worker // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
100*f6dc9357SAndroid Build Coastguard Worker {
101*f6dc9357SAndroid Build Coastguard Worker UInt32 v = GetUi32(p);
102*f6dc9357SAndroid Build Coastguard Worker UInt32 c;
103*f6dc9357SAndroid Build Coastguard Worker v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
104*f6dc9357SAndroid Build Coastguard Worker c = BR_PC_GET;
105*f6dc9357SAndroid Build Coastguard Worker BR_CONVERT_VAL(v, c)
106*f6dc9357SAndroid Build Coastguard Worker {
107*f6dc9357SAndroid Build Coastguard Worker mask <<= 3;
108*f6dc9357SAndroid Build Coastguard Worker if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask))
109*f6dc9357SAndroid Build Coastguard Worker {
110*f6dc9357SAndroid Build Coastguard Worker v ^= (((UInt32)0x100 << mask) - 1);
111*f6dc9357SAndroid Build Coastguard Worker #ifdef MY_CPU_X86
112*f6dc9357SAndroid Build Coastguard Worker // for X86 : we can recalculate (c) to reduce register pressure
113*f6dc9357SAndroid Build Coastguard Worker c = BR_PC_GET;
114*f6dc9357SAndroid Build Coastguard Worker #endif
115*f6dc9357SAndroid Build Coastguard Worker BR_CONVERT_VAL(v, c)
116*f6dc9357SAndroid Build Coastguard Worker }
117*f6dc9357SAndroid Build Coastguard Worker mask = 0;
118*f6dc9357SAndroid Build Coastguard Worker }
119*f6dc9357SAndroid Build Coastguard Worker // v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
120*f6dc9357SAndroid Build Coastguard Worker v &= (1 << 25) - 1; v -= (1 << 24);
121*f6dc9357SAndroid Build Coastguard Worker SetUi32(p, v)
122*f6dc9357SAndroid Build Coastguard Worker p += 4;
123*f6dc9357SAndroid Build Coastguard Worker goto main_loop;
124*f6dc9357SAndroid Build Coastguard Worker }
125*f6dc9357SAndroid Build Coastguard Worker
126*f6dc9357SAndroid Build Coastguard Worker main_loop:
127*f6dc9357SAndroid Build Coastguard Worker if (p >= lim)
128*f6dc9357SAndroid Build Coastguard Worker goto fin;
129*f6dc9357SAndroid Build Coastguard Worker for (;;)
130*f6dc9357SAndroid Build Coastguard Worker {
131*f6dc9357SAndroid Build Coastguard Worker BR86_PREPARE_BCJ_SCAN
132*f6dc9357SAndroid Build Coastguard Worker p += 4;
133*f6dc9357SAndroid Build Coastguard Worker if (BR86_IS_BCJ_BYTE(0)) { goto a0; }
134*f6dc9357SAndroid Build Coastguard Worker if (BR86_IS_BCJ_BYTE(1)) { goto a1; }
135*f6dc9357SAndroid Build Coastguard Worker if (BR86_IS_BCJ_BYTE(2)) { goto a2; }
136*f6dc9357SAndroid Build Coastguard Worker if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
137*f6dc9357SAndroid Build Coastguard Worker if (p >= lim)
138*f6dc9357SAndroid Build Coastguard Worker goto fin;
139*f6dc9357SAndroid Build Coastguard Worker }
140*f6dc9357SAndroid Build Coastguard Worker
141*f6dc9357SAndroid Build Coastguard Worker a0: p--;
142*f6dc9357SAndroid Build Coastguard Worker a1: p--;
143*f6dc9357SAndroid Build Coastguard Worker a2: p--;
144*f6dc9357SAndroid Build Coastguard Worker a3:
145*f6dc9357SAndroid Build Coastguard Worker if (p > lim)
146*f6dc9357SAndroid Build Coastguard Worker goto fin_p;
147*f6dc9357SAndroid Build Coastguard Worker // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
148*f6dc9357SAndroid Build Coastguard Worker {
149*f6dc9357SAndroid Build Coastguard Worker UInt32 v = GetUi32(p);
150*f6dc9357SAndroid Build Coastguard Worker UInt32 c;
151*f6dc9357SAndroid Build Coastguard Worker v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
152*f6dc9357SAndroid Build Coastguard Worker c = BR_PC_GET;
153*f6dc9357SAndroid Build Coastguard Worker BR_CONVERT_VAL(v, c)
154*f6dc9357SAndroid Build Coastguard Worker // v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
155*f6dc9357SAndroid Build Coastguard Worker v &= (1 << 25) - 1; v -= (1 << 24);
156*f6dc9357SAndroid Build Coastguard Worker SetUi32(p, v)
157*f6dc9357SAndroid Build Coastguard Worker p += 4;
158*f6dc9357SAndroid Build Coastguard Worker goto main_loop;
159*f6dc9357SAndroid Build Coastguard Worker }
160*f6dc9357SAndroid Build Coastguard Worker }
161*f6dc9357SAndroid Build Coastguard Worker
162*f6dc9357SAndroid Build Coastguard Worker fin_p:
163*f6dc9357SAndroid Build Coastguard Worker p--;
164*f6dc9357SAndroid Build Coastguard Worker fin:
165*f6dc9357SAndroid Build Coastguard Worker // the following processing for tail is optional and can be commented
166*f6dc9357SAndroid Build Coastguard Worker /*
167*f6dc9357SAndroid Build Coastguard Worker lim += 4;
168*f6dc9357SAndroid Build Coastguard Worker for (; p < lim; p++, mask >>= 1)
169*f6dc9357SAndroid Build Coastguard Worker if ((*p & 0xfe) == 0xe8)
170*f6dc9357SAndroid Build Coastguard Worker break;
171*f6dc9357SAndroid Build Coastguard Worker */
172*f6dc9357SAndroid Build Coastguard Worker *state = (UInt32)mask;
173*f6dc9357SAndroid Build Coastguard Worker return p;
174*f6dc9357SAndroid Build Coastguard Worker }
175*f6dc9357SAndroid Build Coastguard Worker }
176*f6dc9357SAndroid Build Coastguard Worker
177*f6dc9357SAndroid Build Coastguard Worker
/*
  Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding):
    defines the non-inlined function m(name) with the signature
        Byte *f(Byte *data, SizeT size, UInt32 pc, UInt32 *state)
    that forwards to z7_BranchConvSt_<name>() and passes (encoding)
    as a constant last argument.
    (m) is a function-like macro that maps (name) to the exported function name.
    NOTE(review): passing (encoding) as a constant into the force-inlined
    core presumably lets the compiler specialize each wrapper - confirm.
*/
#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \
  Z7_NO_INLINE \
  Z7_ATTRIB_NO_VECTOR \
  Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \
  { return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); }
183*f6dc9357SAndroid Build Coastguard Worker
184*f6dc9357SAndroid Build Coastguard Worker Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0)
185*f6dc9357SAndroid Build Coastguard Worker #ifndef Z7_EXTRACT_ONLY
186*f6dc9357SAndroid Build Coastguard Worker Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1)
187*f6dc9357SAndroid Build Coastguard Worker #endif
188