1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Constants.hpp"
16
17 #include "System/Half.hpp"
18 #include "System/Math.hpp"
19
20 #include <cstring>
21
22 namespace sw {
23
Constants()24 Constants::Constants()
25 {
26 static const unsigned int transposeBit0[16] = {
27 0x00000000,
28 0x00000001,
29 0x00000010,
30 0x00000011,
31 0x00000100,
32 0x00000101,
33 0x00000110,
34 0x00000111,
35 0x00001000,
36 0x00001001,
37 0x00001010,
38 0x00001011,
39 0x00001100,
40 0x00001101,
41 0x00001110,
42 0x00001111
43 };
44
45 static const unsigned int transposeBit1[16] = {
46 0x00000000,
47 0x00000002,
48 0x00000020,
49 0x00000022,
50 0x00000200,
51 0x00000202,
52 0x00000220,
53 0x00000222,
54 0x00002000,
55 0x00002002,
56 0x00002020,
57 0x00002022,
58 0x00002200,
59 0x00002202,
60 0x00002220,
61 0x00002222
62 };
63
64 static const unsigned int transposeBit2[16] = {
65 0x00000000,
66 0x00000004,
67 0x00000040,
68 0x00000044,
69 0x00000400,
70 0x00000404,
71 0x00000440,
72 0x00000444,
73 0x00004000,
74 0x00004004,
75 0x00004040,
76 0x00004044,
77 0x00004400,
78 0x00004404,
79 0x00004440,
80 0x00004444
81 };
82
83 memcpy(&this->transposeBit0, transposeBit0, sizeof(transposeBit0));
84 memcpy(&this->transposeBit1, transposeBit1, sizeof(transposeBit1));
85 memcpy(&this->transposeBit2, transposeBit2, sizeof(transposeBit2));
86
87 static const ushort4 cWeight[17] = {
88 { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }, // 0xFFFF / 1 = 0xFFFF
89 { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }, // 0xFFFF / 1 = 0xFFFF
90 { 0x8000, 0x8000, 0x8000, 0x8000 }, // 0xFFFF / 2 = 0x8000
91 { 0x5555, 0x5555, 0x5555, 0x5555 }, // 0xFFFF / 3 = 0x5555
92 { 0x4000, 0x4000, 0x4000, 0x4000 }, // 0xFFFF / 4 = 0x4000
93 { 0x3333, 0x3333, 0x3333, 0x3333 }, // 0xFFFF / 5 = 0x3333
94 { 0x2AAA, 0x2AAA, 0x2AAA, 0x2AAA }, // 0xFFFF / 6 = 0x2AAA
95 { 0x2492, 0x2492, 0x2492, 0x2492 }, // 0xFFFF / 7 = 0x2492
96 { 0x2000, 0x2000, 0x2000, 0x2000 }, // 0xFFFF / 8 = 0x2000
97 { 0x1C71, 0x1C71, 0x1C71, 0x1C71 }, // 0xFFFF / 9 = 0x1C71
98 { 0x1999, 0x1999, 0x1999, 0x1999 }, // 0xFFFF / 10 = 0x1999
99 { 0x1745, 0x1745, 0x1745, 0x1745 }, // 0xFFFF / 11 = 0x1745
100 { 0x1555, 0x1555, 0x1555, 0x1555 }, // 0xFFFF / 12 = 0x1555
101 { 0x13B1, 0x13B1, 0x13B1, 0x13B1 }, // 0xFFFF / 13 = 0x13B1
102 { 0x1249, 0x1249, 0x1249, 0x1249 }, // 0xFFFF / 14 = 0x1249
103 { 0x1111, 0x1111, 0x1111, 0x1111 }, // 0xFFFF / 15 = 0x1111
104 { 0x1000, 0x1000, 0x1000, 0x1000 }, // 0xFFFF / 16 = 0x1000
105 };
106
107 static const float4 uvWeight[17] = {
108 { 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f },
109 { 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f, 1.0f / 1.0f },
110 { 1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f, 1.0f / 2.0f },
111 { 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f },
112 { 1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f, 1.0f / 4.0f },
113 { 1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f, 1.0f / 5.0f },
114 { 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f },
115 { 1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f, 1.0f / 7.0f },
116 { 1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f, 1.0f / 8.0f },
117 { 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f },
118 { 1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f, 1.0f / 10.0f },
119 { 1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f, 1.0f / 11.0f },
120 { 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f, 1.0f / 12.0f },
121 { 1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f, 1.0f / 13.0f },
122 { 1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f, 1.0f / 14.0f },
123 { 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f },
124 { 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f },
125 };
126
127 static const float4 uvStart[17] = {
128 { -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f },
129 { -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f, -0.0f / 2.0f },
130 { -1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f, -1.0f / 4.0f },
131 { -2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f, -2.0f / 6.0f },
132 { -3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f, -3.0f / 8.0f },
133 { -4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f, -4.0f / 10.0f },
134 { -5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f, -5.0f / 12.0f },
135 { -6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f, -6.0f / 14.0f },
136 { -7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f, -7.0f / 16.0f },
137 { -8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f, -8.0f / 18.0f },
138 { -9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f, -9.0f / 20.0f },
139 { -10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f, -10.0f / 22.0f },
140 { -11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f, -11.0f / 24.0f },
141 { -12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f, -12.0f / 26.0f },
142 { -13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f, -13.0f / 28.0f },
143 { -14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f, -14.0f / 30.0f },
144 { -15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f, -15.0f / 32.0f },
145 };
146
147 memcpy(&this->cWeight, cWeight, sizeof(cWeight));
148 memcpy(&this->uvWeight, uvWeight, sizeof(uvWeight));
149 memcpy(&this->uvStart, uvStart, sizeof(uvStart));
150
151 static const unsigned int occlusionCount[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
152
153 memcpy(&this->occlusionCount, &occlusionCount, sizeof(occlusionCount));
154
155 for(int i = 0; i < 16; i++)
156 {
157 maskB4Q[i][0] = -(i >> 0 & 1);
158 maskB4Q[i][1] = -(i >> 1 & 1);
159 maskB4Q[i][2] = -(i >> 2 & 1);
160 maskB4Q[i][3] = -(i >> 3 & 1);
161 maskB4Q[i][4] = -(i >> 0 & 1);
162 maskB4Q[i][5] = -(i >> 1 & 1);
163 maskB4Q[i][6] = -(i >> 2 & 1);
164 maskB4Q[i][7] = -(i >> 3 & 1);
165
166 invMaskB4Q[i][0] = ~maskB4Q[i][0];
167 invMaskB4Q[i][1] = ~maskB4Q[i][1];
168 invMaskB4Q[i][2] = ~maskB4Q[i][2];
169 invMaskB4Q[i][3] = ~maskB4Q[i][3];
170 invMaskB4Q[i][4] = ~maskB4Q[i][4];
171 invMaskB4Q[i][5] = ~maskB4Q[i][5];
172 invMaskB4Q[i][6] = ~maskB4Q[i][6];
173 invMaskB4Q[i][7] = ~maskB4Q[i][7];
174
175 maskW4Q[i][0] = -(i >> 0 & 1);
176 maskW4Q[i][1] = -(i >> 1 & 1);
177 maskW4Q[i][2] = -(i >> 2 & 1);
178 maskW4Q[i][3] = -(i >> 3 & 1);
179
180 invMaskW4Q[i][0] = ~maskW4Q[i][0];
181 invMaskW4Q[i][1] = ~maskW4Q[i][1];
182 invMaskW4Q[i][2] = ~maskW4Q[i][2];
183 invMaskW4Q[i][3] = ~maskW4Q[i][3];
184
185 maskD4X[i][0] = -(i >> 0 & 1);
186 maskD4X[i][1] = -(i >> 1 & 1);
187 maskD4X[i][2] = -(i >> 2 & 1);
188 maskD4X[i][3] = -(i >> 3 & 1);
189
190 invMaskD4X[i][0] = ~maskD4X[i][0];
191 invMaskD4X[i][1] = ~maskD4X[i][1];
192 invMaskD4X[i][2] = ~maskD4X[i][2];
193 invMaskD4X[i][3] = ~maskD4X[i][3];
194
195 maskQ0Q[i] = -(i >> 0 & 1);
196 maskQ1Q[i] = -(i >> 1 & 1);
197 maskQ2Q[i] = -(i >> 2 & 1);
198 maskQ3Q[i] = -(i >> 3 & 1);
199
200 invMaskQ0Q[i] = ~maskQ0Q[i];
201 invMaskQ1Q[i] = ~maskQ1Q[i];
202 invMaskQ2Q[i] = ~maskQ2Q[i];
203 invMaskQ3Q[i] = ~maskQ3Q[i];
204
205 maskX0X[i][0] = maskX0X[i][1] = maskX0X[i][2] = maskX0X[i][3] = -(i >> 0 & 1);
206 maskX1X[i][0] = maskX1X[i][1] = maskX1X[i][2] = maskX1X[i][3] = -(i >> 1 & 1);
207 maskX2X[i][0] = maskX2X[i][1] = maskX2X[i][2] = maskX2X[i][3] = -(i >> 2 & 1);
208 maskX3X[i][0] = maskX3X[i][1] = maskX3X[i][2] = maskX3X[i][3] = -(i >> 3 & 1);
209
210 invMaskX0X[i][0] = invMaskX0X[i][1] = invMaskX0X[i][2] = invMaskX0X[i][3] = ~maskX0X[i][0];
211 invMaskX1X[i][0] = invMaskX1X[i][1] = invMaskX1X[i][2] = invMaskX1X[i][3] = ~maskX1X[i][0];
212 invMaskX2X[i][0] = invMaskX2X[i][1] = invMaskX2X[i][2] = invMaskX2X[i][3] = ~maskX2X[i][0];
213 invMaskX3X[i][0] = invMaskX3X[i][1] = invMaskX3X[i][2] = invMaskX3X[i][3] = ~maskX3X[i][0];
214
215 maskD01Q[i][0] = -(i >> 0 & 1);
216 maskD01Q[i][1] = -(i >> 1 & 1);
217 maskD23Q[i][0] = -(i >> 2 & 1);
218 maskD23Q[i][1] = -(i >> 3 & 1);
219
220 invMaskD01Q[i][0] = ~maskD01Q[i][0];
221 invMaskD01Q[i][1] = ~maskD01Q[i][1];
222 invMaskD23Q[i][0] = ~maskD23Q[i][0];
223 invMaskD23Q[i][1] = ~maskD23Q[i][1];
224
225 maskQ01X[i][0] = -(i >> 0 & 1);
226 maskQ01X[i][1] = -(i >> 1 & 1);
227 maskQ23X[i][0] = -(i >> 2 & 1);
228 maskQ23X[i][1] = -(i >> 3 & 1);
229
230 invMaskQ01X[i][0] = ~maskQ01X[i][0];
231 invMaskQ01X[i][1] = ~maskQ01X[i][1];
232 invMaskQ23X[i][0] = ~maskQ23X[i][0];
233 invMaskQ23X[i][1] = ~maskQ23X[i][1];
234 }
235
236 for(int i = 0; i < 8; i++)
237 {
238 mask565Q[i] = word4((i & 0x1 ? 0x001F : 0) | (i & 0x2 ? 0x07E0 : 0) | (i & 0x4 ? 0xF800 : 0));
239 mask11X[i] = dword4((i & 0x1 ? 0x000007FFu : 0) | (i & 0x2 ? 0x003FF800u : 0) | (i & 0x4 ? 0xFFC00000u : 0));
240 }
241
242 for(int i = 0; i < 16; i++)
243 {
244 mask5551Q[i] = word4((i & 0x1 ? 0x001F : 0) | (i & 0x2 ? 0x03E0 : 0) | (i & 0x4 ? 0x7C00 : 0) | (i & 8 ? 0x8000 : 0));
245 maskr5g5b5a1Q[i] = word4((i & 0x1 ? 0xF800 : 0) | (i & 0x2 ? 0x07C0 : 0) | (i & 0x4 ? 0x003E : 0) | (i & 8 ? 0x0001 : 0));
246 maskb5g5r5a1Q[i] = word4((i & 0x1 ? 0x003E : 0) | (i & 0x2 ? 0x07C0 : 0) | (i & 0x4 ? 0xF800 : 0) | (i & 8 ? 0x0001 : 0));
247 mask4argbQ[i] = word4((i & 0x1 ? 0x00F0 : 0) | (i & 0x2 ? 0x0F00 : 0) | (i & 0x4 ? 0xF000 : 0) | (i & 8 ? 0x000F : 0));
248 mask4rgbaQ[i] = word4((i & 0x1 ? 0x000F : 0) | (i & 0x2 ? 0x00F0 : 0) | (i & 0x4 ? 0x0F00 : 0) | (i & 8 ? 0xF000 : 0));
249 }
250
251 for(int i = 0; i < 4; i++)
252 {
253 maskW01Q[i][0] = -(i >> 0 & 1);
254 maskW01Q[i][1] = -(i >> 1 & 1);
255 maskW01Q[i][2] = -(i >> 0 & 1);
256 maskW01Q[i][3] = -(i >> 1 & 1);
257
258 maskD01X[i][0] = -(i >> 0 & 1);
259 maskD01X[i][1] = -(i >> 1 & 1);
260 maskD01X[i][2] = -(i >> 0 & 1);
261 maskD01X[i][3] = -(i >> 1 & 1);
262 }
263
264 for(int i = 0; i < 16; i++)
265 {
266 mask10Q[i][0] = mask10Q[i][1] =
267 (i & 0x1 ? 0x3FF : 0) |
268 (i & 0x2 ? 0xFFC00 : 0) |
269 (i & 0x4 ? 0x3FF00000 : 0) |
270 (i & 0x8 ? 0xC0000000 : 0);
271 }
272
273 for(int i = 0; i < 256; i++)
274 {
275 sRGBtoLinearFF_FF00[i] = (unsigned short)(sRGBtoLinear((float)i / 0xFF) * 0xFF00 + 0.5f);
276 }
277
278 for(int q = 0; q < 4; q++)
279 {
280 for(int c = 0; c < 16; c++)
281 {
282 for(int i = 0; i < 4; i++)
283 {
284 sampleX[q][c][i] = c & (1 << i) ? SampleLocationsX[q] : 0.0f;
285 sampleY[q][c][i] = c & (1 << i) ? SampleLocationsY[q] : 0.0f;
286 weight[c][i] = c & (1 << i) ? 1.0f : 0.0f;
287 }
288 }
289 }
290
291 constexpr auto subPixB = vk::SUBPIXEL_PRECISION_BITS;
292
293 const int Xf[4] = { toFixedPoint(SampleLocationsX[0], subPixB), toFixedPoint(SampleLocationsX[1], subPixB), toFixedPoint(SampleLocationsX[2], subPixB), toFixedPoint(SampleLocationsX[3], subPixB) };
294 const int Yf[4] = { toFixedPoint(SampleLocationsY[0], subPixB), toFixedPoint(SampleLocationsY[1], subPixB), toFixedPoint(SampleLocationsY[2], subPixB), toFixedPoint(SampleLocationsY[3], subPixB) };
295
296 memcpy(&this->Xf, &Xf, sizeof(Xf));
297 memcpy(&this->Yf, &Yf, sizeof(Yf));
298
299 for(int i = 0; i <= 0xFFFF; i++)
300 {
301 half2float[i] = static_cast<float>(bit_cast<half>(i));
302 }
303 }
304
305 } // namespace sw
306