// xref: /aosp_15_r20/external/swiftshader/src/Pipeline/ShaderCore.hpp (revision 03ce13f70fcc45d86ee91b7ee4cab1936a95046e)
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
15 #ifndef sw_ShaderCore_hpp
16 #define sw_ShaderCore_hpp
17 
18 #include "Reactor/Print.hpp"
19 #include "Reactor/Reactor.hpp"
20 #include "Reactor/SIMD.hpp"
21 #include "System/Debug.hpp"
22 
23 #include <array>
24 #include <atomic>   // std::memory_order
25 #include <utility>  // std::pair
26 
27 namespace sw {
28 
29 using namespace rr;
30 
31 class Vector4s
32 {
33 public:
34 	Vector4s();
35 	Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w);
36 	Vector4s(const Vector4s &rhs);
37 
38 	Short4 &operator[](int i);
39 	Vector4s &operator=(const Vector4s &rhs);
40 
41 	Short4 x;
42 	Short4 y;
43 	Short4 z;
44 	Short4 w;
45 };
46 
47 class Vector4f
48 {
49 public:
50 	Vector4f();
51 	Vector4f(float x, float y, float z, float w);
52 	Vector4f(const Vector4f &rhs);
53 
54 	Float4 &operator[](int i);
55 	Vector4f &operator=(const Vector4f &rhs);
56 
57 	Float4 x;
58 	Float4 y;
59 	Float4 z;
60 	Float4 w;
61 };
62 
63 class Vector4i
64 {
65 public:
66 	Vector4i();
67 	Vector4i(int x, int y, int z, int w);
68 	Vector4i(const Vector4i &rhs);
69 
70 	Int4 &operator[](int i);
71 	Vector4i &operator=(const Vector4i &rhs);
72 
73 	Int4 x;
74 	Int4 y;
75 	Int4 z;
76 	Int4 w;
77 };
78 
79 namespace SIMD {
80 
81 using namespace rr::SIMD;
82 
83 struct Float4
84 {
85 	SIMD::Float x;
86 	SIMD::Float y;
87 	SIMD::Float z;
88 	SIMD::Float w;
89 };
90 
91 struct Int4
92 {
93 	SIMD::Int x;
94 	SIMD::Int y;
95 	SIMD::Int z;
96 	SIMD::Int w;
97 };
98 
99 }  // namespace SIMD
100 
101 // Vulkan 'SPIR-V Extended Instructions for GLSL' (GLSL.std.450) compliant transcendental functions
102 RValue<SIMD::Float> Sin(RValue<SIMD::Float> x, bool relaxedPrecision);
103 RValue<SIMD::Float> Cos(RValue<SIMD::Float> x, bool relaxedPrecision);
104 RValue<SIMD::Float> Tan(RValue<SIMD::Float> x, bool relaxedPrecision);
105 RValue<SIMD::Float> Asin(RValue<SIMD::Float> x, bool relaxedPrecision);
106 RValue<SIMD::Float> Acos(RValue<SIMD::Float> x, bool relaxedPrecision);
107 RValue<SIMD::Float> Atan(RValue<SIMD::Float> x, bool relaxedPrecision);
108 RValue<SIMD::Float> Atan2(RValue<SIMD::Float> y, RValue<SIMD::Float> x, bool relaxedPrecision);
109 RValue<SIMD::Float> Exp2(RValue<SIMD::Float> x, bool relaxedPrecision);
110 RValue<SIMD::Float> Log2(RValue<SIMD::Float> x, bool relaxedPrecision);
111 RValue<SIMD::Float> Exp(RValue<SIMD::Float> x, bool relaxedPrecision);
112 RValue<SIMD::Float> Log(RValue<SIMD::Float> x, bool relaxedPrecision);
113 RValue<SIMD::Float> Pow(RValue<SIMD::Float> x, RValue<SIMD::Float> y, bool relaxedPrecision);
114 RValue<SIMD::Float> Sinh(RValue<SIMD::Float> x, bool relaxedPrecision);
115 RValue<SIMD::Float> Cosh(RValue<SIMD::Float> x, bool relaxedPrecision);
116 RValue<SIMD::Float> Tanh(RValue<SIMD::Float> x, bool relaxedPrecision);
117 RValue<SIMD::Float> Asinh(RValue<SIMD::Float> x, bool relaxedPrecision);
118 RValue<SIMD::Float> Acosh(RValue<SIMD::Float> x, bool relaxedPrecision);
119 RValue<SIMD::Float> Atanh(RValue<SIMD::Float> x, bool relaxedPrecision);
120 RValue<SIMD::Float> Sqrt(RValue<SIMD::Float> x, bool relaxedPrecision);
121 
122 // Splits x into a floating-point significand in the range [0.5, 1.0)
123 // and an integral exponent of two, such that:
124 //   x = significand * 2^exponent
125 // Returns the pair <significand, exponent>
126 std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val);
127 
128 RValue<SIMD::Float> Ldexp(RValue<SIMD::Float> significand, RValue<SIMD::Int> exponent);
129 
// Math functions with uses outside of shaders can be invoked using a verbose template argument instead
// of a Boolean argument to indicate precision. For example Sqrt<Mediump>(x) equals Sqrt(x, true).
//
// Mediump is an alias of Relaxed (same enumerator value), so a template specialized
// for one is the specialization for the other.
enum Precision
{
	Highp,
	Relaxed,
	Mediump = Relaxed,  // GLSL defines mediump and lowp as corresponding with SPIR-V's RelaxedPrecision
};
138 
139 // clang-format off
140 template<Precision precision> RValue<SIMD::Float> Pow(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
Pow(RValue<SIMD::Float> x,RValue<SIMD::Float> y)141 template<> inline RValue<SIMD::Float> Pow<Highp>(RValue<SIMD::Float> x, RValue<SIMD::Float> y) { return Pow(x, y, false); }
Pow(RValue<SIMD::Float> x,RValue<SIMD::Float> y)142 template<> inline RValue<SIMD::Float> Pow<Mediump>(RValue<SIMD::Float> x, RValue<SIMD::Float> y) { return Pow(x, y, true); }
143 
144 template<Precision precision> RValue<SIMD::Float> Sqrt(RValue<SIMD::Float> x);
Sqrt(RValue<SIMD::Float> x)145 template<> inline RValue<SIMD::Float> Sqrt<Highp>(RValue<SIMD::Float> x) { return Sqrt(x, false); }
Sqrt(RValue<SIMD::Float> x)146 template<> inline RValue<SIMD::Float> Sqrt<Mediump>(RValue<SIMD::Float> x) { return Sqrt(x, true); }
147 // clang-format on
148 
149 SIMD::UInt halfToFloatBits(SIMD::UInt halfBits);
150 SIMD::UInt floatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits);
151 SIMD::Float linearToSRGB(const SIMD::Float &c);
152 SIMD::Float sRGBtoLinear(const SIMD::Float &c);
153 
154 RValue<Float4> reciprocal(RValue<Float4> x, bool pp = false, bool exactAtPow2 = false);
155 RValue<SIMD::Float> reciprocal(RValue<SIMD::Float> x, bool pp = false, bool exactAtPow2 = false);
156 RValue<Float4> reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false);
157 
158 RValue<SIMD::Float> mulAdd(RValue<SIMD::Float> x, RValue<SIMD::Float> y, RValue<SIMD::Float> z);  // TODO(chromium:1299047)
159 
160 RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y, bool relaxedPrecision);
161 RValue<Float4> Sqrt(RValue<Float4> x, bool relaxedPrecision);
162 
163 // clang-format off
164 template<Precision precision> RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y);
Pow(RValue<Float4> x,RValue<Float4> y)165 template<> inline RValue<Float4> Pow<Highp>(RValue<Float4> x, RValue<Float4> y) { return Pow(x, y, false); }
Pow(RValue<Float4> x,RValue<Float4> y)166 template<> inline RValue<Float4> Pow<Mediump>(RValue<Float4> x, RValue<Float4> y) { return Pow(x, y, true); }
167 
168 template<Precision precision> RValue<Float4> Sqrt(RValue<Float4> x);
Sqrt(RValue<Float4> x)169 template<> inline RValue<Float4> Sqrt<Highp>(RValue<Float4> x) { return Sqrt(x, false); }
Sqrt(RValue<Float4> x)170 template<> inline RValue<Float4> Sqrt<Mediump>(RValue<Float4> x) { return Sqrt(x, true); }
171 // clang-format on
172 
173 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
174 void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
175 void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
176 void transpose4x4zyxw(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
177 void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
178 void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
179 void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
180 void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
181 void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
182 
183 UInt4 halfToFloatBits(RValue<UInt4> halfBits);
184 UInt4 floatToHalfBits(RValue<UInt4> floatBits, bool storeInUpperBits);
185 Float4 r11g11b10Unpack(UInt r11g11b10bits);
186 UInt r11g11b10Pack(const Float4 &value);
187 Float4 linearToSRGB(const Float4 &c);
188 Float4 sRGBtoLinear(const Float4 &c);
189 
190 template<typename T>
191 inline rr::RValue<T> AndAll(const rr::RValue<T> &mask);
192 
193 template<typename T>
194 inline rr::RValue<T> OrAll(const rr::RValue<T> &mask);
195 
196 rr::RValue<SIMD::Float> Sign(const rr::RValue<SIMD::Float> &val);
197 
198 // Returns the <whole, frac> of val.
199 // Both whole and frac will have the same sign as val.
200 std::pair<rr::RValue<SIMD::Float>, rr::RValue<SIMD::Float>>
201 Modf(const rr::RValue<SIMD::Float> &val);
202 
203 // Returns the number of 1s in bits, per lane.
204 SIMD::UInt CountBits(const rr::RValue<SIMD::UInt> &bits);
205 
206 // Returns 1 << bits.
207 // If the resulting bit overflows a 32 bit integer, 0 is returned.
208 rr::RValue<SIMD::UInt> NthBit32(const rr::RValue<SIMD::UInt> &bits);
209 
210 // Returns bitCount number of of 1's starting from the LSB.
211 rr::RValue<SIMD::UInt> Bitmask32(const rr::RValue<SIMD::UInt> &bitCount);
212 
213 // Computes `a * b + c`, which may be fused into one operation to produce a higher-precision result.
214 rr::RValue<SIMD::Float> FMA(
215     const rr::RValue<SIMD::Float> &a,
216     const rr::RValue<SIMD::Float> &b,
217     const rr::RValue<SIMD::Float> &c);
218 
219 // Returns y if y < x; otherwise result is x.
220 // If one operand is a NaN, the other operand is the result.
221 // If both operands are NaN, the result is a NaN.
222 rr::RValue<SIMD::Float> NMin(const rr::RValue<SIMD::Float> &x, const rr::RValue<SIMD::Float> &y);
223 
224 // Returns y if y > x; otherwise result is x.
225 // If one operand is a NaN, the other operand is the result.
226 // If both operands are NaN, the result is a NaN.
227 rr::RValue<SIMD::Float> NMax(const rr::RValue<SIMD::Float> &x, const rr::RValue<SIMD::Float> &y);
228 
229 // Returns the determinant of a 2x2 matrix.
230 rr::RValue<SIMD::Float> Determinant(
231     const rr::RValue<SIMD::Float> &a, const rr::RValue<SIMD::Float> &b,
232     const rr::RValue<SIMD::Float> &c, const rr::RValue<SIMD::Float> &d);
233 
234 // Returns the determinant of a 3x3 matrix.
235 rr::RValue<SIMD::Float> Determinant(
236     const rr::RValue<SIMD::Float> &a, const rr::RValue<SIMD::Float> &b, const rr::RValue<SIMD::Float> &c,
237     const rr::RValue<SIMD::Float> &d, const rr::RValue<SIMD::Float> &e, const rr::RValue<SIMD::Float> &f,
238     const rr::RValue<SIMD::Float> &g, const rr::RValue<SIMD::Float> &h, const rr::RValue<SIMD::Float> &i);
239 
240 // Returns the determinant of a 4x4 matrix.
241 rr::RValue<SIMD::Float> Determinant(
242     const rr::RValue<SIMD::Float> &a, const rr::RValue<SIMD::Float> &b, const rr::RValue<SIMD::Float> &c, const rr::RValue<SIMD::Float> &d,
243     const rr::RValue<SIMD::Float> &e, const rr::RValue<SIMD::Float> &f, const rr::RValue<SIMD::Float> &g, const rr::RValue<SIMD::Float> &h,
244     const rr::RValue<SIMD::Float> &i, const rr::RValue<SIMD::Float> &j, const rr::RValue<SIMD::Float> &k, const rr::RValue<SIMD::Float> &l,
245     const rr::RValue<SIMD::Float> &m, const rr::RValue<SIMD::Float> &n, const rr::RValue<SIMD::Float> &o, const rr::RValue<SIMD::Float> &p);
246 
247 // Returns the inverse of a 2x2 matrix.
248 std::array<rr::RValue<SIMD::Float>, 4> MatrixInverse(
249     const rr::RValue<SIMD::Float> &a, const rr::RValue<SIMD::Float> &b,
250     const rr::RValue<SIMD::Float> &c, const rr::RValue<SIMD::Float> &d);
251 
252 // Returns the inverse of a 3x3 matrix.
253 std::array<rr::RValue<SIMD::Float>, 9> MatrixInverse(
254     const rr::RValue<SIMD::Float> &a, const rr::RValue<SIMD::Float> &b, const rr::RValue<SIMD::Float> &c,
255     const rr::RValue<SIMD::Float> &d, const rr::RValue<SIMD::Float> &e, const rr::RValue<SIMD::Float> &f,
256     const rr::RValue<SIMD::Float> &g, const rr::RValue<SIMD::Float> &h, const rr::RValue<SIMD::Float> &i);
257 
258 // Returns the inverse of a 4x4 matrix.
259 std::array<rr::RValue<SIMD::Float>, 16> MatrixInverse(
260     const rr::RValue<SIMD::Float> &a, const rr::RValue<SIMD::Float> &b, const rr::RValue<SIMD::Float> &c, const rr::RValue<SIMD::Float> &d,
261     const rr::RValue<SIMD::Float> &e, const rr::RValue<SIMD::Float> &f, const rr::RValue<SIMD::Float> &g, const rr::RValue<SIMD::Float> &h,
262     const rr::RValue<SIMD::Float> &i, const rr::RValue<SIMD::Float> &j, const rr::RValue<SIMD::Float> &k, const rr::RValue<SIMD::Float> &l,
263     const rr::RValue<SIMD::Float> &m, const rr::RValue<SIMD::Float> &n, const rr::RValue<SIMD::Float> &o, const rr::RValue<SIMD::Float> &p);
264 
////////////////////////////////////////////////////////////////////////////
// Inline functions
////////////////////////////////////////////////////////////////////////////
268 
269 template<typename T>
AndAll(const rr::RValue<T> & mask)270 inline rr::RValue<T> AndAll(const rr::RValue<T> &mask)
271 {
272 	T v1 = mask;               // [x]    [y]    [z]    [w]
273 	T v2 = v1.xzxz & v1.ywyw;  // [xy]   [zw]   [xy]   [zw]
274 	return v2.xxxx & v2.yyyy;  // [xyzw] [xyzw] [xyzw] [xyzw]
275 }
276 
277 template<typename T>
OrAll(const rr::RValue<T> & mask)278 inline rr::RValue<T> OrAll(const rr::RValue<T> &mask)
279 {
280 	T v1 = mask;               // [x]    [y]    [z]    [w]
281 	T v2 = v1.xzxz | v1.ywyw;  // [xy]   [zw]   [xy]   [zw]
282 	return v2.xxxx | v2.yyyy;  // [xyzw] [xyzw] [xyzw] [xyzw]
283 }
284 
285 }  // namespace sw
286 
287 #ifdef ENABLE_RR_PRINT
288 namespace rr {
289 template<>
290 struct PrintValue::Ty<sw::Vector4f>
291 {
fmtrr::PrintValue::Ty292 	static std::string fmt(const sw::Vector4f &v)
293 	{
294 		return "[x: " + PrintValue::fmt(v.x) +
295 		       ", y: " + PrintValue::fmt(v.y) +
296 		       ", z: " + PrintValue::fmt(v.z) +
297 		       ", w: " + PrintValue::fmt(v.w) + "]";
298 	}
299 
valrr::PrintValue::Ty300 	static std::vector<rr::Value *> val(const sw::Vector4f &v)
301 	{
302 		return PrintValue::vals(v.x, v.y, v.z, v.w);
303 	}
304 };
305 template<>
306 struct PrintValue::Ty<sw::Vector4s>
307 {
fmtrr::PrintValue::Ty308 	static std::string fmt(const sw::Vector4s &v)
309 	{
310 		return "[x: " + PrintValue::fmt(v.x) +
311 		       ", y: " + PrintValue::fmt(v.y) +
312 		       ", z: " + PrintValue::fmt(v.z) +
313 		       ", w: " + PrintValue::fmt(v.w) + "]";
314 	}
315 
valrr::PrintValue::Ty316 	static std::vector<rr::Value *> val(const sw::Vector4s &v)
317 	{
318 		return PrintValue::vals(v.x, v.y, v.z, v.w);
319 	}
320 };
321 template<>
322 struct PrintValue::Ty<sw::Vector4i>
323 {
fmtrr::PrintValue::Ty324 	static std::string fmt(const sw::Vector4i &v)
325 	{
326 		return "[x: " + PrintValue::fmt(v.x) +
327 		       ", y: " + PrintValue::fmt(v.y) +
328 		       ", z: " + PrintValue::fmt(v.z) +
329 		       ", w: " + PrintValue::fmt(v.w) + "]";
330 	}
331 
valrr::PrintValue::Ty332 	static std::vector<rr::Value *> val(const sw::Vector4i &v)
333 	{
334 		return PrintValue::vals(v.x, v.y, v.z, v.w);
335 	}
336 };
337 }  // namespace rr
338 #endif  // ENABLE_RR_PRINT
339 
340 #endif  // sw_ShaderCore_hpp
341