// Copyright 2016 The SwiftShader Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef sw_ShaderCore_hpp #define sw_ShaderCore_hpp #include "Reactor/Print.hpp" #include "Reactor/Reactor.hpp" #include "Reactor/SIMD.hpp" #include "System/Debug.hpp" #include #include // std::memory_order #include // std::pair namespace sw { using namespace rr; class Vector4s { public: Vector4s(); Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w); Vector4s(const Vector4s &rhs); Short4 &operator[](int i); Vector4s &operator=(const Vector4s &rhs); Short4 x; Short4 y; Short4 z; Short4 w; }; class Vector4f { public: Vector4f(); Vector4f(float x, float y, float z, float w); Vector4f(const Vector4f &rhs); Float4 &operator[](int i); Vector4f &operator=(const Vector4f &rhs); Float4 x; Float4 y; Float4 z; Float4 w; }; class Vector4i { public: Vector4i(); Vector4i(int x, int y, int z, int w); Vector4i(const Vector4i &rhs); Int4 &operator[](int i); Vector4i &operator=(const Vector4i &rhs); Int4 x; Int4 y; Int4 z; Int4 w; }; namespace SIMD { using namespace rr::SIMD; struct Float4 { SIMD::Float x; SIMD::Float y; SIMD::Float z; SIMD::Float w; }; struct Int4 { SIMD::Int x; SIMD::Int y; SIMD::Int z; SIMD::Int w; }; } // namespace SIMD // Vulkan 'SPIR-V Extended Instructions for GLSL' (GLSL.std.450) compliant transcendental functions RValue Sin(RValue x, bool relaxedPrecision); RValue Cos(RValue x, bool relaxedPrecision); RValue Tan(RValue x, bool relaxedPrecision); RValue Asin(RValue x, bool relaxedPrecision); RValue Acos(RValue x, bool relaxedPrecision); RValue Atan(RValue x, bool relaxedPrecision); RValue Atan2(RValue y, RValue x, bool relaxedPrecision); RValue Exp2(RValue x, bool relaxedPrecision); RValue Log2(RValue x, bool relaxedPrecision); RValue Exp(RValue x, bool relaxedPrecision); RValue Log(RValue x, bool relaxedPrecision); RValue Pow(RValue x, RValue y, bool relaxedPrecision); RValue Sinh(RValue x, bool relaxedPrecision); RValue Cosh(RValue x, bool relaxedPrecision); RValue Tanh(RValue x, bool relaxedPrecision); RValue Asinh(RValue x, bool relaxedPrecision); RValue Acosh(RValue x, bool relaxedPrecision); RValue Atanh(RValue x, bool relaxedPrecision); RValue Sqrt(RValue x, bool relaxedPrecision); // Splits x into a floating-point significand in the range [0.5, 1.0) // and an integral exponent of two, such that: // x = significand * 2^exponent // Returns the pair std::pair Frexp(RValue val); RValue Ldexp(RValue significand, RValue exponent); // Math functions with uses outside of shaders can be invoked using a verbose template argument instead // of a Boolean argument to indicate precision. For example Sqrt(x) equals Sqrt(x, true). enum Precision { Highp, Relaxed, Mediump = Relaxed, // GLSL defines mediump and lowp as corresponding with SPIR-V's RelaxedPrecision }; // clang-format off template RValue Pow(RValue x, RValue y); template<> inline RValue Pow(RValue x, RValue y) { return Pow(x, y, false); } template<> inline RValue Pow(RValue x, RValue y) { return Pow(x, y, true); } template RValue Sqrt(RValue x); template<> inline RValue Sqrt(RValue x) { return Sqrt(x, false); } template<> inline RValue Sqrt(RValue x) { return Sqrt(x, true); } // clang-format on SIMD::UInt halfToFloatBits(SIMD::UInt halfBits); SIMD::UInt floatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits); SIMD::Float linearToSRGB(const SIMD::Float &c); SIMD::Float sRGBtoLinear(const SIMD::Float &c); RValue reciprocal(RValue x, bool pp = false, bool exactAtPow2 = false); RValue reciprocal(RValue x, bool pp = false, bool exactAtPow2 = false); RValue reciprocalSquareRoot(RValue x, bool abs, bool pp = false); RValue mulAdd(RValue x, RValue y, RValue z); // TODO(chromium:1299047) RValue Pow(RValue x, RValue y, bool relaxedPrecision); RValue Sqrt(RValue x, bool relaxedPrecision); // clang-format off template RValue Pow(RValue x, RValue y); template<> inline RValue Pow(RValue x, RValue y) { return Pow(x, y, false); } template<> inline RValue Pow(RValue x, RValue y) { return Pow(x, y, true); } template RValue Sqrt(RValue x); template<> inline RValue Sqrt(RValue x) { return Sqrt(x, false); } template<> inline RValue Sqrt(RValue x) { return Sqrt(x, true); } // clang-format on void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose4x4zyxw(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N); UInt4 halfToFloatBits(RValue halfBits); UInt4 floatToHalfBits(RValue floatBits, bool storeInUpperBits); Float4 r11g11b10Unpack(UInt r11g11b10bits); UInt r11g11b10Pack(const Float4 &value); Float4 linearToSRGB(const Float4 &c); Float4 sRGBtoLinear(const Float4 &c); template inline rr::RValue AndAll(const rr::RValue &mask); template inline rr::RValue OrAll(const rr::RValue &mask); rr::RValue Sign(const rr::RValue &val); // Returns the of val. // Both whole and frac will have the same sign as val. std::pair, rr::RValue> Modf(const rr::RValue &val); // Returns the number of 1s in bits, per lane. SIMD::UInt CountBits(const rr::RValue &bits); // Returns 1 << bits. // If the resulting bit overflows a 32 bit integer, 0 is returned. rr::RValue NthBit32(const rr::RValue &bits); // Returns bitCount number of of 1's starting from the LSB. rr::RValue Bitmask32(const rr::RValue &bitCount); // Computes `a * b + c`, which may be fused into one operation to produce a higher-precision result. rr::RValue FMA( const rr::RValue &a, const rr::RValue &b, const rr::RValue &c); // Returns y if y < x; otherwise result is x. // If one operand is a NaN, the other operand is the result. // If both operands are NaN, the result is a NaN. rr::RValue NMin(const rr::RValue &x, const rr::RValue &y); // Returns y if y > x; otherwise result is x. // If one operand is a NaN, the other operand is the result. // If both operands are NaN, the result is a NaN. rr::RValue NMax(const rr::RValue &x, const rr::RValue &y); // Returns the determinant of a 2x2 matrix. rr::RValue Determinant( const rr::RValue &a, const rr::RValue &b, const rr::RValue &c, const rr::RValue &d); // Returns the determinant of a 3x3 matrix. rr::RValue Determinant( const rr::RValue &a, const rr::RValue &b, const rr::RValue &c, const rr::RValue &d, const rr::RValue &e, const rr::RValue &f, const rr::RValue &g, const rr::RValue &h, const rr::RValue &i); // Returns the determinant of a 4x4 matrix. rr::RValue Determinant( const rr::RValue &a, const rr::RValue &b, const rr::RValue &c, const rr::RValue &d, const rr::RValue &e, const rr::RValue &f, const rr::RValue &g, const rr::RValue &h, const rr::RValue &i, const rr::RValue &j, const rr::RValue &k, const rr::RValue &l, const rr::RValue &m, const rr::RValue &n, const rr::RValue &o, const rr::RValue &p); // Returns the inverse of a 2x2 matrix. std::array, 4> MatrixInverse( const rr::RValue &a, const rr::RValue &b, const rr::RValue &c, const rr::RValue &d); // Returns the inverse of a 3x3 matrix. std::array, 9> MatrixInverse( const rr::RValue &a, const rr::RValue &b, const rr::RValue &c, const rr::RValue &d, const rr::RValue &e, const rr::RValue &f, const rr::RValue &g, const rr::RValue &h, const rr::RValue &i); // Returns the inverse of a 4x4 matrix. std::array, 16> MatrixInverse( const rr::RValue &a, const rr::RValue &b, const rr::RValue &c, const rr::RValue &d, const rr::RValue &e, const rr::RValue &f, const rr::RValue &g, const rr::RValue &h, const rr::RValue &i, const rr::RValue &j, const rr::RValue &k, const rr::RValue &l, const rr::RValue &m, const rr::RValue &n, const rr::RValue &o, const rr::RValue &p); //////////////////////////////////////////////////////////////////////////// // Inline functions //////////////////////////////////////////////////////////////////////////// template inline rr::RValue AndAll(const rr::RValue &mask) { T v1 = mask; // [x] [y] [z] [w] T v2 = v1.xzxz & v1.ywyw; // [xy] [zw] [xy] [zw] return v2.xxxx & v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw] } template inline rr::RValue OrAll(const rr::RValue &mask) { T v1 = mask; // [x] [y] [z] [w] T v2 = v1.xzxz | v1.ywyw; // [xy] [zw] [xy] [zw] return v2.xxxx | v2.yyyy; // [xyzw] [xyzw] [xyzw] [xyzw] } } // namespace sw #ifdef ENABLE_RR_PRINT namespace rr { template<> struct PrintValue::Ty { static std::string fmt(const sw::Vector4f &v) { return "[x: " + PrintValue::fmt(v.x) + ", y: " + PrintValue::fmt(v.y) + ", z: " + PrintValue::fmt(v.z) + ", w: " + PrintValue::fmt(v.w) + "]"; } static std::vector val(const sw::Vector4f &v) { return PrintValue::vals(v.x, v.y, v.z, v.w); } }; template<> struct PrintValue::Ty { static std::string fmt(const sw::Vector4s &v) { return "[x: " + PrintValue::fmt(v.x) + ", y: " + PrintValue::fmt(v.y) + ", z: " + PrintValue::fmt(v.z) + ", w: " + PrintValue::fmt(v.w) + "]"; } static std::vector val(const sw::Vector4s &v) { return PrintValue::vals(v.x, v.y, v.z, v.w); } }; template<> struct PrintValue::Ty { static std::string fmt(const sw::Vector4i &v) { return "[x: " + PrintValue::fmt(v.x) + ", y: " + PrintValue::fmt(v.y) + ", z: " + PrintValue::fmt(v.z) + ", w: " + PrintValue::fmt(v.w) + "]"; } static std::vector val(const sw::Vector4i &v) { return PrintValue::vals(v.x, v.y, v.z, v.w); } }; } // namespace rr #endif // ENABLE_RR_PRINT #endif // sw_ShaderCore_hpp