// Copyright 2022 The SwiftShader Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "SIMD.hpp" #include "Assert.hpp" #include "Debug.hpp" #include "Print.hpp" #include namespace rr { SIMD::Int::Int() : XYZW(this) { } SIMD::Int::Int(RValue cast) : XYZW(this) { Value *xyzw = Nucleus::createFPToSI(cast.value(), SIMD::Int::type()); storeValue(xyzw); } SIMD::Int::Int(int broadcast) : XYZW(this) { std::vector constantVector = { broadcast }; storeValue(Nucleus::createConstantVector(constantVector, type())); } SIMD::Int::Int(int x, int y, int z, int w) : XYZW(this) { std::vector constantVector = { x, y, z, w }; storeValue(Nucleus::createConstantVector(constantVector, type())); } SIMD::Int::Int(std::vector v) : XYZW(this) { std::vector constantVector; for(int i : v) { constantVector.push_back(i); } storeValue(Nucleus::createConstantVector(constantVector, type())); } SIMD::Int::Int(std::function LaneValueProducer) : XYZW(this) { std::vector constantVector; for(int i = 0; i < SIMD::Width; i++) { constantVector.push_back(LaneValueProducer(i)); } storeValue(Nucleus::createConstantVector(constantVector, type())); } SIMD::Int::Int(RValue rhs) : XYZW(this) { store(rhs); } SIMD::Int::Int(const SIMD::Int &rhs) : XYZW(this) { store(rhs.load()); } SIMD::Int::Int(const Reference &rhs) : XYZW(this) { store(rhs.load()); } SIMD::Int::Int(RValue rhs) : XYZW(this) { storeValue(rhs.value()); } SIMD::Int::Int(const SIMD::UInt &rhs) : XYZW(this) { 
storeValue(rhs.loadValue()); } SIMD::Int::Int(const Reference &rhs) : XYZW(this) { storeValue(rhs.loadValue()); } SIMD::Int::Int(const scalar::Int &rhs) : XYZW(this) { *this = RValue(rhs.loadValue()); } SIMD::Int::Int(const Reference &rhs) : XYZW(this) { *this = RValue(rhs.loadValue()); } RValue SIMD::Int::operator=(int x) { return *this = SIMD::Int(x); } RValue SIMD::Int::operator=(RValue rhs) { return store(rhs); } RValue SIMD::Int::operator=(const SIMD::Int &rhs) { return store(rhs.load()); } RValue SIMD::Int::operator=(const Reference &rhs) { return store(rhs.load()); } RValue operator+(RValue lhs, RValue rhs) { return RValue(Nucleus::createAdd(lhs.value(), rhs.value())); } RValue operator-(RValue lhs, RValue rhs) { return RValue(Nucleus::createSub(lhs.value(), rhs.value())); } RValue operator*(RValue lhs, RValue rhs) { return RValue(Nucleus::createMul(lhs.value(), rhs.value())); } RValue operator/(RValue lhs, RValue rhs) { return RValue(Nucleus::createSDiv(lhs.value(), rhs.value())); } RValue operator%(RValue lhs, RValue rhs) { return RValue(Nucleus::createSRem(lhs.value(), rhs.value())); } RValue operator&(RValue lhs, RValue rhs) { return RValue(Nucleus::createAnd(lhs.value(), rhs.value())); } RValue operator|(RValue lhs, RValue rhs) { return RValue(Nucleus::createOr(lhs.value(), rhs.value())); } RValue operator^(RValue lhs, RValue rhs) { return RValue(Nucleus::createXor(lhs.value(), rhs.value())); } RValue operator<<(RValue lhs, RValue rhs) { return RValue(Nucleus::createShl(lhs.value(), rhs.value())); } RValue operator>>(RValue lhs, RValue rhs) { return RValue(Nucleus::createAShr(lhs.value(), rhs.value())); } RValue operator+=(SIMD::Int &lhs, RValue rhs) { return lhs = lhs + rhs; } RValue operator-=(SIMD::Int &lhs, RValue rhs) { return lhs = lhs - rhs; } RValue operator*=(SIMD::Int &lhs, RValue rhs) { return lhs = lhs * rhs; } // RValue operator/=(SIMD::Int &lhs, RValue rhs) // { // return lhs = lhs / rhs; // } // RValue operator%=(SIMD::Int &lhs, RValue 
rhs) // { // return lhs = lhs % rhs; // } RValue operator&=(SIMD::Int &lhs, RValue rhs) { return lhs = lhs & rhs; } RValue operator|=(SIMD::Int &lhs, RValue rhs) { return lhs = lhs | rhs; } RValue operator^=(SIMD::Int &lhs, RValue rhs) { return lhs = lhs ^ rhs; } RValue operator<<=(SIMD::Int &lhs, unsigned char rhs) { return lhs = lhs << rhs; } RValue operator>>=(SIMD::Int &lhs, unsigned char rhs) { return lhs = lhs >> rhs; } RValue operator+(RValue val) { return val; } RValue operator-(RValue val) { return RValue(Nucleus::createNeg(val.value())); } RValue operator~(RValue val) { return RValue(Nucleus::createNot(val.value())); } RValue Extract(RValue x, int i) { return RValue(Nucleus::createExtractElement(x.value(), scalar::Int::type(), i)); } RValue Insert(RValue x, RValue element, int i) { return RValue(Nucleus::createInsertElement(x.value(), element.value(), i)); } SIMD::UInt::UInt() : XYZW(this) { } SIMD::UInt::UInt(int broadcast) : XYZW(this) { std::vector constantVector = { broadcast }; storeValue(Nucleus::createConstantVector(constantVector, type())); } SIMD::UInt::UInt(int x, int y, int z, int w) : XYZW(this) { std::vector constantVector = { x, y, z, w }; storeValue(Nucleus::createConstantVector(constantVector, type())); } SIMD::UInt::UInt(std::vector v) : XYZW(this) { std::vector constantVector; for(int i : v) { constantVector.push_back(i); } storeValue(Nucleus::createConstantVector(constantVector, type())); } SIMD::UInt::UInt(std::function LaneValueProducer) : XYZW(this) { std::vector constantVector; for(int i = 0; i < SIMD::Width; i++) { constantVector.push_back(LaneValueProducer(i)); } storeValue(Nucleus::createConstantVector(constantVector, type())); } SIMD::UInt::UInt(RValue rhs) : XYZW(this) { store(rhs); } SIMD::UInt::UInt(const SIMD::UInt &rhs) : XYZW(this) { store(rhs.load()); } SIMD::UInt::UInt(const Reference &rhs) : XYZW(this) { store(rhs.load()); } SIMD::UInt::UInt(RValue rhs) : XYZW(this) { storeValue(rhs.value()); } SIMD::UInt::UInt(const 
SIMD::Int &rhs) : XYZW(this) { storeValue(rhs.loadValue()); } SIMD::UInt::UInt(const Reference &rhs) : XYZW(this) { storeValue(rhs.loadValue()); } SIMD::UInt::UInt(const scalar::UInt &rhs) : XYZW(this) { *this = RValue(rhs.loadValue()); } SIMD::UInt::UInt(const Reference &rhs) : XYZW(this) { *this = RValue(rhs.loadValue()); } RValue SIMD::UInt::operator=(RValue rhs) { return store(rhs); } RValue SIMD::UInt::operator=(const SIMD::UInt &rhs) { return store(rhs.load()); } RValue SIMD::UInt::operator=(const Reference &rhs) { return store(rhs.load()); } RValue operator+(RValue lhs, RValue rhs) { return RValue(Nucleus::createAdd(lhs.value(), rhs.value())); } RValue operator-(RValue lhs, RValue rhs) { return RValue(Nucleus::createSub(lhs.value(), rhs.value())); } RValue operator*(RValue lhs, RValue rhs) { return RValue(Nucleus::createMul(lhs.value(), rhs.value())); } RValue operator/(RValue lhs, RValue rhs) { return RValue(Nucleus::createUDiv(lhs.value(), rhs.value())); } RValue operator%(RValue lhs, RValue rhs) { return RValue(Nucleus::createURem(lhs.value(), rhs.value())); } RValue operator&(RValue lhs, RValue rhs) { return RValue(Nucleus::createAnd(lhs.value(), rhs.value())); } RValue operator|(RValue lhs, RValue rhs) { return RValue(Nucleus::createOr(lhs.value(), rhs.value())); } RValue operator^(RValue lhs, RValue rhs) { return RValue(Nucleus::createXor(lhs.value(), rhs.value())); } RValue operator<<(RValue lhs, RValue rhs) { return RValue(Nucleus::createShl(lhs.value(), rhs.value())); } RValue operator>>(RValue lhs, RValue rhs) { return RValue(Nucleus::createLShr(lhs.value(), rhs.value())); } RValue operator+=(SIMD::UInt &lhs, RValue rhs) { return lhs = lhs + rhs; } RValue operator-=(SIMD::UInt &lhs, RValue rhs) { return lhs = lhs - rhs; } RValue operator*=(SIMD::UInt &lhs, RValue rhs) { return lhs = lhs * rhs; } // RValue operator/=(SIMD::UInt &lhs, RValue rhs) // { // return lhs = lhs / rhs; // } // RValue operator%=(SIMD::UInt &lhs, RValue rhs) // { // return lhs 
= lhs % rhs; // } RValue operator&=(SIMD::UInt &lhs, RValue rhs) { return lhs = lhs & rhs; } RValue operator|=(SIMD::UInt &lhs, RValue rhs) { return lhs = lhs | rhs; } RValue operator^=(SIMD::UInt &lhs, RValue rhs) { return lhs = lhs ^ rhs; } RValue operator<<=(SIMD::UInt &lhs, unsigned char rhs) { return lhs = lhs << rhs; } RValue operator>>=(SIMD::UInt &lhs, unsigned char rhs) { return lhs = lhs >> rhs; } RValue operator+(RValue val) { return val; } RValue operator-(RValue val) { return RValue(Nucleus::createNeg(val.value())); } RValue operator~(RValue val) { return RValue(Nucleus::createNot(val.value())); } RValue Extract(RValue x, int i) { return RValue(Nucleus::createExtractElement(x.value(), scalar::Int::type(), i)); } RValue Insert(RValue x, RValue element, int i) { return RValue(Nucleus::createInsertElement(x.value(), element.value(), i)); } SIMD::Float::Float(RValue cast) : XYZW(this) { Value *xyzw = Nucleus::createSIToFP(cast.value(), SIMD::Float::type()); storeValue(xyzw); } SIMD::Float::Float(RValue cast) : XYZW(this) { RValue result = SIMD::Float(SIMD::Int(cast & SIMD::UInt(0x7FFFFFFF))) + As((As(cast) >> 31) & As(SIMD::Float(0x80000000u))); storeValue(result.value()); } SIMD::Float::Float() : XYZW(this) { } SIMD::Float::Float(float broadcast) : XYZW(this) { // See rr::Float(float) constructor for the rationale behind this assert. 
ASSERT(std::isfinite(broadcast)); std::vector constantVector = { broadcast }; storeValue(Nucleus::createConstantVector(constantVector, type())); } SIMD::Float::Float(float x, float y, float z, float w) : XYZW(this) { std::vector constantVector = { x, y, z, w }; storeValue(Nucleus::createConstantVector(constantVector, type())); } SIMD::Float::Float(std::vector v) : XYZW(this) { std::vector constantVector; for(int f : v) { constantVector.push_back(f); } storeValue(Nucleus::createConstantVector(constantVector, type())); } SIMD::Float::Float(std::function LaneValueProducer) : XYZW(this) { std::vector constantVector; for(int i = 0; i < SIMD::Width; i++) { constantVector.push_back(LaneValueProducer(i)); } storeValue(Nucleus::createConstantVector(constantVector, type())); } SIMD::Float SIMD::Float::infinity() { SIMD::Float result; constexpr double inf = std::numeric_limits::infinity(); std::vector constantVector = { inf }; result.storeValue(Nucleus::createConstantVector(constantVector, type())); return result; } SIMD::Float::Float(RValue rhs) : XYZW(this) { store(rhs); } SIMD::Float::Float(const SIMD::Float &rhs) : XYZW(this) { store(rhs.load()); } SIMD::Float::Float(const Reference &rhs) : XYZW(this) { store(rhs.load()); } SIMD::Float::Float(const scalar::Float &rhs) : XYZW(this) { *this = RValue(rhs.loadValue()); } SIMD::Float::Float(const Reference &rhs) : XYZW(this) { *this = RValue(rhs.loadValue()); } SIMD::Float::Float(RValue rhs) : XYZW(this) { ASSERT(SIMD::Width == 4); *this = Insert128(*this, rhs, 0); } RValue SIMD::Float::operator=(RValue rhs) { return *this = SIMD::Float(rhs); } RValue SIMD::Float::operator=(float x) { return *this = SIMD::Float(x); } RValue SIMD::Float::operator=(RValue rhs) { return store(rhs); } RValue SIMD::Float::operator=(const SIMD::Float &rhs) { return store(rhs.load()); } RValue SIMD::Float::operator=(const Reference &rhs) { return store(rhs.load()); } RValue SIMD::Float::operator=(RValue rhs) { return *this = SIMD::Float(rhs); } 
// NOTE(review): template argument lists are absent throughout this listing
// (e.g. on RValue, Reference, As, std::vector). Overloads that look identical
// below differed only in those arguments; restore them before building.

// Assignment from a scalar float (by object or by reference): builds a
// SIMD::Float from rhs and assigns that.
RValue SIMD::Float::operator=(const scalar::Float &rhs)
{
	return *this = SIMD::Float(rhs);
}

RValue SIMD::Float::operator=(const Reference &rhs)
{
	return *this = SIMD::Float(rhs);
}

// Binary floating-point operators. Each emits the named Nucleus::createF*
// instruction on the two operand Values.
RValue operator+(RValue lhs, RValue rhs)
{
	return RValue(Nucleus::createFAdd(lhs.value(), rhs.value()));
}

RValue operator-(RValue lhs, RValue rhs)
{
	return RValue(Nucleus::createFSub(lhs.value(), rhs.value()));
}

RValue operator*(RValue lhs, RValue rhs)
{
	return RValue(Nucleus::createFMul(lhs.value(), rhs.value()));
}

RValue operator/(RValue lhs, RValue rhs)
{
	return RValue(Nucleus::createFDiv(lhs.value(), rhs.value()));
}

// Compound assignment: apply the binary operator, then assign back to lhs.
RValue operator+=(SIMD::Float &lhs, RValue rhs)
{
	return lhs = lhs + rhs;
}

RValue operator-=(SIMD::Float &lhs, RValue rhs)
{
	return lhs = lhs - rhs;
}

RValue operator*=(SIMD::Float &lhs, RValue rhs)
{
	return lhs = lhs * rhs;
}

RValue operator/=(SIMD::Float &lhs, RValue rhs)
{
	return lhs = lhs / rhs;
}

// Relies on an operator% for these operands that is not defined in this part
// of the file.
RValue operator%=(SIMD::Float &lhs, RValue rhs)
{
	return lhs = lhs % rhs;
}

// Unary operators.
RValue operator+(RValue val)
{
	return val;
}

RValue operator-(RValue val)
{
	return RValue(Nucleus::createFNeg(val.value()));
}

// Rcp / RcpSqrt: only valid when SIMD::Width == 4 (asserted). The work is
// forwarded to the overload that accepts Extract128(x, 0), and its result is
// wrapped back into a SIMD::Float.
RValue Rcp(RValue x, bool relaxedPrecision, bool exactAtPow2)
{
	ASSERT(SIMD::Width == 4);
	return SIMD::Float(Rcp(Extract128(x, 0), relaxedPrecision, exactAtPow2));
}

RValue RcpSqrt(RValue x, bool relaxedPrecision)
{
	ASSERT(SIMD::Width == 4);
	return SIMD::Float(RcpSqrt(Extract128(x, 0), relaxedPrecision));
}

// Returns `x` with element `i` replaced by `element`.
RValue Insert(RValue x, RValue element, int i)
{
	return RValue(Nucleus::createInsertElement(x.value(), element.value(), i));
}

// Reads element `i` of `x` as a scalar::Float-typed value.
RValue Extract(RValue x, int i)
{
	return RValue(Nucleus::createExtractElement(x.value(), scalar::Float::type(), i));
}

// Per-lane infinity test: masks the representation with 0x7FFFFFFF (dropping
// the top bit) and compares against 0x7F800000, the IEEE-754 single-precision
// bit pattern of +infinity.
RValue IsInf(RValue x)
{
	return CmpEQ(As(x) & SIMD::Int(0x7FFFFFFF), SIMD::Int(0x7F800000));
}

// Per-lane NaN test: a lane is flagged when it does not compare equal to
// itself.
RValue IsNan(RValue x)
{
	return ~CmpEQ(x, x);
}

// Math functions. Every wrapper below hands the matching C math function
// (sinf, cosf, ...) and its operand(s) to ScalarizeCall.
RValue Sin(RValue x)
{
	return ScalarizeCall(sinf, x);
}

RValue Cos(RValue x)
{
	return ScalarizeCall(cosf, x);
}

RValue Tan(RValue x)
{
	return ScalarizeCall(tanf, x);
}

RValue Asin(RValue x)
{
	return ScalarizeCall(asinf, x);
}

RValue Acos(RValue x)
{
	return ScalarizeCall(acosf, x);
}

RValue Atan(RValue x)
{
	return ScalarizeCall(atanf, x);
}

RValue Sinh(RValue x)
{
	return ScalarizeCall(sinhf, x);
}

RValue Cosh(RValue x)
{
	return ScalarizeCall(coshf, x);
}

RValue Tanh(RValue x)
{
	return ScalarizeCall(tanhf, x);
}

RValue Asinh(RValue x)
{
	return ScalarizeCall(asinhf, x);
}

RValue Acosh(RValue x)
{
	return ScalarizeCall(acoshf, x);
}

RValue Atanh(RValue x)
{
	return ScalarizeCall(atanhf, x);
}

// Two-operand variants: arguments are passed to the C function in (x, y)
// order.
RValue Atan2(RValue x, RValue y)
{
	return ScalarizeCall(atan2f, x, y);
}

RValue Pow(RValue x, RValue y)
{
	return ScalarizeCall(powf, x, y);
}

RValue Exp(RValue x)
{
	return ScalarizeCall(expf, x);
}

RValue Log(RValue x)
{
	return ScalarizeCall(logf, x);
}

RValue Exp2(RValue x)
{
	return ScalarizeCall(exp2f, x);
}

RValue Log2(RValue x)
{
	return ScalarizeCall(log2f, x);
}

// ---------------------------------------------------------------------------
// Helpers implemented in terms of their 128-bit counterparts.
//
// All of the functions in this group assert SIMD::Width == 4, take the
// 128-bit part at index 0 of each operand with Extract128, and call the
// same-named function on it. Where a SIMD value is returned, the result is
// placed at index 0 of a default-constructed `result` with Insert128.
// ---------------------------------------------------------------------------

RValue SignMask(RValue x)
{
	ASSERT(SIMD::Width == 4);
	return SignMask(Extract128(x, 0));
}

RValue Ctlz(RValue x, bool isZeroUndef)
{
	ASSERT(SIMD::Width == 4);
	SIMD::UInt result;
	return Insert128(result, Ctlz(Extract128(x, 0), isZeroUndef), 0);
}

RValue Cttz(RValue x, bool isZeroUndef)
{
	ASSERT(SIMD::Width == 4);
	SIMD::UInt result;
	return Insert128(result, Cttz(Extract128(x, 0), isZeroUndef), 0);
}

// MulHigh producing a SIMD::Int result.
RValue MulHigh(RValue x, RValue y)
{
	ASSERT(SIMD::Width == 4);
	SIMD::Int result;
	return Insert128(result, MulHigh(Extract128(x, 0), Extract128(y, 0)), 0);
}

// MulHigh producing a SIMD::UInt result.
RValue MulHigh(RValue x, RValue y)
{
	ASSERT(SIMD::Width == 4);
	SIMD::UInt result;
	return Insert128(result, MulHigh(Extract128(x, 0), Extract128(y, 0)), 0);
}

RValue AnyTrue(const RValue &bools)
{
	ASSERT(SIMD::Width == 4);
	return AnyTrue(Extract128(bools, 0));
}

RValue AnyFalse(const RValue &bools)
{
	ASSERT(SIMD::Width == 4);
	return AnyFalse(Extract128(bools, 0));
}

RValue Divergent(const RValue &ints)
{
	ASSERT(SIMD::Width == 4);
	return Divergent(Extract128(ints, 0));
}

// Swizzle producing a SIMD::Int result.
RValue Swizzle(RValue x, uint16_t select)
{
	ASSERT(SIMD::Width == 4);
	SIMD::Int result;
	return Insert128(result, Swizzle(Extract128(x, 0), select), 0);
}

// Swizzle producing a SIMD::UInt result.
RValue Swizzle(RValue x, uint16_t select)
{
	ASSERT(SIMD::Width == 4);
	SIMD::UInt result;
	return Insert128(result, Swizzle(Extract128(x, 0), select), 0);
}

// Swizzle producing a SIMD::Float result.
RValue Swizzle(RValue x, uint16_t select)
{
	ASSERT(SIMD::Width == 4);
	SIMD::Float result;
	return Insert128(result, Swizzle(Extract128(x, 0), select), 0);
}

// Shuffle producing a SIMD::Int result.
RValue Shuffle(RValue x, RValue y, uint16_t select)
{
	ASSERT(SIMD::Width == 4);
	SIMD::Int result;
	return Insert128(result, Shuffle(Extract128(x, 0), Extract128(y, 0), select), 0);
}

// Shuffle producing a SIMD::UInt result.
RValue Shuffle(RValue x, RValue y, uint16_t select)
{
	ASSERT(SIMD::Width == 4);
	SIMD::UInt result;
	return Insert128(result, Shuffle(Extract128(x, 0), Extract128(y, 0), select), 0);
}

// Shuffle producing a SIMD::Float result.
RValue Shuffle(RValue x, RValue y, uint16_t select)
{
	ASSERT(SIMD::Width == 4);
	SIMD::Float result;
	return Insert128(result, Shuffle(Extract128(x, 0), Extract128(y, 0), select), 0);
}

// ---------------------------------------------------------------------------
// SIMD::Pointer
//
// A pointer is held in one of two forms, selected by isBasePlusOffset:
//  - base-plus-offset: one `base` plus per-lane offsets, split into a
//    JIT-time part (dynamicOffsets) and a compile-time part (staticOffsets),
//    with a limit that is likewise either dynamic or static;
//  - per-lane: an explicit `pointers` entry for every lane.
// ---------------------------------------------------------------------------

// Base-plus-offset pointer with a dynamic limit and no dynamic offsets.
// staticOffsets is constructed from SIMD::Width (presumably one
// zero-initialized entry per lane - confirm against the member's type).
SIMD::Pointer::Pointer(scalar::Pointer base, rr::Int limit)
    : base(base)
    , dynamicLimit(limit)
    , staticLimit(0)
    , dynamicOffsets(0)
    , staticOffsets(SIMD::Width)
    , hasDynamicLimit(true)
    , hasDynamicOffsets(false)
    , isBasePlusOffset(true)
{}

// Base-plus-offset pointer with a static limit and no dynamic offsets.
SIMD::Pointer::Pointer(scalar::Pointer base, unsigned int limit)
    : base(base)
    , dynamicLimit(0)
    , staticLimit(limit)
    , dynamicOffsets(0)
    , staticOffsets(SIMD::Width)
    , hasDynamicLimit(false)
    , hasDynamicOffsets(false)
    , isBasePlusOffset(true)
{}

// Base-plus-offset pointer with a dynamic limit and dynamic per-lane offsets.
SIMD::Pointer::Pointer(scalar::Pointer base, rr::Int limit, SIMD::Int offset)
    : base(base)
    , dynamicLimit(limit)
    , staticLimit(0)
    , dynamicOffsets(offset)
    , staticOffsets(SIMD::Width)
    , hasDynamicLimit(true)
    , hasDynamicOffsets(true)
    , isBasePlusOffset(true)
{}

// Base-plus-offset pointer with a static limit and dynamic per-lane offsets.
SIMD::Pointer::Pointer(scalar::Pointer base, unsigned int limit, SIMD::Int offset)
    : base(base)
    , dynamicLimit(0)
    , staticLimit(limit)
    , dynamicOffsets(offset)
    , staticOffsets(SIMD::Width)
    , hasDynamicLimit(false)
    , hasDynamicOffsets(true)
    , isBasePlusOffset(true)
{}

// Per-lane pointer built from an explicit list of pointers. None of the
// base/limit/offset members are initialized by this constructor.
SIMD::Pointer::Pointer(std::vector> pointers)
    : pointers(pointers)
    , isBasePlusOffset(false)
{}

// Per-lane pointer built from one integer vector, for targets where
// sizeof(void *) == 4 (checked with the C assert, not ASSERT). Each lane's
// pointer is taken from the matching element of `cast`.
SIMD::Pointer::Pointer(SIMD::UInt cast)
    : pointers(SIMD::Width)
    , isBasePlusOffset(false)
{
	assert(sizeof(void *) == 4);
	for(int i = 0; i < SIMD::Width; i++)
	{
		pointers[i] = As>(Extract(cast, i));
	}
}

// Per-lane pointer built from two integer vectors, for targets where
// sizeof(void *) == 8. For each lane a UInt2 is assembled with the castLow
// element at index 0 and the castHigh element at index 1, then converted.
SIMD::Pointer::Pointer(SIMD::UInt castLow, SIMD::UInt castHigh)
    : pointers(SIMD::Width)
    , isBasePlusOffset(false)
{
	assert(sizeof(void *) == 8);
	for(int i = 0; i < SIMD::Width; i++)
	{
		UInt2 address;
		address = Insert(address, Extract(castLow, i), 0);
		address = Insert(address, Extract(castHigh, i), 1);
		pointers[i] = As>(address);
	}
}

// Adds a per-lane dynamic offset. In base-plus-offset form the offset is
// accumulated into dynamicOffsets and the pointer is marked as having dynamic
// offsets from then on; otherwise each lane's pointer is advanced separately.
SIMD::Pointer &SIMD::Pointer::operator+=(SIMD::Int i)
{
	if(isBasePlusOffset)
	{
		dynamicOffsets += i;
		hasDynamicOffsets = true;
	}
	else
	{
		for(int el = 0; el < SIMD::Width; el++) { pointers[el] += Extract(i, el); }
	}
	return *this;
}

// Returns a copy of this pointer advanced by `i`.
SIMD::Pointer SIMD::Pointer::operator+(SIMD::Int i)
{
	SIMD::Pointer p = *this;
	p += i;
	return p;
}

// Adds the same compile-time offset to every lane. In base-plus-offset form
// only staticOffsets changes, so hasDynamicOffsets is left untouched.
SIMD::Pointer &SIMD::Pointer::operator+=(int i)
{
	if(isBasePlusOffset)
	{
		for(int el = 0; el < SIMD::Width; el++) { staticOffsets[el] += i; }
	}
	else
	{
		for(int el = 0; el < SIMD::Width; el++) { pointers[el] += i; }
	}
	return *this;
}

// Returns a copy of this pointer advanced by `i`.
SIMD::Pointer SIMD::Pointer::operator+(int i)
{
	SIMD::Pointer p = *this;
	p += i;
	return p;
}

// Returns the combined per-lane offsets (dynamic plus static). Only valid for
// base-plus-offset pointers; asserted.
SIMD::Int SIMD::Pointer::offsets() const
{
	ASSERT_MSG(isBasePlusOffset, "No offsets for this type of pointer");
	return dynamicOffsets + SIMD::Int(staticOffsets);
}

// Returns a per-lane mask (0xFFFFFFFF or 0) telling whether an access of
// `accessSize` bytes at each lane's offset lies within the limit.
// accessSize must be non-zero (asserted).
SIMD::Int SIMD::Pointer::isInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
{
	ASSERT(accessSize > 0);

	// Everything provably in bounds: no per-lane test needs to be emitted.
	if(isStaticallyInBounds(accessSize, robustness))
	{
		return SIMD::Int(0xFFFFFFFF);
	}

	// Offsets and limit are both known here, so the mask is a constant.
	// This branch spells out exactly four lanes, hence the width assert.
	if(!hasDynamicOffsets && !hasDynamicLimit)
	{
		ASSERT(SIMD::Width == 4);
		// Common fast paths.
		return SIMD::Int(
		    (staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
		    (staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
		    (staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
		    (staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0);
	}

	// General case: the offset must be non-negative and the last byte of the
	// access must lie below the limit. offsets() is evaluated twice here.
	return CmpGE(offsets(), 0) & CmpLT(offsets() + SIMD::Int(accessSize - 1), limit());
}

// Returns true when every lane can be shown to be in bounds without emitting
// a runtime check. Any dynamic offset rules this out immediately.
bool SIMD::Pointer::isStaticallyInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
{
	if(hasDynamicOffsets)
	{
		return false;
	}

	if(hasDynamicLimit)
	{
		if(hasStaticEqualOffsets() || hasStaticSequentialOffsets(accessSize))
		{
			switch(robustness)
			{
			case OutOfBoundsBehavior::UndefinedBehavior:
				// With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes,
				// but since it can't know in advance which branches are taken this must be true even for inactive lanes.
				return true;
			case OutOfBoundsBehavior::Nullify:
			case OutOfBoundsBehavior::RobustBufferAccess:
			case OutOfBoundsBehavior::UndefinedValue:
				return false;
			}
		}
	}

	// Reached when the limit is static, and also when the limit is dynamic
	// but the offsets are neither all equal nor sequential: every lane's
	// last accessed byte is then compared against staticLimit.
	for(int i = 0; i < SIMD::Width; i++)
	{
		if(staticOffsets[i] + accessSize - 1 >= staticLimit)
		{
			return false;
		}
	}

	return true;
}

// Returns the total limit: the sum of the dynamic and static parts.
SIMD::Int SIMD::Pointer::limit() const
{
	return dynamicLimit + staticLimit;
}

// Returns true if all offsets are compile-time static and sequential
// (N+0*step, N+1*step, N+2*step, N+3*step)
// Only valid for base-plus-offset pointers; asserted.
bool SIMD::Pointer::hasStaticSequentialOffsets(unsigned int step) const
{
	ASSERT_MSG(isBasePlusOffset, "No offsets for this type of pointer");
	if(hasDynamicOffsets)
	{
		return false;
	}

	// Each lane must be exactly `step` past the previous one.
	for(int i = 1; i < SIMD::Width; i++)
	{
		if(staticOffsets[i - 1] + int32_t(step) != staticOffsets[i])
		{
			return false;
		}
	}

	return true;
}

// Returns true if all offsets are compile-time static and equal
// (N, N, N, N)
// Only valid for base-plus-offset pointers; asserted.
bool SIMD::Pointer::hasStaticEqualOffsets() const
{
	ASSERT_MSG(isBasePlusOffset, "No offsets for this type of pointer");
	if(hasDynamicOffsets)
	{
		return false;
	}

	// Every lane is compared against lane 0.
	for(int i = 1; i < SIMD::Width; i++)
	{
		if(staticOffsets[0] != staticOffsets[i])
		{
			return false;
		}
	}

	return true;
}

// Returns the pointer for lane 0, for use when all lanes are expected to
// agree. When NDEBUG is not defined, Assert checks are emitted comparing
// every other lane against lane 0.
scalar::Pointer SIMD::Pointer::getUniformPointer() const
{
#ifndef NDEBUG
	if(isBasePlusOffset)
	{
		SIMD::Int uniform = offsets();
		scalar::Int x = Extract(uniform, 0);

		for(int i = 1; i < SIMD::Width; i++)
		{
			Assert(x == Extract(uniform, i));
		}
	}
	else
	{
		for(int i = 1; i < SIMD::Width; i++)
		{
			Assert(pointers[0] == pointers[i]);
		}
	}
#endif

	return getPointerForLane(0);
}

// Returns the scalar pointer for one lane: base plus that lane's combined
// offset in base-plus-offset form, otherwise the stored per-lane pointer.
scalar::Pointer SIMD::Pointer::getPointerForLane(int lane) const
{
	if(isBasePlusOffset)
	{
		return base + Extract(offsets(), lane);
	}
	else
	{
		return pointers[lane];
	}
}

// Writes each lane's pointer into the matching element of `bits`, for
// targets where sizeof(void *) == 4. Reads `pointers` directly, without
// checking isBasePlusOffset.
void SIMD::Pointer::castTo(SIMD::UInt &bits) const
{
	assert(sizeof(void *) == 4);
	for(int i = 0; i < SIMD::Width; i++)
	{
		bits = Insert(bits, As(pointers[i]), i);
	}
}

// Splits each lane's pointer across two vectors, for targets where
// sizeof(void *) == 8: element 0 of the converted UInt2 goes to lowerBits and
// element 1 to upperBits. Reads `pointers` directly, without checking
// isBasePlusOffset.
void SIMD::Pointer::castTo(SIMD::UInt &lowerBits, SIMD::UInt &upperBits) const
{
	assert(sizeof(void *) == 8);
	for(int i = 0; i < SIMD::Width; i++)
	{
		UInt2 address = As(pointers[i]);
		lowerBits = Insert(lowerBits, Extract(address, 0), i);
		upperBits = Insert(upperBits, Extract(address, 1), i);
	}
}

// Per-lane select: a lane takes lhs's pointer when its `condition` element is
// non-zero and rhs's pointer otherwise. If/Else are the Reactor constructs,
// so the branch is part of the generated code. The result is always in
// per-lane form.
SIMD::Pointer SIMD::Pointer::IfThenElse(SIMD::Int condition, const SIMD::Pointer &lhs, const SIMD::Pointer &rhs)
{
	std::vector> pointers(SIMD::Width);
	for(int i = 0; i < SIMD::Width; i++)
	{
		If(Extract(condition, i) != 0)
		{
			pointers[i] = lhs.getPointerForLane(i);
		}
		Else
		{
			pointers[i] = rhs.getPointerForLane(i);
		}
	}

	return { pointers };
}

#ifdef ENABLE_RR_PRINT
// Returns the values to print for this pointer: the base and combined offsets
// in base-plus-offset form, otherwise one value per lane pointer.
std::vector SIMD::Pointer::getPrintValues() const
{
	if(isBasePlusOffset)
	{
		return PrintValue::vals(base, offsets());
	}
	else
	{
		std::vector vals;
		for(int i = 0; i < SIMD::Width; i++)
		{
			vals.push_back(RValue>(pointers[i]).value());
		}
		return vals;
	}
}
#endif

}  // namespace rr