// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2023 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef PX_SIMD_HELPERS_H
#define PX_SIMD_HELPERS_H

#include "foundation/PxMat33.h"
#include "foundation/PxVecMath.h"
#include "foundation/PxTransform.h"

#if !PX_DOXYGEN
namespace physx
{
#endif

//!
A padded version of PxMat33, to safely load its data using SIMD class PxMat33Padded : public PxMat33 { public: explicit PX_FORCE_INLINE PxMat33Padded(const PxQuat& q) { using namespace aos; const QuatV qV = V4LoadU(&q.x); Vec3V column0V, column1V, column2V; QuatGetMat33V(qV, column0V, column1V, column2V); #if defined(PX_SIMD_DISABLED) || (PX_LINUX && (PX_ARM || PX_A64)) V3StoreU(column0V, column0); V3StoreU(column1V, column1); V3StoreU(column2V, column2); #else V4StoreU(column0V, &column0.x); V4StoreU(column1V, &column1.x); V4StoreU(column2V, &column2.x); #endif } PX_FORCE_INLINE ~PxMat33Padded() {} PX_FORCE_INLINE void operator=(const PxMat33& other) { column0 = other.column0; column1 = other.column1; column2 = other.column2; } PxU32 padding; }; #if !PX_DOXYGEN namespace aos { #endif PX_FORCE_INLINE void transformKernelVec4( const FloatVArg wa, const Vec4VArg va, const Vec4VArg pa, const FloatVArg wb, const Vec4VArg vb, const Vec4VArg pb, FloatV& wo, Vec4V& vo, Vec4V& po) { wo = FSub(FMul(wa, wb), V4Dot3(va, vb)); vo = V4ScaleAdd(va, wb, V4ScaleAdd(vb, wa, V4Cross(va, vb))); const Vec4V t1 = V4Scale(pb, FScaleAdd(wa, wa, FLoad(-0.5f))); const Vec4V t2 = V4ScaleAdd(V4Cross(va, pb), wa, t1); const Vec4V t3 = V4ScaleAdd(va, V4Dot3(va, pb), t2); po = V4ScaleAdd(t3, FLoad(2.0f), pa); } // PT: out = a * b template PX_FORCE_INLINE void transformMultiply(PxTransform& out, const PxTransform& a, const PxTransform& b) { PX_ASSERT(!alignedInput || (size_t(&a)&15) == 0); PX_ASSERT(!alignedInput || (size_t(&b)&15) == 0); const Vec4V aPos = alignedInput ? V4LoadA(&a.p.x) : V4LoadU(&a.p.x); const Vec4V aRot = alignedInput ? V4LoadA(&a.q.x) : V4LoadU(&a.q.x); const Vec4V bPos = alignedInput ? V4LoadA(&b.p.x) : V4LoadU(&b.p.x); const Vec4V bRot = alignedInput ? 
V4LoadA(&b.q.x) : V4LoadU(&b.q.x); Vec4V v, p; FloatV w; transformKernelVec4(V4GetW(aRot), aRot, aPos, V4GetW(bRot), bRot, bPos, w, v, p); if(alignedOutput) { PX_ASSERT((size_t(&out)&15) == 0); V4StoreA(p, &out.p.x); V4StoreA(V4SetW(v,w), &out.q.x); } else { V4StoreU(p, &out.p.x); V4StoreU(V4SetW(v,w), &out.q.x); } } // PT: out = a * b PX_FORCE_INLINE void transformMultiply(PxTransform32& out, const PxTransform32& a, const PxTransform32& b) { transformMultiply(out, a, b); } #if !PX_DOXYGEN } // namespace aos #endif #if !PX_DOXYGEN } // namespace physx #endif #endif