// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2023 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef PX_SIMD_HELPERS_H
#define PX_SIMD_HELPERS_H

#include "foundation/PxMat33.h"
#include "foundation/PxVecMath.h"
#include "foundation/PxTransform.h"

#if !PX_DOXYGEN
namespace physx
{
#endif

	//! A padded version of PxMat33, to safely load its data using SIMD.
	//! The extra trailing word lets the constructor write each column with a
	//! 4-float store without touching memory past the end of the object.
	class PxMat33Padded : public PxMat33
	{
	public:
		explicit PX_FORCE_INLINE PxMat33Padded(const PxQuat& q)
		{
			using namespace aos;
			const QuatV qV = V4LoadU(&q.x);
			Vec3V column0V, column1V, column2V;
			QuatGetMat33V(qV, column0V, column1V, column2V);
#if defined(PX_SIMD_DISABLED) || (PX_LINUX && (PX_ARM || PX_A64))
			// Strict path: store exactly 3 floats per column.
			V3StoreU(column0V, column0);
			V3StoreU(column1V, column1);
			V3StoreU(column2V, column2);
#else
			// SIMD path: 4-float unaligned stores. Each store spills one float into the
			// next column (or into "padding" for column2), which is safe for this class.
			V4StoreU(column0V, &column0.x);
			V4StoreU(column1V, &column1.x);
			V4StoreU(column2V, &column2.x);
#endif
		}
		PX_FORCE_INLINE ~PxMat33Padded()	{}
		PX_FORCE_INLINE void operator=(const PxMat33& other)
		{
			column0 = other.column0;
			column1 = other.column1;
			column2 = other.column2;
		}
		PxU32 padding;
	};
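
	// Example (illustrative sketch, not part of the original header; the function name is
	// hypothetical): building a rotation matrix from a quaternion on the stack. A plain
	// PxMat33 local would not guarantee that the constructor's 4-float stores stay inside
	// the object, which is exactly what the trailing padding member provides.
	PX_FORCE_INLINE PxVec3 pxExampleRotateByQuat(const PxQuat& q, const PxVec3& v)
	{
		const PxMat33Padded m(q);	// SIMD-friendly quaternion-to-matrix conversion
		return m.transform(v);		// same result as q.rotate(v)
	}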

#if !PX_DOXYGEN
namespace aos
{
#endif

	// Composes two rigid transforms given as (w, v, p) triples, where (w, v) is a unit
	// quaternion split into scalar and vector parts and p is the position.
	PX_FORCE_INLINE void transformKernelVec4(	const FloatVArg wa, const Vec4VArg va, const Vec4VArg pa,
												const FloatVArg wb, const Vec4VArg vb, const Vec4VArg pb,
												FloatV& wo, Vec4V& vo, Vec4V& po)
	{
		// Quaternion product qo = qa * qb:
		// wo = wa*wb - dot(va, vb), vo = wb*va + wa*vb + cross(va, vb)
		wo = FSub(FMul(wa, wb), V4Dot3(va, vb));
		vo = V4ScaleAdd(va, wb, V4ScaleAdd(vb, wa, V4Cross(va, vb)));

		// Composed position po = pa + rotate(qa, pb), using the unit-quaternion identity
		// rotate(q, x) = 2*((w*w - 0.5)*x + w*cross(v, x) + dot(v, x)*v)
		const Vec4V t1 = V4Scale(pb, FScaleAdd(wa, wa, FLoad(-0.5f)));
		const Vec4V t2 = V4ScaleAdd(V4Cross(va, pb), wa, t1);
		const Vec4V t3 = V4ScaleAdd(va, V4Dot3(va, pb), t2);

		po = V4ScaleAdd(t3, FLoad(2.0f), pa);
	}

	// PT: out = a * b
	// The template parameters state whether the inputs and the output are 16-byte aligned,
	// selecting aligned (V4LoadA/V4StoreA) or unaligned (V4LoadU/V4StoreU) accesses.
	template<const bool alignedInput, const bool alignedOutput>
	PX_FORCE_INLINE void transformMultiply(PxTransform& out, const PxTransform& a, const PxTransform& b)
	{
		PX_ASSERT(!alignedInput || (size_t(&a)&15) == 0);
		PX_ASSERT(!alignedInput || (size_t(&b)&15) == 0);

		const Vec4V aPos = alignedInput ? V4LoadA(&a.p.x) : V4LoadU(&a.p.x);
		const Vec4V aRot = alignedInput ? V4LoadA(&a.q.x) : V4LoadU(&a.q.x);

		const Vec4V bPos = alignedInput ? V4LoadA(&b.p.x) : V4LoadU(&b.p.x);
		const Vec4V bRot = alignedInput ? V4LoadA(&b.q.x) : V4LoadU(&b.q.x);

		Vec4V v, p;
		FloatV w;
		transformKernelVec4(V4GetW(aRot), aRot, aPos, V4GetW(bRot), bRot, bPos, w, v, p);

		if(alignedOutput)
		{
			PX_ASSERT((size_t(&out)&15) == 0);
			V4StoreA(p, &out.p.x);
			V4StoreA(V4SetW(v,w), &out.q.x);
		}
		else
		{
			V4StoreU(p, &out.p.x);
			V4StoreU(V4SetW(v,w), &out.q.x);
		}
	}
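
	// Example (illustrative sketch, not part of the original header; the function name is
	// hypothetical): the alignment flags only pick aligned vs. unaligned instructions. The
	// 4-float accesses at &p.x still need a padded transform type such as PxTransform32,
	// since PxTransform declares q before p and has no trailing padding of its own.
	PX_FORCE_INLINE PxTransform pxExampleComposeUnaligned(const PxTransform32& parent, const PxTransform32& child)
	{
		PxTransform32 result;
		transformMultiply<false, false>(result, parent, child);	// result = parent * child
		return result;	// matches parent.transform(child)
	}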

	// PT: out = a * b
	// Convenience overload for 16-byte aligned, padded transforms (PxTransform32).
	PX_FORCE_INLINE void transformMultiply(PxTransform32& out, const PxTransform32& a, const PxTransform32& b)
	{
		transformMultiply<true, true>(out, a, b);
	}
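
	// Example (illustrative sketch, not part of the original header; the function name is
	// hypothetical, and PxTransform32 is assumed to be default-constructible as in recent
	// PhysX foundation headers): using the aligned overload. Stack instances of PxTransform32
	// satisfy the 16-byte alignment asserts; p and q are inherited from PxTransform.
	PX_FORCE_INLINE PxVec3 pxExampleComposeAligned()
	{
		PxTransform32 a, b, out;
		a.p = PxVec3(1.0f, 0.0f, 0.0f);	a.q = PxQuat(PxIdentity);
		b.p = PxVec3(0.0f, 2.0f, 0.0f);	b.q = PxQuat(PxIdentity);
		transformMultiply(out, a, b);	// out = a * b via the aligned load/store path
		return out.p;					// (1, 2, 0) for these identity rotations
	}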

#if !PX_DOXYGEN
} // namespace aos
#endif

#if !PX_DOXYGEN
} // namespace physx
#endif

#endif