61 static constexpr size_t n =
sizeof (vSIMDType) /
sizeof (
ScalarType);
62 static constexpr size_t mask = (
sizeof (vSIMDType) /
sizeof (
ScalarType)) - 1;
63 static constexpr size_t bits = SIMDInternal::Log2Helper<(
int) n>::value;
66 using MaskType = SIMDInternal::MaskType<ScalarType>;
67 union UnionType { vSIMDType v;
ScalarType s[n]; };
68 union UnionMaskType { vSIMDType v; MaskType m[n]; };
72 static forcedinline vSIMDType add (vSIMDType a, vSIMDType b)
noexcept {
return apply<ScalarAdd> (a, b); }
73 static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b)
noexcept {
return apply<ScalarSub> (a, b); }
74 static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b)
noexcept {
return apply<ScalarMul> (a, b); }
75 static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b)
noexcept {
return bitapply<ScalarAnd> (a, b); }
76 static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b)
noexcept {
return bitapply<ScalarOr > (a, b); }
77 static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b)
noexcept {
return bitapply<ScalarXor> (a, b); }
78 static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b)
noexcept {
return bitapply<ScalarNot> (a, b); }
80 static forcedinline vSIMDType min (vSIMDType a, vSIMDType b)
noexcept {
return apply<ScalarMin> (a, b); }
81 static forcedinline vSIMDType max (vSIMDType a, vSIMDType b)
noexcept {
return apply<ScalarMax> (a, b); }
82 static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b)
noexcept {
return cmp<ScalarEq > (a, b); }
83 static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b)
noexcept {
return cmp<ScalarNeq> (a, b); }
84 static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b)
noexcept {
return cmp<ScalarGt > (a, b); }
85 static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b)
noexcept {
return cmp<ScalarGeq> (a, b); }
// Lane accessor: takes a vector `v` and a lane index `i`, returns a ScalarType.
// NOTE(review): presumably returns lane i of v without bounds checking -
// confirm against the function body.
87 static forcedinline
ScalarType get (vSIMDType v,
size_t i)
noexcept
// Lane mutator: takes a vector `v`, a lane index `i` and a scalar `s`, and
// returns a vector by value (the argument `v` is itself a by-value copy).
// NOTE(review): presumably returns v with lane i replaced by s - confirm
// against the function body.
93 static forcedinline vSIMDType set (vSIMDType v,
size_t i,
ScalarType s)
noexcept
// Fallback bitwise NOT of a single vector.
101 static forcedinline vSIMDType bit_not (vSIMDType
av)
noexcept
// View the input through UnionMaskType so each lane is addressable as a MaskType.
103 UnionMaskType a {
av};
// Visits every one of the n lanes.
// NOTE(review): presumably complements each mask lane in place - confirm.
105 for (
size_t i = 0; i < n; ++i)
// Horizontal reduction: takes one vector and returns a single ScalarType.
111 static forcedinline
ScalarType sum (vSIMDType
av)
noexcept
// Visits every one of the n lanes.
// NOTE(review): presumably accumulates the lanes into the returned total - confirm.
116 for (
size_t i = 0; i < n; ++i)
// Fallback truncate: removes the fractional part of every lane.
122 static forcedinline vSIMDType truncate (vSIMDType
av)
noexcept
// Each lane is converted to int and back to ScalarType; the conversion to int
// discards any fractional part (rounding toward zero).
// NOTE(review): a lane whose value does not fit in int cannot survive this
// round trip; for floating-point lanes an out-of-range conversion is undefined
// behaviour - confirm callers stay within int's range.
126 for (
size_t i = 0; i < n; ++i)
127 a.s[i] =
static_cast<ScalarType> (
static_cast<int> (a.s[i]));
// Fallback multiply-add over three vectors: every lane becomes a + (b * c).
132 static forcedinline vSIMDType multiplyAdd (vSIMDType
av, vSIMDType
bv, vSIMDType
cv)
noexcept
// Wrap all three inputs so their scalar lanes can be indexed.
134 UnionType a {
av}, b {
bv}, c {
cv};
// The result is accumulated in place in `a`; the product and the addition are
// written as ordinary scalar operations.
136 for (
size_t i = 0; i < n; ++i)
137 a.s[i] += b.s[i] * c.s[i];
// Compares two vectors lane by lane and reports a single bool.
143 static forcedinline
bool allEqual (vSIMDType
av, vSIMDType
bv)
noexcept
// Wrap both inputs so their scalar lanes can be indexed.
145 UnionType a {
av}, b {
bv};
// Lanes are compared with exactlyEqual.
// NOTE(review): presumably the first mismatching lane makes the function
// return false and it returns true otherwise - confirm.
147 for (
size_t i = 0; i < n; ++i)
148 if (! exactlyEqual (a.s[i], b.s[i]))
// Fallback complex multiply. Both vectors are read as interleaved complex
// numbers: lane 2*i holds the real part and lane 2*i + 1 the imaginary part.
155 static forcedinline vSIMDType cmplxmul (vSIMDType
av, vSIMDType
bv)
noexcept
// `r` collects the products; it is not initialised here and is filled by the loop.
157 UnionType a {
av}, b {
bv}, r;
// Number of complex pairs held in one vector (half the lane count).
// NOTE(review): n is a size_t, so this initialisation narrows to int.
159 const int m = n >> 1;
160 for (
int i = 0; i < m; ++i)
// Multiply pair i of `a` by pair i of `b` using std::complex arithmetic.
162 std::complex<ScalarType> result
163 = std::complex<ScalarType> (a.s[i<<1], a.s[(i<<1)|1])
164 * std::complex<ScalarType> (b.s[i<<1], b.s[(i<<1)|1]);
// Store the product back in the same interleaved layout.
166 r.s[i<<1] = result.real();
167 r.s[(i<<1)|1] = result.imag();
178 struct ScalarAnd {
static forcedinline MaskType op (MaskType a, MaskType b)
noexcept {
return a & b; } };
179 struct ScalarOr {
static forcedinline MaskType op (MaskType a, MaskType b)
noexcept {
return a | b; } };
180 struct ScalarXor {
static forcedinline MaskType op (MaskType a, MaskType b)
noexcept {
return a ^ b; } };
181 struct ScalarNot {
static forcedinline MaskType op (MaskType a, MaskType b)
noexcept {
return (~a) & b; } };
182 struct ScalarEq {
static forcedinline
bool op (
ScalarType a,
ScalarType b)
noexcept {
return exactlyEqual (a, b); } };
183 struct ScalarNeq {
static forcedinline
bool op (
ScalarType a,
ScalarType b)
noexcept {
return ! exactlyEqual (a, b); } };
184 struct ScalarGt {
static forcedinline
bool op (
ScalarType a,
ScalarType b)
noexcept {
return (a > b); } };
185 struct ScalarGeq {
static forcedinline
bool op (
ScalarType a,
ScalarType b)
noexcept {
return (a >= b); } };
// Generic lane-wise binary operation on the scalar view of two vectors.
// Op must provide a static op() callable with two ScalarType lanes whose
// result is assignable to a ScalarType lane.
188 template <
typename Op>
189 static forcedinline vSIMDType apply (vSIMDType
av, vSIMDType
bv)
noexcept
// Wrap both inputs so their scalar lanes can be indexed.
191 UnionType a {
av}, b {
bv};
// Each lane of `a` is overwritten with Op::op applied to the matching lanes.
193 for (
size_t i = 0; i < n; ++i)
194 a.s[i] = Op::op (a.s[i], b.s[i]);
// Generic lane-wise comparison. Op must provide a static op() taking two
// ScalarType lanes and returning something usable as a condition.
199 template <
typename Op>
200 static forcedinline vSIMDType cmp (vSIMDType
av, vSIMDType
bv)
noexcept
// Wrap both inputs so their scalar lanes can be indexed.
202 UnionType a {
av}, b {
bv};
// Each mask lane of the result `r` becomes static_cast<MaskType> (-1) when the
// predicate holds and 0 otherwise.
// NOTE(review): -1 converts to an all-bits-set value assuming MaskType is an
// integer type - confirm.
205 for (
size_t i = 0; i < n; ++i)
206 r.m[i] = Op::op (a.s[i], b.s[i]) ?
static_cast<MaskType
> (-1) :
static_cast<MaskType
> (0);
// Generic lane-wise binary operation on the mask view of two vectors.
// Op must provide a static op() callable with two MaskType lanes whose result
// is assignable to a MaskType lane.
211 template <
typename Op>
212 static forcedinline vSIMDType bitapply (vSIMDType
av, vSIMDType
bv)
noexcept
// Wrap both inputs so their lanes can be indexed as MaskType values.
214 UnionMaskType a {
av}, b {
bv};
// Each mask lane of `a` is overwritten with Op::op applied to the matching lanes.
216 for (
size_t i = 0; i < n; ++i)
217 a.m[i] = Op::op (a.m[i], b.m[i]);
// Builds a vector from a single scalar `s`.
// NOTE(review): presumably copies s into every lane (broadcast) - confirm.
222 static forcedinline vSIMDType expand (
ScalarType s)
noexcept
// Visits every one of the n lanes.
226 for (
size_t i = 0; i < n; ++i)
// Builds a vector from memory pointed to by `a`.
// NOTE(review): presumably reads n consecutive ScalarType values starting at
// a, so the caller must provide at least n readable elements - confirm.
232 static forcedinline vSIMDType load (
const ScalarType* a)
noexcept
// Visits every one of the n lanes.
236 for (
size_t i = 0; i < n; ++i)
// Writes a vector out through the pointer `dest`; returns nothing.
// NOTE(review): presumably writes n consecutive ScalarType values starting at
// dest, so the caller must provide room for at least n elements - confirm.
242 static forcedinline
void store (vSIMDType
av,
ScalarType* dest)
noexcept
// Visits every one of the n lanes.
246 for (
size_t i = 0; i < n; ++i)
// Lane permutation selected by an unsigned compile-time template argument.
// NOTE(review): how the argument encodes the source lane for each destination
// lane is not shown here - presumably it relates to the `bits` and `mask`
// constants; confirm.
250 template <
unsigned int shuffle_
idx>
251 static forcedinline vSIMDType shuffle (vSIMDType
av)
noexcept
// Visits every one of the n lanes.
257 for (
size_t i = 0; i < n; ++i)