58struct SIMDNativeOps<float>
61 using vSIMDType = __m128;
64 DECLARE_SSE_SIMD_CONST (int32_t, kAllBitsSet);
65 DECLARE_SSE_SIMD_CONST (int32_t, kEvenHighBit);
66 DECLARE_SSE_SIMD_CONST (
float, kOne);
69 static forcedinline __m128 JUCE_VECTOR_CALLTYPE expand (
float s)
noexcept {
return _mm_load1_ps (&s); }
70 static forcedinline __m128 JUCE_VECTOR_CALLTYPE load (
const float* a)
noexcept {
return _mm_load_ps (a); }
71 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128 value,
float* dest)
noexcept { _mm_store_ps (dest, value); }
72 static forcedinline __m128 JUCE_VECTOR_CALLTYPE add (__m128 a, __m128 b)
noexcept {
return _mm_add_ps (a, b); }
73 static forcedinline __m128 JUCE_VECTOR_CALLTYPE sub (__m128 a, __m128 b)
noexcept {
return _mm_sub_ps (a, b); }
74 static forcedinline __m128 JUCE_VECTOR_CALLTYPE mul (__m128 a, __m128 b)
noexcept {
return _mm_mul_ps (a, b); }
75 static forcedinline __m128 JUCE_VECTOR_CALLTYPE bit_and (__m128 a, __m128 b)
noexcept {
return _mm_and_ps (a, b); }
76 static forcedinline __m128 JUCE_VECTOR_CALLTYPE bit_or (__m128 a, __m128 b)
noexcept {
return _mm_or_ps (a, b); }
77 static forcedinline __m128 JUCE_VECTOR_CALLTYPE bit_xor (__m128 a, __m128 b)
noexcept {
return _mm_xor_ps (a, b); }
78 static forcedinline __m128 JUCE_VECTOR_CALLTYPE bit_notand (__m128 a, __m128 b)
noexcept {
return _mm_andnot_ps (a, b); }
79 static forcedinline __m128 JUCE_VECTOR_CALLTYPE bit_not (__m128 a)
noexcept {
return bit_notand (a, _mm_loadu_ps ((
float*) kAllBitsSet)); }
80 static forcedinline __m128 JUCE_VECTOR_CALLTYPE min (__m128 a, __m128 b)
noexcept {
return _mm_min_ps (a, b); }
81 static forcedinline __m128 JUCE_VECTOR_CALLTYPE max (__m128 a, __m128 b)
noexcept {
return _mm_max_ps (a, b); }
82 static forcedinline __m128 JUCE_VECTOR_CALLTYPE equal (__m128 a, __m128 b)
noexcept {
return _mm_cmpeq_ps (a, b); }
83 static forcedinline __m128 JUCE_VECTOR_CALLTYPE notEqual (__m128 a, __m128 b)
noexcept {
return _mm_cmpneq_ps (a, b); }
84 static forcedinline __m128 JUCE_VECTOR_CALLTYPE greaterThan (__m128 a, __m128 b)
noexcept {
return _mm_cmpgt_ps (a, b); }
85 static forcedinline __m128 JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128 a, __m128 b)
noexcept {
return _mm_cmpge_ps (a, b); }
86 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128 a, __m128 b )
noexcept {
return (_mm_movemask_ps (equal (a, b)) == 0xf); }
87 static forcedinline __m128 JUCE_VECTOR_CALLTYPE multiplyAdd (__m128 a, __m128 b, __m128 c)
noexcept {
return _mm_add_ps (a, _mm_mul_ps (b, c)); }
88 static forcedinline __m128 JUCE_VECTOR_CALLTYPE dupeven (__m128 a)
noexcept {
return _mm_shuffle_ps (a, a, _MM_SHUFFLE (2, 2, 0, 0)); }
89 static forcedinline __m128 JUCE_VECTOR_CALLTYPE dupodd (__m128 a)
noexcept {
return _mm_shuffle_ps (a, a, _MM_SHUFFLE (3, 3, 1, 1)); }
90 static forcedinline __m128 JUCE_VECTOR_CALLTYPE swapevenodd (__m128 a)
noexcept {
return _mm_shuffle_ps (a, a, _MM_SHUFFLE (2, 3, 0, 1)); }
91 static forcedinline __m128 JUCE_VECTOR_CALLTYPE oddevensum (__m128 a)
noexcept {
return _mm_add_ps (_mm_shuffle_ps (a, a, _MM_SHUFFLE (1, 0, 3, 2)), a); }
94 static forcedinline __m128 JUCE_VECTOR_CALLTYPE truncate (__m128 a)
noexcept {
return _mm_cvtepi32_ps (_mm_cvttps_epi32 (a)); }
97 static forcedinline __m128 JUCE_VECTOR_CALLTYPE cmplxmul (__m128 a, __m128 b)
noexcept
99 __m128 rr_ir = mul (a, dupeven (b));
100 __m128 ii_ri = mul (swapevenodd (a), dupodd (b));
101 return add (rr_ir, bit_xor (ii_ri, _mm_loadu_ps ((
float*) kEvenHighBit)));
104 static forcedinline
float JUCE_VECTOR_CALLTYPE sum (__m128 a)
noexcept
106 #if defined (__SSE4__)
107 const auto retval = _mm_dp_ps (a, _mm_loadu_ps (kOne), 0xff);
108 #elif defined (__SSE3__)
109 const auto shuffled = _mm_movehdup_ps (a);
110 const auto sums = _mm_add_ps (a, shuffled);
111 const auto retval = _mm_add_ss (sums, _mm_movehl_ps (shuffled, sums));
113 auto retval = _mm_add_ps (_mm_shuffle_ps (a, a, 0x4e), a);
114 retval = _mm_add_ps (retval, _mm_shuffle_ps (retval, retval, 0xb1));
116 return _mm_cvtss_f32 (retval);
126struct SIMDNativeOps<double>
129 using vSIMDType = __m128d;
132 DECLARE_SSE_SIMD_CONST (int64_t, kAllBitsSet);
133 DECLARE_SSE_SIMD_CONST (int64_t, kEvenHighBit);
134 DECLARE_SSE_SIMD_CONST (
double, kOne);
137 static forcedinline __m128d JUCE_VECTOR_CALLTYPE vconst (
const double* a)
noexcept {
return load (a); }
138 static forcedinline __m128d JUCE_VECTOR_CALLTYPE vconst (
const int64_t* a)
noexcept {
return _mm_castsi128_pd (_mm_load_si128 (
reinterpret_cast<const __m128i*
> (a))); }
139 static forcedinline __m128d JUCE_VECTOR_CALLTYPE expand (
double s)
noexcept {
return _mm_load1_pd (&s); }
140 static forcedinline __m128d JUCE_VECTOR_CALLTYPE load (
const double* a)
noexcept {
return _mm_load_pd (a); }
141 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128d value,
double* dest)
noexcept { _mm_store_pd (dest, value); }
142 static forcedinline __m128d JUCE_VECTOR_CALLTYPE add (__m128d a, __m128d b)
noexcept {
return _mm_add_pd (a, b); }
143 static forcedinline __m128d JUCE_VECTOR_CALLTYPE sub (__m128d a, __m128d b)
noexcept {
return _mm_sub_pd (a, b); }
144 static forcedinline __m128d JUCE_VECTOR_CALLTYPE mul (__m128d a, __m128d b)
noexcept {
return _mm_mul_pd (a, b); }
145 static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_and (__m128d a, __m128d b)
noexcept {
return _mm_and_pd (a, b); }
146 static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_or (__m128d a, __m128d b)
noexcept {
return _mm_or_pd (a, b); }
147 static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_xor (__m128d a, __m128d b)
noexcept {
return _mm_xor_pd (a, b); }
148 static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_notand (__m128d a, __m128d b)
noexcept {
return _mm_andnot_pd (a, b); }
149 static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_not (__m128d a)
noexcept {
return bit_notand (a, vconst (kAllBitsSet)); }
150 static forcedinline __m128d JUCE_VECTOR_CALLTYPE min (__m128d a, __m128d b)
noexcept {
return _mm_min_pd (a, b); }
151 static forcedinline __m128d JUCE_VECTOR_CALLTYPE max (__m128d a, __m128d b)
noexcept {
return _mm_max_pd (a, b); }
152 static forcedinline __m128d JUCE_VECTOR_CALLTYPE equal (__m128d a, __m128d b)
noexcept {
return _mm_cmpeq_pd (a, b); }
153 static forcedinline __m128d JUCE_VECTOR_CALLTYPE notEqual (__m128d a, __m128d b)
noexcept {
return _mm_cmpneq_pd (a, b); }
154 static forcedinline __m128d JUCE_VECTOR_CALLTYPE greaterThan (__m128d a, __m128d b)
noexcept {
return _mm_cmpgt_pd (a, b); }
155 static forcedinline __m128d JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128d a, __m128d b)
noexcept {
return _mm_cmpge_pd (a, b); }
156 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128d a, __m128d b )
noexcept {
return (_mm_movemask_pd (equal (a, b)) == 0x3); }
157 static forcedinline __m128d JUCE_VECTOR_CALLTYPE multiplyAdd (__m128d a, __m128d b, __m128d c)
noexcept {
return _mm_add_pd (a, _mm_mul_pd (b, c)); }
158 static forcedinline __m128d JUCE_VECTOR_CALLTYPE dupeven (__m128d a)
noexcept {
return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (0, 0)); }
159 static forcedinline __m128d JUCE_VECTOR_CALLTYPE dupodd (__m128d a)
noexcept {
return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (1, 1)); }
160 static forcedinline __m128d JUCE_VECTOR_CALLTYPE swapevenodd (__m128d a)
noexcept {
return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (0, 1)); }
161 static forcedinline __m128d JUCE_VECTOR_CALLTYPE oddevensum (__m128d a)
noexcept {
return a; }
164 static forcedinline __m128d JUCE_VECTOR_CALLTYPE truncate (__m128d a)
noexcept {
return _mm_cvtepi32_pd (_mm_cvttpd_epi32 (a)); }
167 static forcedinline __m128d JUCE_VECTOR_CALLTYPE cmplxmul (__m128d a, __m128d b)
noexcept
169 __m128d rr_ir = mul (a, dupeven (b));
170 __m128d ii_ri = mul (swapevenodd (a), dupodd (b));
171 return add (rr_ir, bit_xor (ii_ri, vconst (kEvenHighBit)));
174 static forcedinline
double JUCE_VECTOR_CALLTYPE sum (__m128d a)
noexcept
176 #if defined (__SSE4__)
177 __m128d retval = _mm_dp_pd (a, vconst (kOne), 0xff);
178 #elif defined (__SSE3__)
179 __m128d retval = _mm_hadd_pd (a, a);
181 __m128d retval = _mm_add_pd (_mm_shuffle_pd (a, a, 0x01), a);
183 return _mm_cvtsd_f64 (retval);
193struct SIMDNativeOps<int8_t>
196 using vSIMDType = __m128i;
199 DECLARE_SSE_SIMD_CONST (int8_t, kAllBitsSet);
201 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const int8_t* a)
noexcept {
return load (a); }
202 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const int8_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
203 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, int8_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
204 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (int8_t s)
noexcept {
return _mm_set1_epi8 (s); }
205 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi8 (a, b); }
206 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi8 (a, b); }
207 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
208 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
209 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
210 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
211 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
212 #if defined (__SSE4__)
213 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept {
return _mm_min_epi8 (a, b); }
214 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept {
return _mm_max_epi8 (a, b); }
216 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept { __m128i lt = greaterThan (b, a);
return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
217 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept { __m128i gt = greaterThan (a, b);
return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
219 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept {
return _mm_cmpeq_epi8 (a, b); }
220 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept {
return _mm_cmpgt_epi8 (a, b); }
221 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
222 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
223 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
224 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
227 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
230 static forcedinline int8_t JUCE_VECTOR_CALLTYPE sum (__m128i a)
noexcept
233 __m128i lo = _mm_unpacklo_epi8 (a, _mm_setzero_si128());
234 __m128i hi = _mm_unpackhi_epi8 (a, _mm_setzero_si128());
236 for (
int i = 0; i < 3; ++i)
238 lo = _mm_hadd_epi16 (lo, lo);
239 hi = _mm_hadd_epi16 (hi, hi);
242 return static_cast<int8_t
> ((_mm_cvtsi128_si32 (lo) & 0xff) + (_mm_cvtsi128_si32 (hi) & 0xff));
248 static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b)
251 __m128i even = _mm_mullo_epi16 (a, b);
252 __m128i odd = _mm_mullo_epi16 (_mm_srli_epi16 (a, 8), _mm_srli_epi16 (b, 8));
254 return _mm_or_si128 (_mm_slli_epi16 (odd, 8),
255 _mm_srli_epi16 (_mm_slli_epi16 (even, 8), 8));
265struct SIMDNativeOps<uint8_t>
268 using vSIMDType = __m128i;
271 DECLARE_SSE_SIMD_CONST (uint8_t, kHighBit);
272 DECLARE_SSE_SIMD_CONST (uint8_t, kAllBitsSet);
274 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const uint8_t* a)
noexcept {
return load (a); }
275 static forcedinline __m128i JUCE_VECTOR_CALLTYPE ssign (__m128i a)
noexcept {
return _mm_xor_si128 (a, vconst (kHighBit)); }
276 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const uint8_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
277 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, uint8_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
278 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (uint8_t s)
noexcept {
return _mm_set1_epi8 ((int8_t) s); }
279 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi8 (a, b); }
280 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi8 (a, b); }
281 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
282 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
283 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
284 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
285 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
286 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept {
return _mm_min_epu8 (a, b); }
287 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept {
return _mm_max_epu8 (a, b); }
288 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept {
return _mm_cmpeq_epi8 (a, b); }
289 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept {
return _mm_cmpgt_epi8 (ssign (a), ssign (b)); }
290 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
291 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
292 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
293 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
296 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
299 static forcedinline uint8_t JUCE_VECTOR_CALLTYPE sum (__m128i a)
noexcept
302 __m128i lo = _mm_unpacklo_epi8 (a, _mm_setzero_si128());
303 __m128i hi = _mm_unpackhi_epi8 (a, _mm_setzero_si128());
305 for (
int i = 0; i < 3; ++i)
307 lo = _mm_hadd_epi16 (lo, lo);
308 hi = _mm_hadd_epi16 (hi, hi);
311 return static_cast<uint8_t
> ((
static_cast<uint32_t
> (_mm_cvtsi128_si32 (lo)) & 0xffu)
312 + (
static_cast<uint32_t
> (_mm_cvtsi128_si32 (hi)) & 0xffu));
318 static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b)
321 __m128i even = _mm_mullo_epi16 (a, b);
322 __m128i odd = _mm_mullo_epi16 (_mm_srli_epi16 (a, 8), _mm_srli_epi16 (b, 8));
324 return _mm_or_si128 (_mm_slli_epi16 (odd, 8),
325 _mm_srli_epi16 (_mm_slli_epi16 (even, 8), 8));
335struct SIMDNativeOps<int16_t>
338 using vSIMDType = __m128i;
341 DECLARE_SSE_SIMD_CONST (int16_t, kAllBitsSet);
344 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const int16_t* a)
noexcept {
return load (a); }
345 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const int16_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
346 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, int16_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
347 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (int16_t s)
noexcept {
return _mm_set1_epi16 (s); }
348 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi16 (a, b); }
349 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi16 (a, b); }
350 static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b)
noexcept {
return _mm_mullo_epi16 (a, b); }
351 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
352 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
353 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
354 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
355 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
356 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept {
return _mm_min_epi16 (a, b); }
357 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept {
return _mm_max_epi16 (a, b); }
358 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept {
return _mm_cmpeq_epi16 (a, b); }
359 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept {
return _mm_cmpgt_epi16 (a, b); }
360 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
361 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
362 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
363 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
366 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
369 static forcedinline int16_t JUCE_VECTOR_CALLTYPE sum (__m128i a)
noexcept
372 __m128i tmp = _mm_hadd_epi16 (a, a);
373 tmp = _mm_hadd_epi16 (tmp, tmp);
374 tmp = _mm_hadd_epi16 (tmp, tmp);
376 return static_cast<int16_t
> (_mm_cvtsi128_si32 (tmp) & 0xffff);
389struct SIMDNativeOps<uint16_t>
392 using vSIMDType = __m128i;
395 DECLARE_SSE_SIMD_CONST (uint16_t, kHighBit);
396 DECLARE_SSE_SIMD_CONST (uint16_t, kAllBitsSet);
399 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const uint16_t* a)
noexcept {
return load (a); }
400 static forcedinline __m128i JUCE_VECTOR_CALLTYPE ssign (__m128i a)
noexcept {
return _mm_xor_si128 (a, vconst (kHighBit)); }
401 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const uint16_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
402 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, uint16_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
403 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (uint16_t s)
noexcept {
return _mm_set1_epi16 ((int16_t) s); }
404 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi16 (a, b); }
405 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi16 (a, b); }
406 static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b)
noexcept {
return _mm_mullo_epi16 (a, b); }
407 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
408 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
409 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
410 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
411 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
412 #if defined (__SSE4__)
413 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept {
return _mm_min_epu16 (a, b); }
414 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept {
return _mm_max_epu16 (a, b); }
416 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept { __m128i lt = greaterThan (b, a);
return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
417 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept { __m128i gt = greaterThan (a, b);
return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
419 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept {
return _mm_cmpeq_epi16 (a, b); }
420 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept {
return _mm_cmpgt_epi16 (ssign (a), ssign (b)); }
421 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
422 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
423 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
424 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
427 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
430 static forcedinline uint16_t JUCE_VECTOR_CALLTYPE sum (__m128i a)
noexcept
433 __m128i tmp = _mm_hadd_epi16 (a, a);
434 tmp = _mm_hadd_epi16 (tmp, tmp);
435 tmp = _mm_hadd_epi16 (tmp, tmp);
437 return static_cast<uint16_t
> (
static_cast<uint32_t
> (_mm_cvtsi128_si32 (tmp)) & 0xffffu);
450struct SIMDNativeOps<int32_t>
453 using vSIMDType = __m128i;
456 DECLARE_SSE_SIMD_CONST (int32_t, kAllBitsSet);
459 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const int32_t* a)
noexcept {
return load (a); }
460 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const int32_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
461 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, int32_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
462 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (int32_t s)
noexcept {
return _mm_set1_epi32 (s); }
463 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi32 (a, b); }
464 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi32 (a, b); }
465 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
466 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
467 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
468 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
469 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
470 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept {
return _mm_cmpeq_epi32 (a, b); }
471 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept {
return _mm_cmpgt_epi32 (a, b); }
472 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
473 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
474 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
475 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
478 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
481 static forcedinline int32_t JUCE_VECTOR_CALLTYPE sum (__m128i a)
noexcept
484 __m128i tmp = _mm_hadd_epi32 (a, a);
485 return _mm_cvtsi128_si32 (_mm_hadd_epi32 (tmp, tmp));
491 static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b)
noexcept
493 #if defined (__SSE4_1__)
494 return _mm_mullo_epi32 (a, b);
496 __m128i even = _mm_mul_epu32 (a,b);
497 __m128i odd = _mm_mul_epu32 (_mm_srli_si128 (a,4), _mm_srli_si128 (b,4));
498 return _mm_unpacklo_epi32 (_mm_shuffle_epi32 (even, _MM_SHUFFLE (0,0,2,0)),
499 _mm_shuffle_epi32 (odd, _MM_SHUFFLE (0,0,2,0)));
503 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept
505 #if defined (__SSE4_1__)
506 return _mm_min_epi32 (a, b);
508 __m128i lt = greaterThan (b, a);
509 return bit_or (bit_and (lt, a), bit_andnot (lt, b));
513 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept
515 #if defined (__SSE4_1__)
516 return _mm_max_epi32 (a, b);
518 __m128i gt = greaterThan (a, b);
519 return bit_or (bit_and (gt, a), bit_andnot (gt, b));
530struct SIMDNativeOps<uint32_t>
533 using vSIMDType = __m128i;
536 DECLARE_SSE_SIMD_CONST (uint32_t, kAllBitsSet);
537 DECLARE_SSE_SIMD_CONST (uint32_t, kHighBit);
540 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const uint32_t* a)
noexcept {
return load (a); }
541 static forcedinline __m128i JUCE_VECTOR_CALLTYPE ssign (__m128i a)
noexcept {
return _mm_xor_si128 (a, vconst (kHighBit)); }
542 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const uint32_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
543 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, uint32_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
544 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (uint32_t s)
noexcept {
return _mm_set1_epi32 ((int32_t) s); }
545 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi32 (a, b); }
546 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi32 (a, b); }
547 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
548 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
549 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
550 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
551 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
552 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept {
return _mm_cmpeq_epi32 (a, b); }
553 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept {
return _mm_cmpgt_epi32 (ssign (a), ssign (b)); }
554 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
555 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
556 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
557 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
560 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
563 static forcedinline uint32_t JUCE_VECTOR_CALLTYPE sum (__m128i a)
noexcept
566 __m128i tmp = _mm_hadd_epi32 (a, a);
567 return static_cast<uint32_t
> (_mm_cvtsi128_si32 (_mm_hadd_epi32 (tmp, tmp)));
573 static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b)
noexcept
575 #if defined (__SSE4_1__)
576 return _mm_mullo_epi32 (a, b);
578 __m128i even = _mm_mul_epu32 (a,b);
579 __m128i odd = _mm_mul_epu32 (_mm_srli_si128 (a,4), _mm_srli_si128 (b,4));
580 return _mm_unpacklo_epi32 (_mm_shuffle_epi32 (even, _MM_SHUFFLE (0,0,2,0)),
581 _mm_shuffle_epi32 (odd, _MM_SHUFFLE (0,0,2,0)));
585 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept
587 #if defined (__SSE4_1__)
588 return _mm_min_epi32 (a, b);
590 __m128i lt = greaterThan (b, a);
591 return bit_or (bit_and (lt, a), bit_andnot (lt, b));
595 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept
597 #if defined (__SSE4_1__)
598 return _mm_max_epi32 (a, b);
600 __m128i gt = greaterThan (a, b);
601 return bit_or (bit_and (gt, a), bit_andnot (gt, b));
// SSE2 implementation of the SIMD ops for signed 64-bit integers.
// NOTE(review): this specialisation continues past this excerpt — greaterThan's
// non-SSE4.2 fallback and the closing of the struct are outside the visible
// range, so only comments are added here.
struct SIMDNativeOps<int64_t>
 using vSIMDType = __m128i;
 DECLARE_SSE_SIMD_CONST (int64_t, kAllBitsSet);
 // Loads the aligned constant table above as a vector.
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const int64_t* a)
noexcept {
return load (a); }
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (int64_t s)
noexcept {
return _mm_set1_epi64x (s); }
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const int64_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, int64_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi64 (a, b); }
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi64 (a, b); }
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
 // No packed 64-bit min/max instruction here, so both are built from the
 // greaterThan mask plus bitwise selects.
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept { __m128i lt = greaterThan (b, a);
return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept { __m128i gt = greaterThan (a, b);
return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
 // Integers are already truncated — identity.
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
 // Element-wise 64-bit equality. Without SSE4.1's _mm_cmpeq_epi64, each
 // 64-bit lane's result is formed by ANDing the two 32-bit comparison halves
 // together, then broadcasting that across the lane.
 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept
 #if defined (__SSE4_1__)
 return _mm_cmpeq_epi64 (a, b);
 __m128i bitmask = _mm_cmpeq_epi32 (a, b);
 bitmask = _mm_and_si128 (bitmask, _mm_shuffle_epi32 (bitmask, _MM_SHUFFLE (2, 3, 0, 1)));
 return _mm_shuffle_epi32 (bitmask, _MM_SHUFFLE (2, 2, 0, 0));
654 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept
656 #if defined (__SSE4_2__)
657 return _mm_cmpgt_epi64 (a, b);
670struct SIMDNativeOps<uint64_t>
673 using vSIMDType = __m128i;
676 DECLARE_SSE_SIMD_CONST (uint64_t, kAllBitsSet);
677 DECLARE_SSE_SIMD_CONST (uint64_t, kHighBit);
679 static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (
const uint64_t* a)
noexcept {
return load (a); }
680 static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (uint64_t s)
noexcept {
return _mm_set1_epi64x ((int64_t) s); }
681 static forcedinline __m128i JUCE_VECTOR_CALLTYPE ssign (__m128i a)
noexcept {
return _mm_xor_si128 (a, vconst (kHighBit)); }
682 static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (
const uint64_t* a)
noexcept {
return _mm_load_si128 (
reinterpret_cast<const __m128i*
> (a)); }
683 static forcedinline
void JUCE_VECTOR_CALLTYPE store (__m128i v, uint64_t* p)
noexcept { _mm_store_si128 (
reinterpret_cast<__m128i*
> (p), v); }
684 static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b)
noexcept {
return _mm_add_epi64 (a, b); }
685 static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b)
noexcept {
return _mm_sub_epi64 (a, b); }
686 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b)
noexcept {
return _mm_and_si128 (a, b); }
687 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b)
noexcept {
return _mm_or_si128 (a, b); }
688 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b)
noexcept {
return _mm_xor_si128 (a, b); }
689 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b)
noexcept {
return _mm_andnot_si128 (a, b); }
690 static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a)
noexcept {
return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
691 static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b)
noexcept { __m128i lt = greaterThan (b, a);
return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
692 static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b)
noexcept { __m128i gt = greaterThan (a, b);
return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
693 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b)
noexcept {
return bit_or (greaterThan (a, b), equal (a,b)); }
694 static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c)
noexcept {
return add (a, mul (b, c)); }
695 static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b)
noexcept {
return bit_not (equal (a, b)); }
696 static forcedinline
bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b)
noexcept {
return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
701 static forcedinline __m128i JUCE_VECTOR_CALLTYPE truncate (__m128i a)
noexcept {
return a; }
703 static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b)
noexcept
705 #if defined (__SSE4_1__)
706 return _mm_cmpeq_epi64 (a, b);
708 __m128i bitmask = _mm_cmpeq_epi32 (a, b);
709 bitmask = _mm_and_si128 (bitmask, _mm_shuffle_epi32 (bitmask, _MM_SHUFFLE (2, 3, 0, 1)));
710 return _mm_shuffle_epi32 (bitmask, _MM_SHUFFLE (2, 2, 0, 0));
714 static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b)
noexcept
716 #if defined (__SSE4_2__)
717 return _mm_cmpgt_epi64 (ssign (a), ssign (b));