20 #ifndef VC_SSE_SHUFFLE_H
21 #define VC_SSE_SHUFFLE_H
39 return _mm_shuffle_ps(x, y, Dst0 + Dst1 * 4 + (Dst2 -
Y0) * 16 + (Dst3 -
Y0) * 64);
46 return _mm_shuffle_pd(x, y, Dst0 + (Dst1 -
Y0) * 2);
49 #if !defined(VC_IMPL_SSE4_1) && !defined(VC_IMPL_AVX)
50 #define Vc_MAKE_INTRINSIC__(name__) Vc::SSE::_VC_CAT(m,m,_,name__)
52 #define Vc_MAKE_INTRINSIC__(name__) _VC_CAT(_,mm,_,name__)
69 (Dst0 /
Y0) * 1 + (Dst1 /
Y1) * 2 +
70 (Dst2 /
Y2) * 4 + (Dst3 /
Y3) * 8);
73 template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3, VecPos Dst4, VecPos Dst5, VecPos Dst6, VecPos Dst7>
84 (Dst0 /
Y0) * 1 + (Dst1 /
Y1) * 2 +
85 (Dst2 /
Y2) * 4 + (Dst3 /
Y3) * 8 +
86 (Dst4 /
Y4) * 16 + (Dst5 /
Y5) * 32 +
87 (Dst6 /
Y6) * 64 + (Dst7 /
Y7) *128
95 return _mm_shuffle_ps(x, x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
101 return _mm_shuffle_epi32(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
107 return _mm_shufflelo_epi16(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
113 return _mm_shufflehi_epi16(x, (Dst0 -
X4) + (Dst1 -
X4) * 4 + (Dst2 -
X4) * 16 + (Dst3 -
X4) * 64);
116 template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3, VecPos Dst4, VecPos Dst5, VecPos Dst6, VecPos Dst7>
122 if (Dst0 !=
X0 || Dst1 !=
X1 || Dst2 !=
X2 || Dst3 !=
X3) {
123 x = _mm_shufflelo_epi16(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
125 if (Dst4 !=
X4 || Dst5 !=
X5 || Dst6 !=
X6 || Dst7 !=
X7) {
126 x = _mm_shufflehi_epi16(x, (Dst4 -
X4) + (Dst5 -
X4) * 4 + (Dst6 -
X4) * 16 + (Dst7 -
X4) * 64);
136 return Mem::shuffle<Dst0, Dst1, Dst2, Dst3>(
x,
y);
141 return Mem::shuffle<Dst0, Dst1>(
x,
y);
148 return _mm_shuffle_epi32(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
155 return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(x), _mm_castsi128_ps(y), Dst0 + Dst1 * 4 + (Dst2 -
Y0) * 16 + (Dst3 -
Y0) * 64));
160 return Mem::blend<Dst0, Dst1>(
x,
y);
164 return Mem::blend<Dst0, Dst1, Dst2, Dst3>(
x,
y);
172 #endif // VC_SSE_SHUFFLE_H
static Vc_ALWAYS_INLINE m256 Vc_CONST blend(param256 x, param256 y)
static Vc_ALWAYS_INLINE m256d Vc_CONST permute(param256d x)
static Vc_ALWAYS_INLINE m256d Vc_CONST permute(param256d x)
#define Vc_MAKE_INTRINSIC__(name__)
static Vc_ALWAYS_INLINE __m128i Vc_CONST permuteHi(__m128i x)
static Vc_ALWAYS_INLINE m256d Vc_CONST shuffle(param256d x, param256d y)
static Vc_ALWAYS_INLINE m256d Vc_CONST shuffle(param256d x, param256d y)
#define VC_STATIC_ASSERT(cond, msg)
static Vc_ALWAYS_INLINE __m128d Vc_CONST blend(__m128d x, __m128d y)
static Vc_ALWAYS_INLINE __m128i Vc_CONST permuteLo(__m128i x)