doc/v606/sse_2vectorhelper_8h_source.html

 /*  This file is part of the Vc library.


     Copyright (C) 2009-2011 Matthias Kretz <kretz@kde.org>


     Vc is free software: you can redistribute it and/or modify

     it under the terms of the GNU Lesser General Public License as

     published by the Free Software Foundation, either version 3 of

     the License, or (at your option) any later version.


     Vc is distributed in the hope that it will be useful, but

     WITHOUT ANY WARRANTY; without even the implied warranty of

     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

     GNU Lesser General Public License for more details.


     You should have received a copy of the GNU Lesser General Public

     License along with Vc.  If not, see <http://www.gnu.org/licenses/>.


 */


 #ifndef SSE_VECTORHELPER_H

 #define SSE_VECTORHELPER_H


 #include "types.h"

 #include <limits>

 #include "macros.h"


 namespace ROOT {

 namespace Vc

 {

 namespace SSE

 {


 namespace Internal

 {

 // Returns, for every 32-bit float entry of v, the exponent field minus the
 // bias 0x7f, converted to float.
 // The bits are moved down with a logical shift by the 23 mantissa bits, so the
 // sign bit is not masked away before the bias is subtracted.
 Vc_INTRINSIC Vc_CONST __m128 exponent(__m128 v)
 {
     const __m128i bias = _mm_set1_epi32(0x7f);
     const __m128i shifted = _mm_srli_epi32(_mm_castps_si128(v), 23);
     return _mm_cvtepi32_ps(_mm_sub_epi32(shifted, bias));
 }

 // M256 overload: applies the same exponent extraction as the __m128 version
 // to the two halves v[0] and v[1] and recombines them with M256::create.
 Vc_INTRINSIC Vc_CONST M256 exponent(VC_ALIGNED_PARAMETER(M256) v)
 {
     const __m128i bias = _mm_set1_epi32(0x7f);
     const __m128i first = _mm_sub_epi32(_mm_srli_epi32(_mm_castps_si128(v[0]), 23), bias);
     const __m128i second = _mm_sub_epi32(_mm_srli_epi32(_mm_castps_si128(v[1]), 23), bias);
     return M256::create(_mm_cvtepi32_ps(first), _mm_cvtepi32_ps(second));
 }

 // Returns, for both double entries of v, the exponent field minus the bias
 // 0x3ff, converted to double. As in the float versions the shift is logical,
 // so the sign bit is not masked away.
 Vc_INTRINSIC Vc_CONST __m128d exponent(__m128d v)
 {
     const __m128i shifted = _mm_srli_epi64(_mm_castpd_si128(v), 52);
     const __m128i unbiased = _mm_sub_epi32(shifted, _mm_set1_epi32(0x3ff));
     // 0x08 selects 32-bit lanes 0 and 2 (the low halves of the two 64-bit
     // results) into lanes 0 and 1, which is where _mm_cvtepi32_pd reads from
     const __m128i packed = _mm_shuffle_epi32(unbiased, 0x08);
     return _mm_cvtepi32_pd(packed);
 }

 } // namespace Internal


     // Generic sort entry point, parameterized on the register type and on Size.
     // Only the declaration of sort() is visible in this part of the file; it
     // takes the vector by value and returns a vector of the same type.
     template<typename VectorType, unsigned int Size> struct SortHelper

     {

         static inline Vc_CONST_L VectorType sort(VectorType) Vc_CONST_R;

     };

     // Partial specialization for M256. Unlike the generic version, sort()
     // takes its argument by const reference and is declared with the
     // Vc_PURE_L/Vc_PURE_R attribute macros instead of Vc_CONST_L/Vc_CONST_R.
     template<unsigned int Size> struct SortHelper<M256, Size>

     {

         static inline Vc_PURE_L M256 sort(const M256 &) Vc_PURE_R;

     };


 // Member generators: OPn(name, code) expands to a static member function
 // called `name` that returns the expression `code`. OP0 takes no argument,
 // OP2 takes (a, b) and OP3 takes (a, b, c), each of type VectorTypeArg, so
 // VectorType and VectorTypeArg must be declared where the macro is expanded.
 #define OP0(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name() { return code; }

 #define OP2(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(VectorTypeArg a, VectorTypeArg b) { return code; }

 #define OP3(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(VectorTypeArg a, VectorTypeArg b, VectorTypeArg c) { return code; }

     template<> struct VectorHelper<M256>

     {

         typedef M256 VectorType;

 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN

         typedef const VectorType &VectorTypeArg;

 #else

         typedef const VectorType VectorTypeArg;

 #endif

         template<typename A> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const float *x, A) Vc_ALWAYS_INLINE_R Vc_PURE_R;

         static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, AlignedFlag) Vc_ALWAYS_INLINE_R;

         static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, UnalignedFlag) Vc_ALWAYS_INLINE_R;

         static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;

         static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;

         static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, VectorTypeArg m, AlignedFlag) Vc_ALWAYS_INLINE_R;

         static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, VectorTypeArg m, UnalignedFlag) Vc_ALWAYS_INLINE_R;

         static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, VectorTypeArg m, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;

         static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, VectorTypeArg m, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;


         OP0(allone, VectorType::create(_mm_setallone_ps(), _mm_setallone_ps()))

         OP0(zero, VectorType::create(_mm_setzero_ps(), _mm_setzero_ps()))

         OP2(or_, VectorType::create(_mm_or_ps(a[0], b[0]), _mm_or_ps(a[1], b[1])))

         OP2(xor_, VectorType::create(_mm_xor_ps(a[0], b[0]), _mm_xor_ps(a[1], b[1])))

         OP2(and_, VectorType::create(_mm_and_ps(a[0], b[0]), _mm_and_ps(a[1], b[1])))

         OP2(andnot_, VectorType::create(_mm_andnot_ps(a[0], b[0]), _mm_andnot_ps(a[1], b[1])))

         OP3(blend, VectorType::create(mm_blendv_ps(a[0], b[0], c[0]), mm_blendv_ps(a[1], b[1], c[1])))

     };

 #undef OP0

 #undef OP2

 #undef OP3


 // Member generators: OPn(name, code) expands to a static member function
 // called `name` that returns the expression `code` and takes n arguments
 // (a, b, c) of type `const VectorType`, passed by value. VectorType must be
 // declared where the macro is expanded.
 #define OP0(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name() { return code; }

 #define OP1(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(const VectorType a) { return code; }

 #define OP2(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(const VectorType a, const VectorType b) { return code; }

 #define OP3(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(const VectorType a, const VectorType b, const VectorType c) { return code; }


         template<> struct VectorHelper<_M128>

         {

             typedef _M128 VectorType;

             template<typename A> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const float *x, A) Vc_ALWAYS_INLINE_R Vc_PURE_R;

             static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, AlignedFlag) Vc_ALWAYS_INLINE_R;

             static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, UnalignedFlag) Vc_ALWAYS_INLINE_R;

             static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;

             static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;

             static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, const VectorType m, AlignedFlag) Vc_ALWAYS_INLINE_R;

             static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, const VectorType m, UnalignedFlag) Vc_ALWAYS_INLINE_R;

             static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;

             static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;


             OP0(allone, _mm_setallone_ps())

             OP0(zero, _mm_setzero_ps())

             OP2(or_, _mm_or_ps(a, b))

             OP2(xor_, _mm_xor_ps(a, b))

             OP2(and_, _mm_and_ps(a, b))

             OP2(andnot_, _mm_andnot_ps(a, b))

             OP3(blend, mm_blendv_ps(a, b, c))

         };


         template<> struct VectorHelper<_M128D>

         {

             typedef _M128D VectorType;

             template<typename A> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const double *x, A) Vc_ALWAYS_INLINE_R Vc_PURE_R;

             static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, AlignedFlag) Vc_ALWAYS_INLINE_R;

             static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, UnalignedFlag) Vc_ALWAYS_INLINE_R;

             static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;

             static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;

             static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, const VectorType m, AlignedFlag) Vc_ALWAYS_INLINE_R;

             static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, const VectorType m, UnalignedFlag) Vc_ALWAYS_INLINE_R;

             static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;

             static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;


             OP0(allone, _mm_setallone_pd())

             OP0(zero, _mm_setzero_pd())

             OP2(or_, _mm_or_pd(a, b))

             OP2(xor_, _mm_xor_pd(a, b))

             OP2(and_, _mm_and_pd(a, b))

             OP2(andnot_, _mm_andnot_pd(a, b))

             OP3(blend, mm_blendv_pd(a, b, c))

         };


         template<> struct VectorHelper<_M128I>

         {

             typedef _M128I VectorType;

             template<typename T> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const T *x, AlignedFlag) Vc_ALWAYS_INLINE_R Vc_PURE_R;

             template<typename T> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const T *x, UnalignedFlag) Vc_ALWAYS_INLINE_R Vc_PURE_R;

             template<typename T> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const T *x, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R Vc_PURE_R;

             template<typename T> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const T *x, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R Vc_PURE_R;

             template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, AlignedFlag) Vc_ALWAYS_INLINE_R;

             template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, UnalignedFlag) Vc_ALWAYS_INLINE_R;

             template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;

             template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;

             template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, const VectorType m, AlignedFlag) Vc_ALWAYS_INLINE_R;

             template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, const VectorType m, UnalignedFlag) Vc_ALWAYS_INLINE_R;

             template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;

             template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;


             OP0(allone, _mm_setallone_si128())

             OP0(zero, _mm_setzero_si128())

             OP2(or_, _mm_or_si128(a, b))

             OP2(xor_, _mm_xor_si128(a, b))

             OP2(and_, _mm_and_si128(a, b))

             OP2(andnot_, _mm_andnot_si128(a, b))

             OP3(blend, mm_blendv_epi8(a, b, c))

         };


 // Remove the (name, code) member generators again. OP0 was defined together
 // with OP1..OP3 and is removed with them so that it does not stay defined
 // for the remainder of this header and for every file that includes it.
 #undef OP0
 #undef OP1
 #undef OP2
 #undef OP3


 // Member generators for the per-entry-type VectorHelper specializations.
 // Each one builds an intrinsic name from its argument and from the SUFFIX
 // macro that is in effect where the generator is used, and forwards to it.
 // VectorType, CAT and SUFFIX must be available at the point of use.
 //
 // A definition has to stay one logical line: the line directly after a
 // trailing backslash is part of the macro. A blank line in that position
 // ends the macro with an empty body and turns the intended body into stray
 // code, so the continuation lines below must remain contiguous.

 // OP1(op): unary member op(a) that calls _mm_<op>_<SUFFIX>(a)
 #define OP1(op) \
         static Vc_ALWAYS_INLINE Vc_CONST VectorType op(const VectorType a) { return CAT(_mm_##op##_, SUFFIX)(a); }
 // OP(op): binary member op(a, b) that calls _mm_<op>_<SUFFIX>(a, b)
 #define OP(op) \
         static Vc_ALWAYS_INLINE Vc_CONST VectorType op(const VectorType a, const VectorType b) { return CAT(_mm_##op##_ , SUFFIX)(a, b); }
 // OP_(op): like OP, for names that already end in '_' (or_, and_, xor_):
 // calls _mm_<op><SUFFIX>(a, b)
 #define OP_(op) \
         static Vc_ALWAYS_INLINE Vc_CONST VectorType op(const VectorType a, const VectorType b) { return CAT(_mm_##op    , SUFFIX)(a, b); }
 // OPx(op, op2): binary member named op that calls _mm_<op2>_<SUFFIX>(a, b)
 #define OPx(op, op2) \
         static Vc_ALWAYS_INLINE Vc_CONST VectorType op(const VectorType a, const VectorType b) { return CAT(_mm_##op2##_, SUFFIX)(a, b); }
 // OPcmp(op): binary member cmp<op>(a, b) that calls _mm_cmp<op>_<SUFFIX>(a, b)
 #define OPcmp(op) \
         static Vc_ALWAYS_INLINE Vc_CONST VectorType cmp##op(const VectorType a, const VectorType b) { return CAT(_mm_cmp##op##_, SUFFIX)(a, b); }
 // OP_CAST_(op): binary member op(a, b) that converts both operands with
 // _mm_cast<SUFFIX>_ps, applies _mm_<op>ps and converts the result back with
 // _mm_castps_<SUFFIX>
 #define OP_CAST_(op) \
         static Vc_ALWAYS_INLINE Vc_CONST VectorType op(const VectorType a, const VectorType b) { return CAT(_mm_castps_, SUFFIX)( \
             _mm_##op##ps(CAT(CAT(_mm_cast, SUFFIX), _ps)(a), \
               CAT(CAT(_mm_cast, SUFFIX), _ps)(b))); \
         }
 // MINMAX: binary members min(a, b) and max(a, b) that call
 // _mm_min_<SUFFIX> and _mm_max_<SUFFIX>
 #define MINMAX \
         static Vc_ALWAYS_INLINE Vc_CONST VectorType min(VectorType a, VectorType b) { return CAT(_mm_min_, SUFFIX)(a, b); } \
         static Vc_ALWAYS_INLINE Vc_CONST VectorType max(VectorType a, VectorType b) { return CAT(_mm_max_, SUFFIX)(a, b); }


         template<> struct VectorHelper<double> {

             typedef _M128D VectorType;

             typedef double EntryType;

 #define SUFFIX pd


             OP_(or_) OP_(and_) OP_(xor_)

             static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask) { return CAT(_mm_and_, SUFFIX)(_mm_castps_pd(mask), a); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const double a) { return CAT(_mm_set1_, SUFFIX)(a); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const double a, const double b) { return CAT(_mm_set_, SUFFIX)(a, b); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return CAT(_mm_setzero_, SUFFIX)(); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType one()  { return CAT(_mm_setone_, SUFFIX)(); }// set(1.); }


 #ifdef VC_IMPL_FMA4

             static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3) {

                 v1 = _mm_macc_pd(v1, v2, v3);

             }

 #else

             static inline void fma(VectorType &v1, VectorType v2, VectorType v3) {

                 VectorType h1 = _mm_and_pd(v1, _mm_load_pd(reinterpret_cast<const double *>(&c_general::highMaskDouble)));

                 VectorType h2 = _mm_and_pd(v2, _mm_load_pd(reinterpret_cast<const double *>(&c_general::highMaskDouble)));

 #if defined(VC_GCC) && VC_GCC < 0x40703

                 // GCC before 4.7.3 uses an incorrect optimization where it replaces the subtraction with an andnot

                 // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54703

                 asm("":"+x"(h1), "+x"(h2));

 #endif

                 const VectorType l1 = _mm_sub_pd(v1, h1);

                 const VectorType l2 = _mm_sub_pd(v2, h2);

                 const VectorType ll = mul(l1, l2);

                 const VectorType lh = add(mul(l1, h2), mul(h1, l2));

                 const VectorType hh = mul(h1, h2);

                 // ll < lh < hh for all entries is certain

                 const VectorType lh_lt_v3 = cmplt(abs(lh), abs(v3)); // |lh| < |v3|

                 const VectorType b = mm_blendv_pd(v3, lh, lh_lt_v3);

                 const VectorType c = mm_blendv_pd(lh, v3, lh_lt_v3);

                 v1 = add(add(ll, b), add(c, hh));

             }

 #endif


             OP(add) OP(sub) OP(mul)

             OPcmp(eq) OPcmp(neq)

             OPcmp(lt) OPcmp(nlt)

             OPcmp(le) OPcmp(nle)


             OP1(sqrt)

             static Vc_ALWAYS_INLINE Vc_CONST VectorType rsqrt(VectorType x) {

                 return _mm_div_pd(one(), sqrt(x));

             }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VectorType x) {

                 return _mm_div_pd(one(), x);

             }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType isNaN(VectorType x) {

                 return _mm_cmpunord_pd(x, x);

             }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType isFinite(VectorType x) {

                 return _mm_cmpord_pd(x, _mm_mul_pd(zero(), x));

             }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(const VectorType a) {

                 return CAT(_mm_and_, SUFFIX)(a, _mm_setabsmask_pd());

             }


             MINMAX

             static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a) {

                 a = _mm_min_sd(a, _mm_unpackhi_pd(a, a));

                 return _mm_cvtsd_f64(a);

             }

             static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a) {

                 a = _mm_max_sd(a, _mm_unpackhi_pd(a, a));

                 return _mm_cvtsd_f64(a);

             }

             static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a) {

                 a = _mm_mul_sd(a, _mm_shuffle_pd(a, a, _MM_SHUFFLE2(0, 1)));

                 return _mm_cvtsd_f64(a);

             }

             static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a) {

                 a = _mm_add_sd(a, _mm_shuffle_pd(a, a, _MM_SHUFFLE2(0, 1)));

                 return _mm_cvtsd_f64(a);

             }

 #undef SUFFIX

             static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a) {

 #ifdef VC_IMPL_SSE4_1

                 return _mm_round_pd(a, _MM_FROUND_NINT);

 #else

                 //XXX: slow: _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);

                 return _mm_cvtepi32_pd(_mm_cvtpd_epi32(a));

 #endif

             }

         };


         template<> struct VectorHelper<float> {

             typedef float EntryType;

             typedef _M128 VectorType;

 #define SUFFIX ps


             OP_(or_) OP_(and_) OP_(xor_)

             static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask) { return CAT(_mm_and_, SUFFIX)(mask, a); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a) { return CAT(_mm_set1_, SUFFIX)(a); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d) { return CAT(_mm_set_, SUFFIX)(a, b, c, d); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return CAT(_mm_setzero_, SUFFIX)(); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType one()  { return CAT(_mm_setone_, SUFFIX)(); }// set(1.f); }

             static Vc_ALWAYS_INLINE Vc_CONST _M128 concat(_M128D a, _M128D b) { return _mm_movelh_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b)); }


 #ifdef VC_IMPL_FMA4

             static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3) {

                 v1 = _mm_macc_ps(v1, v2, v3);

             }

 #else

             static inline void fma(VectorType &v1, VectorType v2, VectorType v3) {

                 __m128d v1_0 = _mm_cvtps_pd(v1);

                 __m128d v1_1 = _mm_cvtps_pd(_mm_movehl_ps(v1, v1));

                 __m128d v2_0 = _mm_cvtps_pd(v2);

                 __m128d v2_1 = _mm_cvtps_pd(_mm_movehl_ps(v2, v2));

                 __m128d v3_0 = _mm_cvtps_pd(v3);

                 __m128d v3_1 = _mm_cvtps_pd(_mm_movehl_ps(v3, v3));

                 v1 = _mm_movelh_ps(

                         _mm_cvtpd_ps(_mm_add_pd(_mm_mul_pd(v1_0, v2_0), v3_0)),

                         _mm_cvtpd_ps(_mm_add_pd(_mm_mul_pd(v1_1, v2_1), v3_1)));

             }

 #endif


             OP(add) OP(sub) OP(mul)

             OPcmp(eq) OPcmp(neq)

             OPcmp(lt) OPcmp(nlt)

             OPcmp(le) OPcmp(nle)


             OP1(sqrt) OP1(rsqrt)

             static Vc_ALWAYS_INLINE Vc_CONST VectorType isNaN(VectorType x) {

                 return _mm_cmpunord_ps(x, x);

             }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType isFinite(VectorType x) {

                 return _mm_cmpord_ps(x, _mm_mul_ps(zero(), x));

             }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VectorType x) {

                 return _mm_rcp_ps(x);

             }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(const VectorType a) {

                 return CAT(_mm_and_, SUFFIX)(a, _mm_setabsmask_ps());

             }


             MINMAX

             static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a) {

                 a = _mm_min_ps(a, _mm_movehl_ps(a, a));   // a = min(a0, a2), min(a1, a3), min(a2, a2), min(a3, a3)

                 a = _mm_min_ss(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1))); // a = min(a0, a1), a1, a2, a3

                 return _mm_cvtss_f32(a);

             }

             static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a) {

                 a = _mm_max_ps(a, _mm_movehl_ps(a, a));   // a = max(a0, a2), max(a1, a3), max(a2, a2), max(a3, a3)

                 a = _mm_max_ss(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1))); // a = max(a0, a1), a1, a2, a3

                 return _mm_cvtss_f32(a);

             }

             static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a) {

                 a = _mm_mul_ps(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 1, 2, 3)));

                 a = _mm_mul_ss(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 2, 0, 1)));

                 return _mm_cvtss_f32(a);

             }

             static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a) {

                 a = _mm_add_ps(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 1, 2, 3)));

                 a = _mm_add_ss(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 2, 0, 1)));

                 return _mm_cvtss_f32(a);

             }

 #undef SUFFIX

             static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a) {

 #ifdef VC_IMPL_SSE4_1

                 return _mm_round_ps(a, _MM_FROUND_NINT);

 #else

                 //XXX slow: _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);

                 return _mm_cvtepi32_ps(_mm_cvtps_epi32(a));

 #endif

             }

         };


         // Helpers for eight float entries stored in an M256, i.e. two _M128
         // halves reached through operator[] (index 0 and 1). Almost everything
         // is implemented by running the VectorHelper<float> function on each
         // half and recombining with VectorType::create.
         template<> struct VectorHelper<float8> {
             typedef float EntryType;
             typedef M256 VectorType;
 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
             typedef const VectorType &VectorTypeArg;
 #else
             typedef const VectorType VectorTypeArg;
 #endif

             // broadcast of a single value to all eight entries
             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a) {
                 const _M128 x = _mm_set1_ps(a);
                 return VectorType::create(x, x);
             }
             // four values: the same _mm_set_ps(a, b, c, d) register is used for both halves
             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d) {
                 const _M128 x = _mm_set_ps(a, b, c, d);
                 return VectorType::create(x, x);
             }
             // eight values: (a, b, c, d) forms the first half, (e, f, g, h) the second
             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d,
                     const float e, const float f, const float g, const float h) {
                 return VectorType::create(_mm_set_ps(a, b, c, d), _mm_set_ps(e, f, g, h));
             }
             static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return VectorType::create(_mm_setzero_ps(), _mm_setzero_ps()); }
             static Vc_ALWAYS_INLINE Vc_CONST VectorType one()  { return set(1.f); }

             // REUSE_FLOAT_IMPLn(fun) defines an n-ary member `fun` that calls
             // VectorHelper<float>::fun on the matching halves of its arguments.
             // The continuation lines must directly follow each backslash; a
             // blank line in between ends the macro with an empty body.
 #define REUSE_FLOAT_IMPL1(fun) \
             static Vc_ALWAYS_INLINE Vc_CONST VectorType fun(VectorTypeArg x) { \
                 return VectorType::create(VectorHelper<float>::fun(x[0]), VectorHelper<float>::fun(x[1])); \
             }
 #define REUSE_FLOAT_IMPL2(fun) \
             static Vc_ALWAYS_INLINE Vc_CONST VectorType fun(VectorTypeArg x, VectorTypeArg y) { \
                 return VectorType::create(VectorHelper<float>::fun(x[0], y[0]), VectorHelper<float>::fun(x[1], y[1])); \
             }
             // unary, element-wise
             REUSE_FLOAT_IMPL1(reciprocal)
             REUSE_FLOAT_IMPL1(sqrt)
             REUSE_FLOAT_IMPL1(rsqrt)
             REUSE_FLOAT_IMPL1(isNaN)
             REUSE_FLOAT_IMPL1(isFinite)
             REUSE_FLOAT_IMPL1(abs)
             REUSE_FLOAT_IMPL1(round)

             // binary, element-wise
             REUSE_FLOAT_IMPL2(and_)
             REUSE_FLOAT_IMPL2(or_)
             REUSE_FLOAT_IMPL2(xor_)
             REUSE_FLOAT_IMPL2(notMaskedToZero)
             REUSE_FLOAT_IMPL2(add)
             REUSE_FLOAT_IMPL2(sub)
             REUSE_FLOAT_IMPL2(mul)
             REUSE_FLOAT_IMPL2(cmple)
             REUSE_FLOAT_IMPL2(cmpnle)
             REUSE_FLOAT_IMPL2(cmplt)
             REUSE_FLOAT_IMPL2(cmpnlt)
             REUSE_FLOAT_IMPL2(cmpeq)
             REUSE_FLOAT_IMPL2(cmpneq)
             REUSE_FLOAT_IMPL2(min)
             REUSE_FLOAT_IMPL2(max)

             // Horizontal reductions over all eight entries: the two halves are
             // first combined element-wise, then the four-entry reduction of
             // VectorHelper<float> is applied to the combined half.
             static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorTypeArg a) {
                 return VectorHelper<float>::min(VectorHelper<float>::min(a[0], a[1]));
             }
             static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorTypeArg a) {
                 return VectorHelper<float>::max(VectorHelper<float>::max(a[0], a[1]));
             }
             static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorTypeArg a) {
                 return VectorHelper<float>::mul(VectorHelper<float>::mul(a[0], a[1]));
             }
             static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorTypeArg a) {
                 return VectorHelper<float>::add(VectorHelper<float>::add(a[0], a[1]));
             }

             // a = a * b + c, performed in place on each half of a
             static inline void fma(VectorType &a, VectorTypeArg b, VectorTypeArg c) {
                 VectorHelper<float>::fma(a[0], b[0], c[0]);
                 VectorHelper<float>::fma(a[1], b[1], c[1]);
             }
 #undef REUSE_FLOAT_IMPL2
 #undef REUSE_FLOAT_IMPL1
         };


         template<> struct VectorHelper<int> {

             typedef int EntryType;

             typedef _M128I VectorType;

 #define SUFFIX si128


             OP_(or_) OP_(and_) OP_(xor_)

             static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return CAT(_mm_setzero_, SUFFIX)(); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask) { return CAT(_mm_and_, SUFFIX)(_mm_castps_si128(mask), a); }

 #undef SUFFIX

 #define SUFFIX epi32

             static Vc_ALWAYS_INLINE Vc_CONST VectorType one() { return CAT(_mm_setone_, SUFFIX)(); }


             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const int a) { return CAT(_mm_set1_, SUFFIX)(a); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const int a, const int b, const int c, const int d) { return CAT(_mm_set_, SUFFIX)(a, b, c, d); }


             static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3) { v1 = add(mul(v1, v2), v3); }


             static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift) {

                 return CAT(_mm_slli_, SUFFIX)(a, shift);

             }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift) {

                 return CAT(_mm_srai_, SUFFIX)(a, shift);

             }

             static Vc_INTRINSIC Vc_CONST VectorType abs(const VectorType a) { return mm_abs_epi32(a); }


             static Vc_INTRINSIC Vc_CONST VectorType min(const VectorType a, const VectorType b) { return mm_min_epi32(a, b); }

             static Vc_INTRINSIC Vc_CONST VectorType max(const VectorType a, const VectorType b) { return mm_max_epi32(a, b); }

             static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a) {

                 a = min(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 // using lo_epi16 for speed here

                 a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 return _mm_cvtsi128_si32(a);

             }

             static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a) {

                 a = max(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 // using lo_epi16 for speed here

                 a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 return _mm_cvtsi128_si32(a);

             }

             static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a) {

                 a = add(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 return _mm_cvtsi128_si32(a);

             }

 #ifdef VC_IMPL_SSE4_1

             static Vc_ALWAYS_INLINE Vc_CONST VectorType mul(VectorType a, VectorType b) { return _mm_mullo_epi32(a, b); }

             static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a) {

                 a = mul(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 return _mm_cvtsi128_si32(a);

             }

 #else

             static inline Vc_CONST VectorType mul(const VectorType a, const VectorType b) {

                 const VectorType aShift = _mm_srli_si128(a, 4);

                 const VectorType ab02 = _mm_mul_epu32(a, b); // [a0 * b0, a2 * b2]

                 const VectorType bShift = _mm_srli_si128(b, 4);

                 const VectorType ab13 = _mm_mul_epu32(aShift, bShift); // [a1 * b1, a3 * b3]

                 return _mm_unpacklo_epi32(_mm_shuffle_epi32(ab02, 8), _mm_shuffle_epi32(ab13, 8));

             }

 #endif


             OP(add) OP(sub)

             OPcmp(eq)

             OPcmp(lt)

             OPcmp(gt)

             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpneq(const VectorType a, const VectorType b) { _M128I x = cmpeq(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b) { _M128I x = cmplt(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple (const VectorType a, const VectorType b) { _M128I x = cmpgt(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b) { return cmpgt(a, b); }

 #undef SUFFIX

             static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a) { return a; }

         };


         template<> struct VectorHelper<unsigned int> {

             typedef unsigned int EntryType;

             typedef _M128I VectorType;

 #define SUFFIX si128

             OP_CAST_(or_) OP_CAST_(and_) OP_CAST_(xor_)

             static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return CAT(_mm_setzero_, SUFFIX)(); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask) { return CAT(_mm_and_, SUFFIX)(_mm_castps_si128(mask), a); }


 #undef SUFFIX

 #define SUFFIX epu32

             static Vc_ALWAYS_INLINE Vc_CONST VectorType one() { return CAT(_mm_setone_, SUFFIX)(); }


             static Vc_INTRINSIC Vc_CONST VectorType min(const VectorType a, const VectorType b) { return mm_min_epu32(a, b); }

             static Vc_INTRINSIC Vc_CONST VectorType max(const VectorType a, const VectorType b) { return mm_max_epu32(a, b); }

             // Horizontal minimum of the four entries; the result is read from
             // the lowest entry after two combining steps.
             static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a) {
                 // combine a with its two 64-bit halves exchanged
                 const VectorType halves = min(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
                 // exchange the two low 32-bit entries; using lo_epi16 for speed here
                 const VectorType folded = min(halves, _mm_shufflelo_epi16(halves, _MM_SHUFFLE(1, 0, 3, 2)));
                 return _mm_cvtsi128_si32(folded);
             }

             // Horizontal maximum of the four entries; the result is read from
             // the lowest entry after two combining steps.
             static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a) {
                 // combine a with its two 64-bit halves exchanged
                 const VectorType halves = max(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
                 // exchange the two low 32-bit entries; using lo_epi16 for speed here
                 const VectorType folded = max(halves, _mm_shufflelo_epi16(halves, _MM_SHUFFLE(1, 0, 3, 2)));
                 return _mm_cvtsi128_si32(folded);
             }

             // Horizontal product of the four entries; the result is read from
             // the lowest entry after two combining steps.
             static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a) {
                 // combine a with its two 64-bit halves exchanged
                 const VectorType halves = mul(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
                 // exchange the two low 32-bit entries; using lo_epi16 for speed here
                 const VectorType folded = mul(halves, _mm_shufflelo_epi16(halves, _MM_SHUFFLE(1, 0, 3, 2)));
                 return _mm_cvtsi128_si32(folded);
             }

             // Horizontal sum of the four entries; the result is read from the
             // lowest entry after two combining steps.
             static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a) {
                 // combine a with its two 64-bit halves exchanged
                 const VectorType halves = add(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
                 // exchange the two low 32-bit entries; using lo_epi16 for speed here
                 const VectorType folded = add(halves, _mm_shufflelo_epi16(halves, _MM_SHUFFLE(1, 0, 3, 2)));
                 return _mm_cvtsi128_si32(folded);
             }


             static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3) { v1 = add(mul(v1, v2), v3); }


             // Element-wise product; delegates to the signed implementation.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType mul(const VectorType a, const VectorType b) {
                 const VectorType product = VectorHelper<int>::mul(a, b);
                 return product;
             }

 //X             template<unsigned int b> static Vc_ALWAYS_INLINE Vc_CONST VectorType mul(const VectorType a) {

 //X                 switch (b) {

 //X                     case    0: return zero();

 //X                     case    1: return a;

 //X                     case    2: return _mm_slli_epi32(a,  1);

 //X                     case    4: return _mm_slli_epi32(a,  2);

 //X                     case    8: return _mm_slli_epi32(a,  3);

 //X                     case   16: return _mm_slli_epi32(a,  4);

 //X                     case   32: return _mm_slli_epi32(a,  5);

 //X                     case   64: return _mm_slli_epi32(a,  6);

 //X                     case  128: return _mm_slli_epi32(a,  7);

 //X                     case  256: return _mm_slli_epi32(a,  8);

 //X                     case  512: return _mm_slli_epi32(a,  9);

 //X                     case 1024: return _mm_slli_epi32(a, 10);

 //X                     case 2048: return _mm_slli_epi32(a, 11);

 //X                 }

 //X                 return mul(a, set(b));

 //X             }


 #undef SUFFIX

 #define SUFFIX epi32

             // Shifts a left by `shift` bits using the _mm_slli_ intrinsic selected by SUFFIX.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift) {
                 a = CAT(_mm_slli_, SUFFIX)(a, shift);
                 return a;
             }

             // Shifts a right by `shift` bits using the logical (zero-filling) _mm_srli_
             // intrinsic selected by SUFFIX.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift) {
                 a = CAT(_mm_srli_, SUFFIX)(a, shift);
                 return a;
             }

             // Broadcasts a into every entry via the _mm_set1_ intrinsic selected by SUFFIX.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const unsigned int a) {
                 const VectorType broadcast = CAT(_mm_set1_, SUFFIX)(a);
                 return broadcast;
             }

             // Builds a vector from four scalars, forwarded in the same order to the _mm_set_
             // intrinsic selected by SUFFIX (note: _mm_set_epi32 stores its first argument in
             // the highest entry).
             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const unsigned int a, const unsigned int b, const unsigned int c, const unsigned int d) {
                 const VectorType assembled = CAT(_mm_set_, SUFFIX)(a, b, c, d);
                 return assembled;
             }


             // Two-argument add() and sub(), generated by the OP macro while SUFFIX is epi32
             // (presumably thin wrappers around _mm_add_epi32 / _mm_sub_epi32 -- confirm
             // against the OP definition earlier in this file).
             OP(add) OP(sub)

             // cmpeq(), generated by OPcmp for the current SUFFIX (presumably _mm_cmpeq_epi32
             // -- confirm against the OPcmp definition).
             OPcmp(eq)

             // "Not equal" mask: the bitwise complement of cmpeq(a, b), formed by and-not with
             // an all-ones vector.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpneq(const VectorType a, const VectorType b) {
                 const VectorType equalMask = cmpeq(a, b);
                 return _mm_andnot_si128(equalMask, _mm_setallone_si128());
             }


 #ifndef USE_INCORRECT_UNSIGNED_COMPARE
             // Default: ordering comparisons go through the _epu32 comparison helpers.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmplt(const VectorType a, const VectorType b) {
                 const VectorType lessMask = _mm_cmplt_epu32(a, b);
                 return lessMask;
             }
             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpgt(const VectorType a, const VectorType b) {
                 const VectorType greaterMask = _mm_cmpgt_epu32(a, b);
                 return greaterMask;
             }
 #else
             // USE_INCORRECT_UNSIGNED_COMPARE: take the OPcmp-generated comparisons for the
             // current SUFFIX (epi32) instead -- presumably the signed compares, which
             // mis-order entries with the top bit set; confirm against OPcmp.
             OPcmp(lt)
             OPcmp(gt)
 #endif

             // "Not less than" mask: the bitwise complement of cmplt(a, b).
             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b) {
                 const VectorType lessMask = cmplt(a, b);
                 return _mm_andnot_si128(lessMask, _mm_setallone_si128());
             }

             // "Less than or equal" mask: the bitwise complement of cmpgt(a, b).
             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple (const VectorType a, const VectorType b) {
                 const VectorType greaterMask = cmpgt(a, b);
                 return _mm_andnot_si128(greaterMask, _mm_setallone_si128());
             }

             // "Not less than or equal" mask: identical to cmpgt(a, b).
             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b) {
                 const VectorType greaterMask = cmpgt(a, b);
                 return greaterMask;
             }


 #undef SUFFIX

             // Rounding is the identity for this integer helper: the argument is returned as is.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a) {
                 return a;
             }

         };


         template<> struct VectorHelper<signed short> {

             typedef _M128I VectorType;

             typedef signed short EntryType;

 #define SUFFIX si128


             OP_(or_) OP_(and_) OP_(xor_)

             static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return CAT(_mm_setzero_, SUFFIX)(); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask) { return CAT(_mm_and_, SUFFIX)(_mm_castps_si128(mask), a); }

             static Vc_ALWAYS_INLINE Vc_CONST _M128I concat(_M128I a, _M128I b) { return _mm_packs_epi32(a, b); }

             static Vc_ALWAYS_INLINE Vc_CONST _M128I expand0(_M128I x) { return _mm_srai_epi32(_mm_unpacklo_epi16(x, x), 16); }

             static Vc_ALWAYS_INLINE Vc_CONST _M128I expand1(_M128I x) { return _mm_srai_epi32(_mm_unpackhi_epi16(x, x), 16); }


 #undef SUFFIX

 #define SUFFIX epi16

             static Vc_ALWAYS_INLINE Vc_CONST VectorType one() { return CAT(_mm_setone_, SUFFIX)(); }


             static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift) {

                 return CAT(_mm_slli_, SUFFIX)(a, shift);

             }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift) {

                 return CAT(_mm_srai_, SUFFIX)(a, shift);

             }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a) { return CAT(_mm_set1_, SUFFIX)(a); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a, const EntryType b, const EntryType c, const EntryType d,

                     const EntryType e, const EntryType f, const EntryType g, const EntryType h) {

                 return CAT(_mm_set_, SUFFIX)(a, b, c, d, e, f, g, h);

             }


             static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3) {

                 v1 = add(mul(v1, v2), v3); }


             static Vc_INTRINSIC Vc_CONST VectorType abs(const VectorType a) { return mm_abs_epi16(a); }


             OPx(mul, mullo)

             OP(min) OP(max)

             static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a) {

                 // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"

                 a = min(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));

                 return _mm_cvtsi128_si32(a); // & 0xffff is implicit

             }

             static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a) {

                 // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"

                 a = max(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));

                 return _mm_cvtsi128_si32(a); // & 0xffff is implicit

             }

             static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a) {

                 a = mul(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));

                 return _mm_cvtsi128_si32(a); // & 0xffff is implicit

             }

             static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a) {

                 a = add(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));

                 a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));

                 return _mm_cvtsi128_si32(a); // & 0xffff is implicit

             }


             OP(add) OP(sub)

             OPcmp(eq)

             OPcmp(lt)

             OPcmp(gt)

             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpneq(const VectorType a, const VectorType b) { _M128I x = cmpeq(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b) { _M128I x = cmplt(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple (const VectorType a, const VectorType b) { _M128I x = cmpgt(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }

             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b) { return cmpgt(a, b); }

 #undef SUFFIX

             static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a) { return a; }

         };


         // SSE helper for vectors of eight unsigned 16-bit entries stored in one _M128I.
         // SUFFIX selects the intrinsic family that CAT() and the OP* generator macros paste
         // together, so the position of each #define/#undef relative to the members matters.
         template<> struct VectorHelper<unsigned short> {
             typedef _M128I VectorType;
             typedef unsigned short EntryType;
 #define SUFFIX si128
             // or_/and_/xor_ members generated by OP_CAST_ while SUFFIX is si128.
             OP_CAST_(or_) OP_CAST_(and_) OP_CAST_(xor_)
             // Vector with all bits cleared.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return CAT(_mm_setzero_, SUFFIX)(); }
             // Keeps the bits of a that are selected by mask and clears the rest.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask) { return CAT(_mm_and_, SUFFIX)(_mm_castps_si128(mask), a); }
 #ifdef VC_IMPL_SSE4_1
             // Packs the 32-bit entries of a (low half of the result) and b (high half) into
             // 16-bit entries with unsigned saturation.
             static Vc_ALWAYS_INLINE Vc_CONST _M128I concat(_M128I a, _M128I b) { return _mm_packus_epi32(a, b); }
 #else
             // SSE2 has no unsigned-saturating 32->16 bit pack. The previous fallback,
             // _mm_packs_epi32, saturates to the *signed* range and therefore turned every
             // value above 0x7fff into 0x7fff. Instead, gather the low 16-bit word of each
             // 32-bit entry with unpack steps: values in [0, 0xffff] are converted exactly.
             // Out-of-range inputs are truncated modulo 2^16 here, whereas the SSE4.1 path
             // saturates them.
             // Word layout notation: a0l/a0h = low/high 16 bits of 32-bit entry 0 of a.
             static Vc_ALWAYS_INLINE Vc_CONST _M128I concat(_M128I a, _M128I b) {
                 const _M128I tmp0 = _mm_unpacklo_epi16(a, b);       // a0l b0l a0h b0h a1l b1l a1h b1h
                 const _M128I tmp1 = _mm_unpackhi_epi16(a, b);       // a2l b2l a2h b2h a3l b3l a3h b3h
                 const _M128I tmp2 = _mm_unpacklo_epi16(tmp0, tmp1); // a0l a2l b0l b2l a0h a2h b0h b2h
                 const _M128I tmp3 = _mm_unpackhi_epi16(tmp0, tmp1); // a1l a3l b1l b3l a1h a3h b1h b3h
                 return _mm_unpacklo_epi16(tmp2, tmp3);              // a0l a1l a2l a3l b0l b1l b2l b3l
             }
 #endif
             // Zero-extends the four low 16-bit entries of x to 32 bits: each entry is
             // duplicated into both halves of a 32-bit slot, then shifted down logically.
             static Vc_ALWAYS_INLINE Vc_CONST _M128I expand0(_M128I x) { return _mm_srli_epi32(_mm_unpacklo_epi16(x, x), 16); }
             // Same as expand0, but for the four high 16-bit entries of x.
             static Vc_ALWAYS_INLINE Vc_CONST _M128I expand1(_M128I x) { return _mm_srli_epi32(_mm_unpackhi_epi16(x, x), 16); }

 #undef SUFFIX
 #define SUFFIX epu16
             // Result of the _mm_setone_ helper for the current SUFFIX (presumably every entry
             // equal to 1 -- confirm against the helper's definition).
             static Vc_ALWAYS_INLINE Vc_CONST VectorType one() { return CAT(_mm_setone_, SUFFIX)(); }

 //X             template<unsigned int b> static Vc_ALWAYS_INLINE Vc_CONST VectorType mul(const VectorType a) {
 //X                 switch (b) {
 //X                     case    0: return zero();
 //X                     case    1: return a;
 //X                     case    2: return _mm_slli_epi16(a,  1);
 //X                     case    4: return _mm_slli_epi16(a,  2);
 //X                     case    8: return _mm_slli_epi16(a,  3);
 //X                     case   16: return _mm_slli_epi16(a,  4);
 //X                     case   32: return _mm_slli_epi16(a,  5);
 //X                     case   64: return _mm_slli_epi16(a,  6);
 //X                     case  128: return _mm_slli_epi16(a,  7);
 //X                     case  256: return _mm_slli_epi16(a,  8);
 //X                     case  512: return _mm_slli_epi16(a,  9);
 //X                     case 1024: return _mm_slli_epi16(a, 10);
 //X                     case 2048: return _mm_slli_epi16(a, 11);
 //X                 }
 //X                 return mul(a, set(b));
 //X             }
             // Two-argument min()/max() via the mm_min_/mm_max_ epu16 helpers. This condition
             // is the exact complement of the one guarding OP(min) OP(max) below; it tests
             // whether VC_IMPL_SSE4_1 is defined, like every other use in this struct.
 #if !defined(USE_INCORRECT_UNSIGNED_COMPARE) || defined(VC_IMPL_SSE4_1)
             static Vc_INTRINSIC Vc_CONST VectorType min(const VectorType a, const VectorType b) { return CAT(mm_min_, SUFFIX)(a, b); }
             static Vc_INTRINSIC Vc_CONST VectorType max(const VectorType a, const VectorType b) { return CAT(mm_max_, SUFFIX)(a, b); }
 #endif
 #undef SUFFIX
 #define SUFFIX epi16
             // Shifts every 16-bit entry left by `shift` bits.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift) {
                 return CAT(_mm_slli_, SUFFIX)(a, shift);
             }
             // Shifts every 16-bit entry right by `shift` bits with the logical (srli,
             // zero-filling) variant.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift) {
                 return CAT(_mm_srli_, SUFFIX)(a, shift);
             }

             // In-place multiply-add: overwrites v1 with add(mul(v1, v2), v3).
             static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3) { v1 = add(mul(v1, v2), v3); }

             OPx(mul, mullo) // should work correctly for all values
 #if defined(USE_INCORRECT_UNSIGNED_COMPARE) && !defined(VC_IMPL_SSE4_1)
             OP(min) OP(max) // XXX breaks for values with MSB set
 #endif
             // Horizontal reduction with the two-argument min(); the result ends up in the
             // lowest entry.
             static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a) {
                 // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
                 a = min(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));   // fold upper 64 bits onto lower
                 a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2))); // fold entries 2/3 onto 0/1
                 a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1))); // fold entry 1 onto 0
                 return _mm_cvtsi128_si32(a); // & 0xffff is implicit
             }
             // Horizontal reduction with the two-argument max(); same folding scheme as min().
             static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a) {
                 // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
                 a = max(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
                 a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
                 a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
                 return _mm_cvtsi128_si32(a); // & 0xffff is implicit
             }
             // Horizontal reduction with the two-argument mul(); same folding scheme as min().
             static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a) {
                 // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
                 a = mul(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
                 a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
                 a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
                 return _mm_cvtsi128_si32(a); // & 0xffff is implicit
             }
             // Horizontal reduction with the two-argument add(); same folding scheme as min().
             static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a) {
                 // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
                 a = add(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
                 a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
                 a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
                 return _mm_cvtsi128_si32(a); // & 0xffff is implicit
             }
             // Broadcasts a into every entry.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a) { return CAT(_mm_set1_, SUFFIX)(a); }
             // Builds a vector from eight scalars, forwarded in the same order to the _mm_set_
             // intrinsic (note: _mm_set_epi16 stores its first argument in the highest entry).
             static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a, const EntryType b, const EntryType c,
                     const EntryType d, const EntryType e, const EntryType f,
                     const EntryType g, const EntryType h) {
                 return CAT(_mm_set_, SUFFIX)(a, b, c, d, e, f, g, h);
             }

             // Two-argument add()/sub() and cmpeq(), generated for the current SUFFIX.
             OP(add) OP(sub)
             OPcmp(eq)
             // "Not equal" mask: the bitwise complement of cmpeq(a, b).
             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpneq(const VectorType a, const VectorType b) { return _mm_andnot_si128(cmpeq(a, b), _mm_setallone_si128()); }

 #ifndef USE_INCORRECT_UNSIGNED_COMPARE
             // Default: ordering comparisons go through the _epu16 comparison helpers.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmplt(const VectorType a, const VectorType b) {
                 return _mm_cmplt_epu16(a, b);
             }
             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpgt(const VectorType a, const VectorType b) {
                 return _mm_cmpgt_epu16(a, b);
             }
 #else
             // USE_INCORRECT_UNSIGNED_COMPARE: take the OPcmp-generated comparisons for the
             // current SUFFIX (epi16) instead -- presumably the signed compares; confirm
             // against OPcmp.
             OPcmp(lt)
             OPcmp(gt)
 #endif
             // The negated comparisons are bitwise complements of cmplt/cmpgt.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b) { return _mm_andnot_si128(cmplt(a, b), _mm_setallone_si128()); }
             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple (const VectorType a, const VectorType b) { return _mm_andnot_si128(cmpgt(a, b), _mm_setallone_si128()); }
             // "Not less than or equal" is the same mask as "greater than".
             static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b) { return cmpgt(a, b); }
 #undef SUFFIX
             // Rounding is the identity for this integer helper.
             static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a) { return a; }
         };

 #undef OP1

 #undef OP

 #undef OP_

 #undef OPx

 #undef OPcmp


 } // namespace SSE

 } // namespace Vc

 } // namespace ROOT


 #include "vectorhelper.tcc"

 #include "undomacros.h"


 #endif // SSE_VECTORHELPER_H

ROOT::Vc::SSE::VectorHelper< unsigned short >::cmpnle
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b)
Definition: vectorhelper.h:797

ROOT::Vc::SSE::VectorHelper< signed short >::abs
static Vc_INTRINSIC Vc_CONST VectorType abs(const VectorType a)
Definition: vectorhelper.h:645

ROOT::Vc::SSE::VectorHelper< unsigned int >::notMaskedToZero
static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask)
Definition: vectorhelper.h:523

ROOT::Vc::SSE::VectorHelper< unsigned int >::mul
static Vc_ALWAYS_INLINE Vc_CONST VectorType mul(const VectorType a, const VectorType b)
Definition: vectorhelper.h:558

ROOT::Vc::SSE::VectorHelper< float >::mul
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a)
Definition: vectorhelper.h:346

ROOT::Vc::SSE::SortHelper::sort
static Vc_CONST_L VectorType sort(VectorType) Vc_CONST_R

ROOT::Vc::SSE::VectorHelper< unsigned short >::OP_CAST_
OP_CAST_(or_) OP_CAST_(and_) OP_CAST_(xor_) static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:692

ROOT::Vc::SSE::VectorHelper< signed short >::concat
static Vc_ALWAYS_INLINE Vc_CONST _M128I concat(_M128I a, _M128I b)
Definition: vectorhelper.h:622

ROOT::Vc::SSE::VectorHelper< float >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d)
Definition: vectorhelper.h:293

ROOT::Vc::SSE::VectorHelper< double >::round
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a)
Definition: vectorhelper.h:275

ROOT::Vc::SSE::Internal::exponent
Vc_INTRINSIC Vc_CONST __m128 exponent(__m128 v)
Definition: vectorhelper.h:35

ROOT::Vc::AVX::min
static Vc_ALWAYS_INLINE int_v min(const int_v &x, const int_v &y)
Definition: vector.h:433

ROOT::Vc::SSE::VectorHelper< unsigned short >::expand0
static Vc_ALWAYS_INLINE Vc_CONST _M128I expand0(_M128I x)
Definition: vectorhelper.h:701

ROOT::Vc::SSE::VectorHelper< unsigned int >::cmpgt
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpgt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:599

ROOT::Vc::SSE::VectorHelper< unsigned int >::OP_CAST_
OP_CAST_(or_) OP_CAST_(and_) OP_CAST_(xor_) static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:521

template
Small helper to encapsulate whether to return the value pointed to by the iterator or its address...

v1
const Double_t * v1
Definition: TArcBall.cxx:33

Vc_CONST_L
#define Vc_CONST_L
Definition: macros.h:134

ROOT::Vc::SSE::VectorHelper< unsigned short >::concat
static Vc_ALWAYS_INLINE Vc_CONST _M128I concat(_M128I a, _M128I b)
Definition: vectorhelper.h:699

ROOT::Vc::SSE::sqrt
static Vc_ALWAYS_INLINE Vc_PURE Vector< T > sqrt(const Vector< T > &x)
Definition: vector.h:522

ROOT::Vc::SSE::VectorHelper< unsigned short >::EntryType
unsigned short EntryType
Definition: vectorhelper.h:690

OP0
#define OP0(name, code)
Definition: vectorhelper.h:99

ROOT::Vc::SSE::VectorHelper< unsigned short >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a, const EntryType b, const EntryType c, const EntryType d, const EntryType e, const EntryType f, const EntryType g, const EntryType h)
Definition: vectorhelper.h:774

ROOT::Vc::SSE::VectorHelper< float >::min
static MINMAX Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a)
Definition: vectorhelper.h:336

ROOT::Vc::SSE::VectorHelper< unsigned short >::OPx
OPx(mul, mullo) static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a)
Definition: vectorhelper.h:741

ROOT::Vc::SSE::VectorHelper< unsigned int >::max
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a)
Definition: vectorhelper.h:537

ROOT::Vc::SSE::VectorHelper< signed short >::OP_
OP_(or_) OP_(and_) OP_(xor_) static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:619

ROOT::Vc::SSE::VectorHelper< double >::mul
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a)
Definition: vectorhelper.h:266

c
return c
Definition: entrylist_figure1.C:47

ROOT::Vc::SSE::VectorHelper< int >::shiftLeft
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift)
Definition: vectorhelper.h:461

ROOT::Vc::SSE::VectorHelper< signed short >::EntryType
signed short EntryType
Definition: vectorhelper.h:616

xmlio::Size
const char * Size
Definition: TXMLSetup.cxx:56

undomacros.h

ROOT::Vc::SSE::min
static Vc_ALWAYS_INLINE Vc_PURE int_v min(const int_v &x, const int_v &y)
Definition: vector.h:502

OP
#define OP(op)
Definition: vectorhelper.h:180

ROOT::Vc::SSE::VectorHelper< double >::fma
static void fma(VectorType &v1, VectorType v2, VectorType v3)
Definition: vectorhelper.h:214

ROOT::Vc::SSE::VectorHelper< float8 >::EntryType
float EntryType
Definition: vectorhelper.h:368

h
TH1 * h
Definition: legend2.C:5

ROOT::Vc::SSE::VectorHelper< unsigned short >::cmplt
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmplt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:785

ROOT::Vc::SSE::VectorHelper< unsigned short >::add
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a)
Definition: vectorhelper.h:766

ROOT::Vc::SSE::VectorHelper< signed short >::expand1
static Vc_ALWAYS_INLINE Vc_CONST _M128I expand1(_M128I x)
Definition: vectorhelper.h:624

ROOT::Vc::SSE::VectorHelper< int >::min
static Vc_INTRINSIC Vc_CONST VectorType min(const VectorType a, const VectorType b)
Definition: vectorhelper.h:469

ROOT::Vc::SSE::VectorHelper< unsigned short >::VectorType
_M128I VectorType
Definition: vectorhelper.h:689

ROOT::Vc::SSE::VectorHelper< double >::reciprocal
static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VectorType x)
Definition: vectorhelper.h:244

MINMAX
#define MINMAX
Definition: vectorhelper.h:193

ROOT::Vc::SSE::VectorHelper< float >::fma
static void fma(VectorType &v1, VectorType v2, VectorType v3)
Definition: vectorhelper.h:303

ROOT::Vc::SSE::_mm_setallone_pd
static Vc_INTRINSIC __m128d Vc_CONST _mm_setallone_pd()
Definition: intrinsics.h:83

ROOT::Vc::SSE::VectorHelper< float >::one
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:295

ROOT::Vc::SSE::VectorHelper< signed short >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a, const EntryType b, const EntryType c, const EntryType d, const EntryType e, const EntryType f, const EntryType g, const EntryType h)
Definition: vectorhelper.h:637

ROOT::Vc::StreamingAndUnalignedFlag
StreamingAndUnalignedFlag
Definition: global.h:317

Vc_PURE_L
#define Vc_PURE_L
Definition: macros.h:137

a
TArc * a
Definition: textangle.C:12

_pythonization.g
g
Definition: _pythonization.py:167

ROOT::Vc::SSE::VectorHelper< signed short >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a)
Definition: vectorhelper.h:636

ROOT::Vc::SSE::VectorHelper< unsigned int >::shiftRight
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift)
Definition: vectorhelper.h:585

ROOT::Vc::SSE::M256::create
static Vc_INTRINSIC Vc_CONST M256 create(_M128 a, _M128 b)
Definition: types.h:70

ROOT::Vc::SSE::VectorHelper< float8 >::fma
static void fma(VectorType &a, VectorTypeArg b, VectorTypeArg c)
Definition: vectorhelper.h:436

ROOT::Vc::SSE::VectorHelper< float8 >::max
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorTypeArg a)
Definition: vectorhelper.h:426

ROOT::Vc::SSE::VectorHelper< float8 >::zero
static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:388

Vc_INTRINSIC
#define Vc_INTRINSIC
Definition: macros.h:139

ROOT::Vc::SSE::VectorHelper< unsigned short >::shiftRight
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift)
Definition: vectorhelper.h:735

ROOT::Vc::SSE::VectorHelper< int >::VectorType
_M128I VectorType
Definition: vectorhelper.h:446

ROOT::Math::Cephes::A
static double A[]
Definition: SpecFuncCephes.cxx:170

ROOT::Vc::SSE::VectorHelper< float8 >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a)
Definition: vectorhelper.h:376

ROOT::Vc::SSE::VectorHelper< float >::EntryType
float EntryType
Definition: vectorhelper.h:286

ROOT::Vc::SSE::VectorHelper< _M128I >::VectorType
_M128I VectorType
Definition: vectorhelper.h:151

f
TFile * f
Definition: memstatExample.C:52

ROOT::Vc::SSE::VectorHelper< int >::cmpnlt
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:510

ROOT::Vc::SSE::VectorHelper< double >::add
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a)
Definition: vectorhelper.h:270

ROOT::Vc::SSE::VectorHelper< signed short >::mul
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a)
Definition: vectorhelper.h:663

ROOT::Vc::SSE::VectorHelper< unsigned int >::fma
static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3)
Definition: vectorhelper.h:556

ROOT::Vc::SSE::_mm_cmplt_epu16
static Vc_INTRINSIC __m128i Vc_CONST _mm_cmplt_epu16(__m128i a, __m128i b)
Definition: intrinsics.h:109

T
TTree * T
Definition: memstatExample.C:53

ROOT::Vc::SSE::VectorHelper< signed short >::expand0
static Vc_ALWAYS_INLINE Vc_CONST _M128I expand0(_M128I x)
Definition: vectorhelper.h:623

ROOT::Vc::SSE::mm_max_epu32
static Vc_INTRINSIC __m128i Vc_CONST mm_max_epu32(__m128i a, __m128i b)
Definition: intrinsics.h:432

ROOT::Vc::SSE::VectorHelper< float8 >::mul
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorTypeArg a)
Definition: vectorhelper.h:429

ROOT::Vc::SSE::M256
Definition: types.h:65

ROOT::Vc::SSE::VectorHelper< float >::OP
OP(add) OP(sub) OP(mul) OPcmp(eq) OPcmp(neq) OPcmp(lt) OPcmp(nlt) OPcmp(le) OPcmp(nle) OP1(sqrt) OP1(rsqrt) static Vc_ALWAYS_INLINE Vc_CONST VectorType isNaN(VectorType x)
Definition: vectorhelper.h:316

ROOT::Vc::SSE::VectorHelper< float >::zero
static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:294

OP_CAST_
#define OP_CAST_(op)
Definition: vectorhelper.h:188

ROOT::Vc::SSE::abs
static Vc_ALWAYS_INLINE Vc_PURE Vector< T > abs(const Vector< T > &x)
Definition: vector.h:524

ROOT::Vc::SSE::VectorHelper< int >::min
static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a)
Definition: vectorhelper.h:471

ROOT::Vc::SSE::SortHelper
Definition: vectorhelper.h:57

ROOT::Vc::SSE::VectorHelper< double >::max
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a)
Definition: vectorhelper.h:262

ROOT::Vc::SSE::VectorHelper< unsigned int >::max
static Vc_INTRINSIC Vc_CONST VectorType max(const VectorType a, const VectorType b)
Definition: vectorhelper.h:530

ROOT::Vc::SSE::VectorHelper< int >::fma
static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3)
Definition: vectorhelper.h:459

ROOT::Vc::SSE::_mm_setallone_si128
static Vc_INTRINSIC __m128i Vc_CONST _mm_setallone_si128()
Definition: intrinsics.h:82

ROOT::Vc::SSE::VectorHelper< float >::add
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a)
Definition: vectorhelper.h:351

tornado.d
int d
Definition: tornado.py:11

ROOT::Vc::SSE::VectorHelper< signed short >::shiftRight
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift)
Definition: vectorhelper.h:633

l1
TLine l1(2.5, 4.5, 15.5, 4.5)

ROOT::Vc::SSE::VectorHelper< unsigned short >::shiftLeft
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift)
Definition: vectorhelper.h:732

CAT
#define CAT(a, b)
Definition: macros.h:281

double
double
Definition: RooCFunction2Binding.cxx:40

ROOT::Vc::SSE::VectorHelper< unsigned short >::expand1
static Vc_ALWAYS_INLINE Vc_CONST _M128I expand1(_M128I x)
Definition: vectorhelper.h:702

ROOT::Vc::AVX::concat
Vc_INTRINSIC Vc_CONST m256 concat(param128 a, param128 b)
Definition: casts.h:123

ROOT::Vc::sfloat
Definition: types.h:33

ROOT::Vc::SSE::VectorHelper< unsigned short >::max
static Vc_INTRINSIC Vc_CONST VectorType max(const VectorType a, const VectorType b)
Definition: vectorhelper.h:728

OP_
#define OP_(op)
Definition: vectorhelper.h:182

h2
TH2D * h2
Definition: fit2dHist.C:45

ROOT::Vc::SSE::mm_abs_epi32
static Vc_INTRINSIC __m128i Vc_CONST mm_abs_epi32(__m128i a)
Definition: intrinsics.h:179

ROOT::Vc::SSE::VectorHelper< float >::abs
static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(const VectorType a)
Definition: vectorhelper.h:331

ROOT::Vc::SSE::VectorHelper< int >::notMaskedToZero
static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask)
Definition: vectorhelper.h:451

ROOT::Vc::SSE::mm_blendv_epi8
static Vc_INTRINSIC __m128i mm_blendv_epi8(__m128i a, __m128i b, __m128i c)
Definition: intrinsics.h:282

ROOT::Vc::SSE::VectorHelper< float >::mask
_M128 mask
Definition: vectorhelper.h:291

ROOT::Vc::SSE::VectorHelper< unsigned int >::cmplt
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmplt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:596

h1
TH1F * h1
Definition: legend1.C:5

SUFFIX
#define SUFFIX
Definition: vectorhelper.h:731

ROOT::Vc::SSE::VectorHelper< float8 >::VectorTypeArg
const VectorType VectorTypeArg
Definition: vectorhelper.h:373

ROOT::Vc::SSE::VectorHelper< int >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const int a)
Definition: vectorhelper.h:456

ROOT::Vc::SSE::VectorHelper< unsigned short >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a)
Definition: vectorhelper.h:773

ROOT::Vc::SSE::round
static Vc_ALWAYS_INLINE Vc_PURE Vector< T > round(const Vector< T > &x)
Definition: vector.h:526

ROOT::Vc::SSE::VectorHelper< unsigned int >::EntryType
unsigned int EntryType
Definition: vectorhelper.h:518

ROOT::Vc::SSE::VectorHelper< double >::one
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:207

ROOT::Vc::SSE::_mm_cmpgt_epu16
static Vc_INTRINSIC __m128i Vc_CONST _mm_cmpgt_epu16(__m128i a, __m128i b)
Definition: intrinsics.h:111

ROOT::Vc::SSE::VectorHelper< float >::isFinite
static Vc_ALWAYS_INLINE Vc_CONST VectorType isFinite(VectorType x)
Definition: vectorhelper.h:325

ROOT::Vc::SSE::VectorHelper< M256 >::VectorTypeArg
const VectorType VectorTypeArg
Definition: vectorhelper.h:75

ROOT::Vc::SSE::VectorHelper< unsigned int >::mul
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a)
Definition: vectorhelper.h:543

ROOT::Vc::SSE::mm_abs_epi16
static Vc_INTRINSIC __m128i Vc_CONST mm_abs_epi16(__m128i a)
Definition: intrinsics.h:175

ROOT::Vc::SSE::VectorHelper< float8 >::add
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorTypeArg a)
Definition: vectorhelper.h:432

ROOT::Vc::SSE::VectorHelper< unsigned short >::mul
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a)
Definition: vectorhelper.h:759

ROOT::Vc::SSE::VectorHelper< double >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const double a, const double b)
Definition: vectorhelper.h:205

ROOT::Vc::SSE::VectorHelper< _M128 >::VectorType
_M128 VectorType
Definition: vectorhelper.h:106

ROOT::Vc::SSE::VectorHelper< int >::add
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a)
Definition: vectorhelper.h:483

VC_ALIGNED_PARAMETER
#define VC_ALIGNED_PARAMETER(_Type)
Definition: macros.h:368

ROOT::Vc::StreamingAndAlignedFlag
StreamingAndAlignedFlag
Definition: global.h:314

OPcmp
#define OPcmp(op)
Definition: vectorhelper.h:186

ROOT::Vc::SSE::VectorHelper< unsigned int >::round
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a)
Definition: vectorhelper.h:611

v
SVector< double, 2 > v
Definition: Dict.h:5

ROOT::Vc::SSE::VectorHelper< unsigned short >::min
static Vc_INTRINSIC Vc_CONST VectorType min(const VectorType a, const VectorType b)
Definition: vectorhelper.h:727

ROOT::Vc::SSE::VectorHelper< double >::isFinite
static Vc_ALWAYS_INLINE Vc_CONST VectorType isFinite(VectorType x)
Definition: vectorhelper.h:250

ROOT::Vc::SSE::VectorHelper< unsigned short >::cmple
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple(const VectorType a, const VectorType b)
Definition: vectorhelper.h:796

ROOT::Vc::SSE::VectorHelper< unsigned int >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const unsigned int a, const unsigned int b, const unsigned int c, const unsigned int d)
Definition: vectorhelper.h:589

ROOT::Vc::SSE::VectorHelper< float >::reciprocal
static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VectorType x)
Definition: vectorhelper.h:328

ROOT::Vc::SSE::VectorHelper< int >::max
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a)
Definition: vectorhelper.h:477

Vc_CONST
#define Vc_CONST
Definition: macros.h:133

ROOT::Vc::SSE::mm_blendv_pd
static Vc_INTRINSIC __m128d mm_blendv_pd(__m128d a, __m128d b, __m128d c)
Definition: intrinsics.h:276

OP1
#define OP1(name, code)
Definition: vectorhelper.h:178

_M128
#define _M128
Definition: macros.h:27

ROOT::Vc::SSE::_mm_cmplt_epu32
static Vc_INTRINSIC __m128i Vc_CONST _mm_cmplt_epu32(__m128i a, __m128i b)
Definition: intrinsics.h:113

ROOT::Vc::SSE::rsqrt
static Vc_ALWAYS_INLINE Vc_PURE Vector< T > rsqrt(const Vector< T > &x)
Definition: vector.h:523

ROOT::Vc::SSE::VectorHelper< double >::min
static MINMAX Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a)
Definition: vectorhelper.h:258

ROOT::Vc::SSE::VectorHelper< signed short >::one
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:628

ROOT::Vc::SSE::VectorHelper< unsigned short >::max
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a)
Definition: vectorhelper.h:752

ROOT::Vc::SSE::VectorHelper< float >::round
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a)
Definition: vectorhelper.h:357

ROOT::Vc::SSE::VectorHelper< float8 >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d, const float e, const float f, const float g, const float h)
Definition: vectorhelper.h:384

ROOT::Vc::SSE::VectorHelper< unsigned int >::cmpnle
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b)
Definition: vectorhelper.h:608

ROOT::Vc::SSE::reciprocal
static Vc_ALWAYS_INLINE Vc_PURE Vector< T > reciprocal(const Vector< T > &x)
Definition: vector.h:525

ROOT::Vc::SSE::VectorHelper< double >::VectorType
_M128D VectorType
Definition: vectorhelper.h:198

ROOT::Vc::SSE::VectorHelper< unsigned short >::cmpgt
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpgt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:788

ROOT::Vc::SSE::max
static Vc_ALWAYS_INLINE Vc_PURE int_v max(const int_v &x, const int_v &y)
Definition: vector.h:508

Vc_ALWAYS_INLINE
#define Vc_ALWAYS_INLINE
Definition: macros.h:130

ROOT::Vc::SSE::VectorHelper< unsigned int >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const unsigned int a)
Definition: vectorhelper.h:588

ROOT::Vc::SSE::VectorHelper< unsigned int >::min
static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a)
Definition: vectorhelper.h:531

ROOT::Vc::SSE::VectorHelper< signed short >::cmple
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple(const VectorType a, const VectorType b)
Definition: vectorhelper.h:682

ROOT::Vc::SSE::mm_min_epi32
static Vc_INTRINSIC __m128i Vc_CONST mm_min_epi32(__m128i a, __m128i b)
Definition: intrinsics.h:447

ROOT::Vc::SSE::VectorHelper< int >::mul
static Vc_CONST VectorType mul(const VectorType a, const VectorType b)
Definition: vectorhelper.h:496

macros.h

ROOT::Vc::SSE::VectorHelper< int >::cmple
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple(const VectorType a, const VectorType b)
Definition: vectorhelper.h:511

ROOT::Vc::SSE::VectorHelper< float8 >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d)
Definition: vectorhelper.h:380

ROOT::Vc::SSE::VectorHelper< M256 >::VectorType
M256 VectorType
Definition: vectorhelper.h:71

OP2
#define OP2(name, code)
Definition: vectorhelper.h:101

Vc_CONST_R
#define Vc_CONST_R
Definition: macros.h:135

ROOT::Vc::SSE::VectorHelper< unsigned int >::add
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a)
Definition: vectorhelper.h:549

Vc_ALWAYS_INLINE_L
#define Vc_ALWAYS_INLINE_L
Definition: macros.h:131

ROOT::Vc::SSE::VectorHelper< int >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const int a, const int b, const int c, const int d)
Definition: vectorhelper.h:457

ROOT::Vc::SSE::VectorHelper< signed short >::round
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a)
Definition: vectorhelper.h:685

ROOT::Vc::SSE::VectorHelper< int >::abs
static Vc_INTRINSIC Vc_CONST VectorType abs(const VectorType a)
Definition: vectorhelper.h:467

ROOT::Vc::SSE::VectorHelper< double >::abs
static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(const VectorType a)
Definition: vectorhelper.h:253

types.h

ROOT::Vc::SSE::mm_min_epu32
static Vc_INTRINSIC __m128i Vc_CONST mm_min_epu32(__m128i a, __m128i b)
Definition: intrinsics.h:441

OP3
#define OP3(name, code)
Definition: vectorhelper.h:102

ROOT::Vc::SSE::VectorHelper< double >::EntryType
double EntryType
Definition: vectorhelper.h:199

ROOT::Vc::SSE::_mm_setabsmask_pd
static Vc_INTRINSIC __m128d Vc_CONST _mm_setabsmask_pd()
Definition: intrinsics.h:96

ROOT::Vc::SSE::VectorHelper< signed short >::cmpnlt
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:681

ROOT::Vc::SSE::VectorHelper< unsigned int >::min
static Vc_INTRINSIC Vc_CONST VectorType min(const VectorType a, const VectorType b)
Definition: vectorhelper.h:529

ROOT::Vc::SSE::VectorHelper< signed short >::max
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a)
Definition: vectorhelper.h:656

SSE
#define SSE
Definition: global.h:84

ROOT::Vc::AVX::max
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
Definition: vector.h:440

ROOT::Vc::SSE::VectorHelper< float >::max
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a)
Definition: vectorhelper.h:341

ROOT::Vc::SSE::VectorHelper< double >::OP
OP(add) OP(sub) OP(mul) OPcmp(eq) OPcmp(neq) OPcmp(lt) OPcmp(nlt) OPcmp(le) OPcmp(nle) static Vc_ALWAYS_INLINE Vc_CONST VectorType rsqrt(VectorType x)
Definition: vectorhelper.h:235

ROOT::Vc::SSE::VectorHelper< double >::a
_M128 mask return a
Definition: vectorhelper.h:203

ROOT::Vc::SSE::VectorHelper< double >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const double a)
Definition: vectorhelper.h:204

Vc_PURE_R
#define Vc_PURE_R
Definition: macros.h:138

ROOT::Vc::SSE::VectorHelper< unsigned int >::VectorType
_M128I VectorType
Definition: vectorhelper.h:519

ROOT::Vc::SSE::mm_blendv_ps
static Vc_INTRINSIC __m128 mm_blendv_ps(__m128 a, __m128 b, __m128 c)
Definition: intrinsics.h:279

ROOT::Vc::SSE::VectorHelper< int >::EntryType
int EntryType
Definition: vectorhelper.h:445

ROOT::Vc::SSE::VectorHelper< signed short >::OPx
OPx(mul, mullo) OP(min) OP(max) static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a)
Definition: vectorhelper.h:647

ROOT::Vc::SSE::VectorHelper< unsigned short >::cmpnlt
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:795

ROOT::Vc::SSE::VectorHelper< int >::max
static Vc_INTRINSIC Vc_CONST VectorType max(const VectorType a, const VectorType b)
Definition: vectorhelper.h:470

ROOT::Vc::SSE::VectorHelper< unsigned int >::cmple
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple(const VectorType a, const VectorType b)
Definition: vectorhelper.h:607

ROOT::Vc::SSE::VectorHelper< int >::OP_
OP_(or_) OP_(and_) OP_(xor_) static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:449

ROOT::Vc::SSE::VectorHelper< double >::isNaN
static Vc_ALWAYS_INLINE Vc_CONST VectorType isNaN(VectorType x)
Definition: vectorhelper.h:247

ROOT::Vc::SSE::VectorHelper
Definition: types.h:105

ROOT::Vc::SSE::VectorHelper< int >::shiftRight
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift)
Definition: vectorhelper.h:464

ROOT::Vc::SSE::VectorHelper< unsigned int >::cmpnlt
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:606

ROOT::Vc::SSE::mm_max_epi32
static Vc_INTRINSIC __m128i Vc_CONST mm_max_epi32(__m128i a, __m128i b)
Definition: intrinsics.h:423

ROOT::Vc::AlignedFlag
AlignedFlag
Definition: global.h:308

ROOT::Vc::SSE::VectorHelper< int >::round
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a)
Definition: vectorhelper.h:514

ROOT::Vc::SSE::_mm_cmpgt_epu32
static Vc_INTRINSIC __m128i Vc_CONST _mm_cmpgt_epu32(__m128i a, __m128i b)
Definition: intrinsics.h:115

ROOT::Vc::SSE::VectorHelper< unsigned short >::fma
static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3)
Definition: vectorhelper.h:739

ROOT::Vc::SSE::VectorHelper< signed short >::notMaskedToZero
static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask)
Definition: vectorhelper.h:621

ROOT::Vc::SSE::VectorHelper< int >::cmpnle
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b)
Definition: vectorhelper.h:512

ROOT::Vc::SSE::VectorHelper< signed short >::shiftLeft
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift)
Definition: vectorhelper.h:630

ROOT::Vc::SSE::VectorHelper< _M128D >::VectorType
_M128D VectorType
Definition: vectorhelper.h:129

ROOT::Vc::UnalignedFlag
UnalignedFlag
Definition: global.h:311

ROOT::Vc::SSE::VectorHelper< double >::zero
static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:206

ROOT::Vc::SSE::VectorHelper< signed short >::add
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a)
Definition: vectorhelper.h:669

ROOT::Vc::SSE::VectorHelper< float >::VectorType
_M128 VectorType
Definition: vectorhelper.h:287

ROOT::Vc::SSE::VectorHelper< signed short >::VectorType
_M128I VectorType
Definition: vectorhelper.h:615

ROOT::Vc::SSE::VectorHelper< signed short >::fma
static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3)
Definition: vectorhelper.h:642

ROOT::Vc::SSE::VectorHelper< float8 >::one
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:389

ROOT::Vc::SSE::VectorHelper< unsigned short >::notMaskedToZero
static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask)
Definition: vectorhelper.h:694

ROOT::Vc::SSE::VectorHelper< unsigned short >::round
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a)
Definition: vectorhelper.h:799

ROOT::Vc::SSE::VectorHelper< signed short >::cmpnle
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b)
Definition: vectorhelper.h:683

ROOT::Vc::SSE::VectorHelper< float >::set
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a)
Definition: vectorhelper.h:292

ROOT::Vc::SSE::VectorHelper< unsigned int >::shiftLeft
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift)
Definition: vectorhelper.h:582

ROOT::Vc::SSE::VectorHelper< unsigned short >::one
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:706

ROOT::Vc::SSE::_mm_setabsmask_ps
static Vc_INTRINSIC __m128 Vc_CONST _mm_setabsmask_ps()
Definition: intrinsics.h:97

ROOT::Vc::SSE::VectorHelper< float >::concat
static Vc_ALWAYS_INLINE Vc_CONST _M128 concat(_M128D a, _M128D b)
Definition: vectorhelper.h:296

REUSE_FLOAT_IMPL1
#define REUSE_FLOAT_IMPL1(fun)
Definition: vectorhelper.h:391

ROOT::Vc::SSE::VectorHelper< int >::one
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:454

fma
void fma()
Definition: arithmetics.cpp:409

ROOT::Vc::SSE::VectorHelper< float8 >::VectorType
M256 VectorType
Definition: vectorhelper.h:369

REUSE_FLOAT_IMPL2
#define REUSE_FLOAT_IMPL2(fun)
Definition: vectorhelper.h:395

ROOT::Vc::SSE::VectorHelper< unsigned int >::one
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:527