/*  This file is part of the Vc library.

    Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>

    Vc is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as
    published by the Free Software Foundation, either version 3 of
    the License, or (at your option) any later version.

    Vc is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with Vc.  If not, see <http://www.gnu.org/licenses/>.

*/

#ifndef SSE_MASK_H
#define SSE_MASK_H

#include "intrinsics.h"
#include "macros.h"

namespace ROOT {
namespace Vc
{
namespace SSE
{

template<unsigned int Size1> struct MaskHelper;
template<> struct MaskHelper<2> {
    static Vc_ALWAYS_INLINE Vc_CONST bool cmpeq (_M128 k1, _M128 k2) { return _mm_movemask_pd(_mm_castps_pd(k1)) == _mm_movemask_pd(_mm_castps_pd(k2)); }
    static Vc_ALWAYS_INLINE Vc_CONST bool cmpneq(_M128 k1, _M128 k2) { return _mm_movemask_pd(_mm_castps_pd(k1)) != _mm_movemask_pd(_mm_castps_pd(k2)); }
};
template<> struct MaskHelper<4> {
    static Vc_ALWAYS_INLINE Vc_CONST bool cmpeq (_M128 k1, _M128 k2) { return _mm_movemask_ps(k1) == _mm_movemask_ps(k2); }
    static Vc_ALWAYS_INLINE Vc_CONST bool cmpneq(_M128 k1, _M128 k2) { return _mm_movemask_ps(k1) != _mm_movemask_ps(k2); }
};
template<> struct MaskHelper<8> {
    static Vc_ALWAYS_INLINE Vc_CONST bool cmpeq (_M128 k1, _M128 k2) { return _mm_movemask_epi8(_mm_castps_si128(k1)) == _mm_movemask_epi8(_mm_castps_si128(k2)); }
    static Vc_ALWAYS_INLINE Vc_CONST bool cmpneq(_M128 k1, _M128 k2) { return _mm_movemask_epi8(_mm_castps_si128(k1)) != _mm_movemask_epi8(_mm_castps_si128(k2)); }
};
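
// Illustrative sketch: MaskHelper compares two masks by collapsing each to an
// integer of lane sign bits (movemask_pd: 1 bit per double lane, movemask_ps:
// 1 bit per float lane, movemask_epi8: 1 bit per byte). For example:
//   const __m128 a = _mm_castsi128_ps(_mm_setr_epi32(-1, 0, -1, 0));
//   _mm_movemask_ps(a) == 0x5            // lanes 0 and 2 set
//   MaskHelper<4>::cmpeq(a, a) == true   // equal bit patterns compare equal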

class Float8Mask;
template<unsigned int VectorSize> class Mask
{
    friend class Mask<2u>;
    friend class Mask<4u>;
    friend class Mask<8u>;
    friend class Mask<16u>;
    friend class Float8Mask;
    public:
        FREE_STORE_OPERATORS_ALIGNED(16)

        // abstracts the way Masks are passed to functions; it can easily be changed to const ref here.
        // Also, Float8Mask requires const ref on 32-bit MSVC.
#if defined VC_MSVC && defined _WIN32
        typedef const Mask<VectorSize> &Argument;
#else
        typedef Mask<VectorSize> Argument;
#endif

        Vc_ALWAYS_INLINE Mask() {}
        Vc_ALWAYS_INLINE Mask(const __m128  &x) : k(x) {}
        Vc_ALWAYS_INLINE Mask(const __m128d &x) : k(_mm_castpd_ps(x)) {}
        Vc_ALWAYS_INLINE Mask(const __m128i &x) : k(_mm_castsi128_ps(x)) {}
        Vc_ALWAYS_INLINE explicit Mask(VectorSpecialInitializerZero::ZEnum) : k(_mm_setzero_ps()) {}
        Vc_ALWAYS_INLINE explicit Mask(VectorSpecialInitializerOne::OEnum) : k(_mm_setallone_ps()) {}
        Vc_ALWAYS_INLINE explicit Mask(bool b) : k(b ? _mm_setallone_ps() : _mm_setzero_ps()) {}
        Vc_ALWAYS_INLINE Mask(const Mask &rhs) : k(rhs.k) {}
        Vc_ALWAYS_INLINE Mask(const Mask<VectorSize / 2> *a)
            : k(_mm_castsi128_ps(_mm_packs_epi16(a[0].dataI(), a[1].dataI()))) {}
        Vc_ALWAYS_INLINE explicit Mask(const Float8Mask &m);

        template<unsigned int OtherSize> Vc_ALWAYS_INLINE_L explicit Mask(const Mask<OtherSize> &x) Vc_ALWAYS_INLINE_R;
//X     {
//X         _M128I tmp = x.dataI();
//X         if (OtherSize < VectorSize) {
//X             tmp = _mm_packs_epi16(tmp, _mm_setzero_si128());
//X             if (VectorSize / OtherSize >= 4u) { tmp = _mm_packs_epi16(tmp, _mm_setzero_si128()); }
//X             if (VectorSize / OtherSize >= 8u) { tmp = _mm_packs_epi16(tmp, _mm_setzero_si128()); }
//X         } else if (OtherSize > VectorSize) {
//X             tmp = _mm_unpacklo_epi8(tmp, tmp);
//X             if (OtherSize / VectorSize >= 4u) { tmp = _mm_unpacklo_epi8(tmp, tmp); }
//X             if (OtherSize / VectorSize >= 8u) { tmp = _mm_unpacklo_epi8(tmp, tmp); }
//X         }
//X         k = _mm_castsi128_ps(tmp);
//X     }

        inline void expand(Mask<VectorSize / 2> *x) const;

        Vc_ALWAYS_INLINE Vc_PURE bool operator==(const Mask &rhs) const { return MaskHelper<VectorSize>::cmpeq (k, rhs.k); }
        Vc_ALWAYS_INLINE Vc_PURE bool operator!=(const Mask &rhs) const { return MaskHelper<VectorSize>::cmpneq(k, rhs.k); }

        Vc_ALWAYS_INLINE Vc_PURE Mask operator!() const { return _mm_andnot_si128(dataI(), _mm_setallone_si128()); }

        Vc_ALWAYS_INLINE Mask &operator&=(const Mask &rhs) { k = _mm_and_ps(k, rhs.k); return *this; }
        Vc_ALWAYS_INLINE Mask &operator|=(const Mask &rhs) { k = _mm_or_ps (k, rhs.k); return *this; }
        Vc_ALWAYS_INLINE Mask &operator^=(const Mask &rhs) { k = _mm_xor_ps(k, rhs.k); return *this; }

        Vc_ALWAYS_INLINE Vc_PURE bool isFull () const { return
#ifdef VC_USE_PTEST
            _mm_testc_si128(dataI(), _mm_setallone_si128()); // return 1 if (0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff) == (~0 & k)
#else
            _mm_movemask_epi8(dataI()) == 0xffff;
#endif
        }
        Vc_ALWAYS_INLINE Vc_PURE bool isEmpty() const { return
#ifdef VC_USE_PTEST
            _mm_testz_si128(dataI(), dataI()); // return 1 if (0, 0, 0, 0) == (k & k)
#else
            _mm_movemask_epi8(dataI()) == 0x0000;
#endif
        }
        Vc_ALWAYS_INLINE Vc_PURE bool isMix() const {
#ifdef VC_USE_PTEST
            return _mm_test_mix_ones_zeros(dataI(), _mm_setallone_si128());
#else
            const int tmp = _mm_movemask_epi8(dataI());
            return tmp != 0 && (tmp ^ 0xffff) != 0;
#endif
        }

#ifndef VC_NO_AUTOMATIC_BOOL_FROM_MASK
        Vc_ALWAYS_INLINE Vc_PURE operator bool() const { return isFull(); }
#endif
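
        // Illustrative sketch of the three predicates above, for VectorSize == 4:
        //   Mask<4> m(_mm_setr_epi32(-1, -1, 0, 0)); // lanes: on, on, off, off
        //   m.isFull()  -> false  (not every lane set)
        //   m.isEmpty() -> false  (not every lane clear)
        //   m.isMix()   -> true   (some lanes set, some clear)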

        Vc_ALWAYS_INLINE_L Vc_PURE_L int shiftMask() const Vc_ALWAYS_INLINE_R Vc_PURE_R;

        Vc_ALWAYS_INLINE_L Vc_PURE_L int toInt() const Vc_ALWAYS_INLINE_R Vc_PURE_R;

        Vc_ALWAYS_INLINE Vc_PURE _M128  data () const { return k; }
        Vc_ALWAYS_INLINE Vc_PURE _M128I dataI() const { return _mm_castps_si128(k); }
        Vc_ALWAYS_INLINE Vc_PURE _M128D dataD() const { return _mm_castps_pd(k); }

        template<unsigned int OtherSize> Vc_ALWAYS_INLINE Vc_PURE Mask<OtherSize> cast() const { return Mask<OtherSize>(k); }

        Vc_ALWAYS_INLINE_L Vc_PURE_L bool operator[](int index) const Vc_ALWAYS_INLINE_R Vc_PURE_R;

        Vc_ALWAYS_INLINE_L Vc_PURE_L int count() const Vc_ALWAYS_INLINE_R Vc_PURE_R;

        /**
         * Returns the index of the first one in the mask.
         *
         * The return value is undefined if the mask is empty.
         */
        Vc_ALWAYS_INLINE_L Vc_PURE_L int firstOne() const Vc_ALWAYS_INLINE_R Vc_PURE_R;

    private:
#ifdef VC_COMPILE_BENCHMARKS
    public:
#endif
        _M128 k;
};
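
// Illustrative sketch of the query interface declared above:
//   Mask<4> m(true);         // every lane set
//   m.count()    == 4        // number of set lanes
//   m.firstOne() == 0        // index of the lowest set lane
//   m[2]         == true     // per-lane test via operator[]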

struct ForeachHelper
{
    _long mask;
    bool brk;
    bool outerBreak;
    Vc_ALWAYS_INLINE ForeachHelper(_long _mask) : mask(_mask), brk(false), outerBreak(false) {}
    Vc_ALWAYS_INLINE bool outer() const { return (mask != 0) && !outerBreak; }
    Vc_ALWAYS_INLINE bool inner() { return (brk = !brk); }
    Vc_ALWAYS_INLINE void noBreak() { outerBreak = false; }
    Vc_ALWAYS_INLINE _long next() {
        outerBreak = true;
#ifdef VC_GNU_ASM
        const _long bit = __builtin_ctzl(mask);
        __asm__("btr %1,%0" : "+r"(mask) : "r"(bit));
#elif defined(_WIN64)
        unsigned long bit;
        _BitScanForward64(&bit, mask);
        _bittestandreset64(&mask, bit);
#elif defined(_WIN32)
        unsigned long bit;
        _BitScanForward(&bit, mask);
        _bittestandreset(&mask, bit);
#else
#error "Not implemented yet. Please contact vc-devel@compeng.uni-frankfurt.de"
#endif
        return bit;
    }
};

#define Vc_foreach_bit(_it_, _mask_) \
    for (Vc::SSE::ForeachHelper Vc__make_unique(foreach_bit_obj)((_mask_).toInt()); Vc__make_unique(foreach_bit_obj).outer(); ) \
        for (_it_ = Vc__make_unique(foreach_bit_obj).next(); Vc__make_unique(foreach_bit_obj).inner(); Vc__make_unique(foreach_bit_obj).noBreak())
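
// Illustrative sketch: Vc_foreach_bit visits each set bit of a mask once,
// lowest bit first. For a mask whose toInt() is 0b1010 the body runs with
// i == 1 and then i == 3:
//   int i;
//   Vc_foreach_bit(i, someMask) {
//       handleLane(i); // hypothetical per-lane work; someMask is any mask type
//   }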

template<unsigned int Size> Vc_ALWAYS_INLINE Vc_PURE int Mask<Size>::shiftMask() const
{
    return _mm_movemask_epi8(dataI());
}

template<> template<> Vc_ALWAYS_INLINE Mask<2>::Mask(const Mask<4> &x) {
    k = _mm_unpacklo_ps(x.data(), x.data());
}
template<> template<> Vc_ALWAYS_INLINE Mask<2>::Mask(const Mask<8> &x) {
    _M128I tmp = _mm_unpacklo_epi16(x.dataI(), x.dataI());
    k = _mm_castsi128_ps(_mm_unpacklo_epi32(tmp, tmp));
}
template<> template<> Vc_ALWAYS_INLINE Mask<2>::Mask(const Mask<16> &x) {
    _M128I tmp = _mm_unpacklo_epi8(x.dataI(), x.dataI());
    tmp = _mm_unpacklo_epi16(tmp, tmp);
    k = _mm_castsi128_ps(_mm_unpacklo_epi32(tmp, tmp));
}
template<> template<> Vc_ALWAYS_INLINE Mask<4>::Mask(const Mask<2> &x) {
    k = _mm_castsi128_ps(_mm_packs_epi16(x.dataI(), _mm_setzero_si128()));
}
template<> template<> Vc_ALWAYS_INLINE Mask<4>::Mask(const Mask<8> &x) {
    k = _mm_castsi128_ps(_mm_unpacklo_epi16(x.dataI(), x.dataI()));
}
template<> template<> Vc_ALWAYS_INLINE Mask<4>::Mask(const Mask<16> &x) {
    _M128I tmp = _mm_unpacklo_epi8(x.dataI(), x.dataI());
    k = _mm_castsi128_ps(_mm_unpacklo_epi16(tmp, tmp));
}
template<> template<> Vc_ALWAYS_INLINE Mask<8>::Mask(const Mask<2> &x) {
    _M128I tmp = _mm_packs_epi16(x.dataI(), x.dataI());
    k = _mm_castsi128_ps(_mm_packs_epi16(tmp, tmp));
}
template<> template<> Vc_ALWAYS_INLINE Mask<8>::Mask(const Mask<4> &x) {
    k = _mm_castsi128_ps(_mm_packs_epi16(x.dataI(), x.dataI()));
}
template<> template<> Vc_ALWAYS_INLINE Mask<8>::Mask(const Mask<16> &x) {
    k = _mm_castsi128_ps(_mm_unpacklo_epi8(x.dataI(), x.dataI()));
}
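
// Illustrative sketch of one narrowing conversion above: Mask<2>::Mask(const Mask<4> &)
// keeps the two low source lanes. With source lanes (a, b, c, d),
// _mm_unpacklo_ps(x, x) produces (a, a, b, b), i.e. the 64-bit lane pattern (a, b).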

template<> inline void Mask< 4>::expand(Mask<2> *x) const {
    x[0].k = _mm_unpacklo_ps(data(), data());
    x[1].k = _mm_unpackhi_ps(data(), data());
}
template<> inline void Mask< 8>::expand(Mask<4> *x) const {
    x[0].k = _mm_castsi128_ps(_mm_unpacklo_epi16(dataI(), dataI()));
    x[1].k = _mm_castsi128_ps(_mm_unpackhi_epi16(dataI(), dataI()));
}
template<> inline void Mask<16>::expand(Mask<8> *x) const {
    x[0].k = _mm_castsi128_ps(_mm_unpacklo_epi8 (dataI(), dataI()));
    x[1].k = _mm_castsi128_ps(_mm_unpackhi_epi8 (dataI(), dataI()));
}

template<> Vc_ALWAYS_INLINE Vc_PURE int Mask< 2>::toInt() const { return _mm_movemask_pd(dataD()); }
template<> Vc_ALWAYS_INLINE Vc_PURE int Mask< 4>::toInt() const { return _mm_movemask_ps(data ()); }
template<> Vc_ALWAYS_INLINE Vc_PURE int Mask< 8>::toInt() const { return _mm_movemask_epi8(_mm_packs_epi16(dataI(), _mm_setzero_si128())); }
template<> Vc_ALWAYS_INLINE Vc_PURE int Mask<16>::toInt() const { return _mm_movemask_epi8(dataI()); }

template<> Vc_ALWAYS_INLINE Vc_PURE bool Mask< 2>::operator[](int index) const { return toInt() & (1 << index); }
template<> Vc_ALWAYS_INLINE Vc_PURE bool Mask< 4>::operator[](int index) const { return toInt() & (1 << index); }
template<> Vc_ALWAYS_INLINE Vc_PURE bool Mask< 8>::operator[](int index) const { return shiftMask() & (1 << 2 * index); }
template<> Vc_ALWAYS_INLINE Vc_PURE bool Mask<16>::operator[](int index) const { return toInt() & (1 << index); }
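
// Illustrative sketch: toInt() packs one bit per logical lane, lane 0 in bit 0.
// A Mask<4> with lanes (on, off, off, on) gives toInt() == 0x9, so m[3] tests
// bit 3 and returns true while m[1] returns false.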

template<> Vc_ALWAYS_INLINE Vc_PURE int Mask< 2>::count() const
{
    int mask = _mm_movemask_pd(dataD());
    return (mask & 1) + (mask >> 1);
}

template<> Vc_ALWAYS_INLINE Vc_PURE int Mask< 4>::count() const
{
#ifdef VC_IMPL_POPCNT
    return _mm_popcnt_u32(_mm_movemask_ps(data()));
//X     tmp = (tmp & 5) + ((tmp >> 1) & 5);
//X     return (tmp & 3) + ((tmp >> 2) & 3);
#else
    _M128I x = _mm_srli_epi32(dataI(), 31);
    x = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(0, 1, 2, 3)));
    x = _mm_add_epi32(x, _mm_shufflelo_epi16(x, _MM_SHUFFLE(1, 0, 3, 2)));
    return _mm_cvtsi128_si32(x);
#endif
}

template<> Vc_ALWAYS_INLINE Vc_PURE int Mask< 8>::count() const
{
#ifdef VC_IMPL_POPCNT
    return _mm_popcnt_u32(_mm_movemask_epi8(dataI())) / 2;
#else
//X     int tmp = _mm_movemask_epi8(dataI());
//X     tmp = (tmp & 0x1111) + ((tmp >> 2) & 0x1111);
//X     tmp = (tmp & 0x0303) + ((tmp >> 4) & 0x0303);
//X     return (tmp & 0x000f) + ((tmp >> 8) & 0x000f);
    _M128I x = _mm_srli_epi16(dataI(), 15);
    x = _mm_add_epi16(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(0, 1, 2, 3)));
    x = _mm_add_epi16(x, _mm_shufflelo_epi16(x, _MM_SHUFFLE(0, 1, 2, 3)));
    x = _mm_add_epi16(x, _mm_shufflelo_epi16(x, _MM_SHUFFLE(2, 3, 0, 1)));
    return _mm_extract_epi16(x, 0);
#endif
}

template<> Vc_ALWAYS_INLINE Vc_PURE int Mask<16>::count() const
{
    int tmp = _mm_movemask_epi8(dataI());
#ifdef VC_IMPL_POPCNT
    return _mm_popcnt_u32(tmp);
#else
    tmp = (tmp & 0x5555) + ((tmp >> 1) & 0x5555);
    tmp = (tmp & 0x3333) + ((tmp >> 2) & 0x3333);
    tmp = (tmp & 0x0f0f) + ((tmp >> 4) & 0x0f0f);
    return (tmp & 0x00ff) + ((tmp >> 8) & 0x00ff);
#endif
}
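
// Illustrative sketch of the SWAR fallback in Mask<16>::count(): each step adds
// neighbouring bit groups in parallel. For tmp == 0x00ff (8 bits set), the 1-bit
// step yields 0x00aa (each 2-bit field holds 2), the 2-bit step 0x0044 (each
// nibble holds 4), the 4-bit step 0x0008 (each byte holds its popcount), and the
// final step returns 8 + 0 == 8.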

class Float8Mask
{
    enum Constants {
        PartialSize = 4,
        VectorSize = 8
    };
    public:
        FREE_STORE_OPERATORS_ALIGNED(16)

        // abstracts the way Masks are passed to functions; it can easily be changed to const ref here.
        // Also, Float8Mask requires const ref on 32-bit MSVC.
#if defined VC_MSVC && defined _WIN32
        typedef const Float8Mask & Argument;
#else
        typedef Float8Mask Argument;
#endif

        Vc_ALWAYS_INLINE Float8Mask() {}
        Vc_ALWAYS_INLINE Float8Mask(const M256 &x) : k(x) {}
        Vc_ALWAYS_INLINE explicit Float8Mask(VectorSpecialInitializerZero::ZEnum) {
            k[0] = _mm_setzero_ps();
            k[1] = _mm_setzero_ps();
        }
        Vc_ALWAYS_INLINE explicit Float8Mask(VectorSpecialInitializerOne::OEnum) {
            k[0] = _mm_setallone_ps();
            k[1] = _mm_setallone_ps();
        }
        Vc_ALWAYS_INLINE explicit Float8Mask(bool b) {
            const __m128 tmp = b ? _mm_setallone_ps() : _mm_setzero_ps();
            k[0] = tmp;
            k[1] = tmp;
        }
        Vc_ALWAYS_INLINE Float8Mask(const Mask<VectorSize> &a) {
            k[0] = _mm_castsi128_ps(_mm_unpacklo_epi16(a.dataI(), a.dataI()));
            k[1] = _mm_castsi128_ps(_mm_unpackhi_epi16(a.dataI(), a.dataI()));
        }

        Vc_ALWAYS_INLINE Vc_PURE bool operator==(const Float8Mask &rhs) const {
            return MaskHelper<PartialSize>::cmpeq (k[0], rhs.k[0])
                && MaskHelper<PartialSize>::cmpeq (k[1], rhs.k[1]);
        }
        Vc_ALWAYS_INLINE Vc_PURE bool operator!=(const Float8Mask &rhs) const {
            return MaskHelper<PartialSize>::cmpneq(k[0], rhs.k[0])
                || MaskHelper<PartialSize>::cmpneq(k[1], rhs.k[1]);
        }

        Vc_ALWAYS_INLINE Vc_PURE Float8Mask operator&&(const Float8Mask &rhs) const {
            Float8Mask r;
            r.k[0] = _mm_and_ps(k[0], rhs.k[0]);
            r.k[1] = _mm_and_ps(k[1], rhs.k[1]);
            return r;
        }
        Vc_ALWAYS_INLINE Vc_PURE Float8Mask operator&(const Float8Mask &rhs) const {
            Float8Mask r;
            r.k[0] = _mm_and_ps(k[0], rhs.k[0]);
            r.k[1] = _mm_and_ps(k[1], rhs.k[1]);
            return r;
        }
        Vc_ALWAYS_INLINE Vc_PURE Float8Mask operator||(const Float8Mask &rhs) const {
            Float8Mask r;
            r.k[0] = _mm_or_ps(k[0], rhs.k[0]);
            r.k[1] = _mm_or_ps(k[1], rhs.k[1]);
            return r;
        }
        Vc_ALWAYS_INLINE Vc_PURE Float8Mask operator|(const Float8Mask &rhs) const {
            Float8Mask r;
            r.k[0] = _mm_or_ps(k[0], rhs.k[0]);
            r.k[1] = _mm_or_ps(k[1], rhs.k[1]);
            return r;
        }
        Vc_ALWAYS_INLINE Vc_PURE Float8Mask operator^(const Float8Mask &rhs) const {
            Float8Mask r;
            r.k[0] = _mm_xor_ps(k[0], rhs.k[0]);
            r.k[1] = _mm_xor_ps(k[1], rhs.k[1]);
            return r;
        }
        Vc_ALWAYS_INLINE Vc_PURE Float8Mask operator!() const {
            Float8Mask r;
            r.k[0] = _mm_andnot_ps(k[0], _mm_setallone_ps());
            r.k[1] = _mm_andnot_ps(k[1], _mm_setallone_ps());
            return r;
        }
        Vc_ALWAYS_INLINE Float8Mask &operator&=(const Float8Mask &rhs) {
            k[0] = _mm_and_ps(k[0], rhs.k[0]);
            k[1] = _mm_and_ps(k[1], rhs.k[1]);
            return *this;
        }
        Vc_ALWAYS_INLINE Float8Mask &operator|=(const Float8Mask &rhs) {
            k[0] = _mm_or_ps (k[0], rhs.k[0]);
            k[1] = _mm_or_ps (k[1], rhs.k[1]);
            return *this;
        }
        Vc_ALWAYS_INLINE Float8Mask &operator^=(const Float8Mask &rhs) {
            k[0] = _mm_xor_ps(k[0], rhs.k[0]);
            k[1] = _mm_xor_ps(k[1], rhs.k[1]);
            return *this;
        }

        Vc_ALWAYS_INLINE Vc_PURE bool isFull () const {
            const _M128 tmp = _mm_and_ps(k[0], k[1]);
#ifdef VC_USE_PTEST
            return _mm_testc_si128(_mm_castps_si128(tmp), _mm_setallone_si128());
#else
            return _mm_movemask_ps(tmp) == 0xf;
            //_mm_movemask_ps(k[0]) == 0xf &&
            //_mm_movemask_ps(k[1]) == 0xf;
#endif
        }
        Vc_ALWAYS_INLINE Vc_PURE bool isEmpty() const {
            const _M128 tmp = _mm_or_ps(k[0], k[1]);
#ifdef VC_USE_PTEST
            return _mm_testz_si128(_mm_castps_si128(tmp), _mm_castps_si128(tmp));
#else
            return _mm_movemask_ps(tmp) == 0x0;
            //_mm_movemask_ps(k[0]) == 0x0 &&
            //_mm_movemask_ps(k[1]) == 0x0;
#endif
        }
        Vc_ALWAYS_INLINE Vc_PURE bool isMix() const {
            // consider [1111 0000]
            // solution:
            // if k[0] != k[1] => return true
            // if k[0] == k[1] => return k[0].isMix
#ifdef VC_USE_PTEST
            __m128i tmp = _mm_castps_si128(_mm_xor_ps(k[0], k[1]));
            // tmp == 0 <=> k[0] == k[1]
            return !_mm_testz_si128(tmp, tmp) ||
                _mm_test_mix_ones_zeros(_mm_castps_si128(k[0]), _mm_setallone_si128());
#else
            const int tmp = _mm_movemask_ps(k[0]) + _mm_movemask_ps(k[1]);
            return tmp > 0x0 && tmp < (0xf + 0xf);
#endif
        }
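
        // Illustrative sketch of the non-PTEST isMix() above: each movemask is
        // in [0x0, 0xf], so the sum is 0x0 only when both halves are all-clear
        // and 0x1e only when both are all-set; e.g. halves 0xf and 0x0 sum to
        // 0xf, strictly between the extremes, so the mask counts as mixed.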

#ifndef VC_NO_AUTOMATIC_BOOL_FROM_MASK
        Vc_ALWAYS_INLINE Vc_PURE operator bool() const { return isFull(); }
#endif

        Vc_ALWAYS_INLINE Vc_PURE int shiftMask() const {
            return (_mm_movemask_ps(k[1]) << 4) + _mm_movemask_ps(k[0]);
        }
        Vc_ALWAYS_INLINE Vc_PURE int toInt() const { return (_mm_movemask_ps(k[1]) << 4) + _mm_movemask_ps(k[0]); }

        Vc_ALWAYS_INLINE Vc_PURE const M256 &data () const { return k; }

        Vc_ALWAYS_INLINE Vc_PURE bool operator[](int index) const {
            return (toInt() & (1 << index)) != 0;
        }

        Vc_ALWAYS_INLINE Vc_PURE int count() const {
#ifdef VC_IMPL_POPCNT
            return _mm_popcnt_u32(toInt());
#else
//X         int tmp1 = _mm_movemask_ps(k[0]);
//X         int tmp2 = _mm_movemask_ps(k[1]);
//X         tmp1 = (tmp1 & 5) + ((tmp1 >> 1) & 5);
//X         tmp2 = (tmp2 & 5) + ((tmp2 >> 1) & 5);
//X         return (tmp1 & 3) + (tmp2 & 3) + ((tmp1 >> 2) & 3) + ((tmp2 >> 2) & 3);
            _M128I x = _mm_add_epi32(_mm_srli_epi32(_mm_castps_si128(k[0]), 31),
                                     _mm_srli_epi32(_mm_castps_si128(k[1]), 31));
            x = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(0, 1, 2, 3)));
            x = _mm_add_epi32(x, _mm_shufflelo_epi16(x, _MM_SHUFFLE(1, 0, 3, 2)));
            return _mm_cvtsi128_si32(x);
#endif
        }

        Vc_ALWAYS_INLINE_L Vc_PURE_L int firstOne() const Vc_ALWAYS_INLINE_R Vc_PURE_R;

    private:
#ifdef VC_COMPILE_BENCHMARKS
    public:
#endif
        M256 k;
};

template<unsigned int Size> Vc_ALWAYS_INLINE Vc_PURE int Mask<Size>::firstOne() const
{
    const int mask = toInt();
#ifdef _MSC_VER
    unsigned long bit;
    _BitScanForward(&bit, mask);
#else
    int bit;
    __asm__("bsf %1,%0" : "=&r"(bit) : "r"(mask));
#endif
    return bit;
}
Vc_ALWAYS_INLINE Vc_PURE int Float8Mask::firstOne() const
{
    const int mask = toInt();
#ifdef _MSC_VER
    unsigned long bit;
    _BitScanForward(&bit, mask);
#else
    int bit;
    __asm__("bsf %1,%0" : "=&r"(bit) : "r"(mask));
#endif
    return bit;
}

template<unsigned int VectorSize>
Vc_ALWAYS_INLINE Mask<VectorSize>::Mask(const Float8Mask &m)
    : k(_mm_castsi128_ps(_mm_packs_epi32(_mm_castps_si128(m.data()[0]), _mm_castps_si128(m.data()[1])))) {}

class Float8GatherMask
{
    public:
        Float8GatherMask(const Mask<8u> &k) : mask(k.toInt()) {}
        Float8GatherMask(const Float8Mask &k) : mask(k.toInt()) {}
        int toInt() const { return mask; }
    private:
        const int mask;
};

/**
 * Loop over all set bits in the mask. The iterator variable is set to the positions of the set
 * bits. For a mask of e.g. 00011010 the loop body runs with the iterator set to 1, 3, and 4.
 *
 * This allows you to write:
 * \code
 * float_v a = ...;
 * foreach_bit(int i, a < 0.f) {
 *     std::cout << a[i] << "\n";
 * }
 * \endcode
 * The example prints all the values in \p a that are negative, and only those.
 *
 * \param it   The iterator variable. For example "int i".
 * \param mask The mask to iterate over. You can also just write a vector operation that returns a
 *             mask.
 */
//X #define foreach_bit(it, mask)
//X     for (int _sse_vector_foreach_inner = 1, ForeachScope _sse_vector_foreach_scope(mask.toInt()), int it = _sse_vector_foreach_scope.bit(); _sse_vector_foreach_inner; --_sse_vector_foreach_inner)
//X     for (int _sse_vector_foreach_mask = (mask).toInt(), int _sse_vector_foreach_it = _sse_bitscan(mask.toInt());
//X             _sse_vector_foreach_it > 0;
//X             _sse_vector_foreach_it = _sse_bitscan_initialized(_sse_vector_foreach_it, mask.data()))
//X         for (int _sse_vector_foreach_inner = 1, it = _sse_vector_foreach_it; _sse_vector_foreach_inner; --_sse_vector_foreach_inner)

// Operators
// let binary and/or/xor work for any combination of masks (as long as they have the same sizeof)
template<unsigned int LSize, unsigned int RSize> Mask<LSize> operator& (const Mask<LSize> &lhs, const Mask<RSize> &rhs) { return _mm_and_ps(lhs.data(), rhs.data()); }
template<unsigned int LSize, unsigned int RSize> Mask<LSize> operator| (const Mask<LSize> &lhs, const Mask<RSize> &rhs) { return _mm_or_ps (lhs.data(), rhs.data()); }
template<unsigned int LSize, unsigned int RSize> Mask<LSize> operator^ (const Mask<LSize> &lhs, const Mask<RSize> &rhs) { return _mm_xor_ps(lhs.data(), rhs.data()); }

// binary and/or/xor cannot work with one operand larger than the other
template<unsigned int Size> void operator& (const Mask<Size> &lhs, const Float8Mask &rhs);
template<unsigned int Size> void operator| (const Mask<Size> &lhs, const Float8Mask &rhs);
template<unsigned int Size> void operator^ (const Mask<Size> &lhs, const Float8Mask &rhs);
template<unsigned int Size> void operator& (const Float8Mask &rhs, const Mask<Size> &lhs);
template<unsigned int Size> void operator| (const Float8Mask &rhs, const Mask<Size> &lhs);
template<unsigned int Size> void operator^ (const Float8Mask &rhs, const Mask<Size> &lhs);

// disable logical and/or for incompatible masks
template<unsigned int LSize, unsigned int RSize> void operator&&(const Mask<LSize> &lhs, const Mask<RSize> &rhs);
template<unsigned int LSize, unsigned int RSize> void operator||(const Mask<LSize> &lhs, const Mask<RSize> &rhs);
template<unsigned int Size> void operator&&(const Mask<Size> &lhs, const Float8Mask &rhs);
template<unsigned int Size> void operator||(const Mask<Size> &lhs, const Float8Mask &rhs);
template<unsigned int Size> void operator&&(const Float8Mask &rhs, const Mask<Size> &lhs);
template<unsigned int Size> void operator||(const Float8Mask &rhs, const Mask<Size> &lhs);

// logical and/or for compatible masks
template<unsigned int Size> Vc_ALWAYS_INLINE Vc_PURE Mask<Size> operator&&(const Mask<Size> &lhs, const Mask<Size> &rhs) { return _mm_and_ps(lhs.data(), rhs.data()); }
template<unsigned int Size> Vc_ALWAYS_INLINE Vc_PURE Mask<Size> operator||(const Mask<Size> &lhs, const Mask<Size> &rhs) { return _mm_or_ps (lhs.data(), rhs.data()); }
Vc_ALWAYS_INLINE Vc_PURE Mask<8> operator&&(const Float8Mask &rhs, const Mask<8> &lhs) { return static_cast<Mask<8> >(rhs) && lhs; }
Vc_ALWAYS_INLINE Vc_PURE Mask<8> operator||(const Float8Mask &rhs, const Mask<8> &lhs) { return static_cast<Mask<8> >(rhs) || lhs; }
Vc_ALWAYS_INLINE Vc_PURE Mask<8> operator&&(const Mask<8> &rhs, const Float8Mask &lhs) { return rhs && static_cast<Mask<8> >(lhs); }
Vc_ALWAYS_INLINE Vc_PURE Mask<8> operator||(const Mask<8> &rhs, const Float8Mask &lhs) { return rhs || static_cast<Mask<8> >(lhs); }
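
// Illustrative sketch: logical && / || between Float8Mask and Mask<8> route
// through the Mask<8> conversion constructor, e.g.
//   Float8Mask f(true);
//   Mask<8>    m(true);
//   Mask<8>    both = f && m; // f converted to Mask<8>, then lane-wise AND
// while the declared-but-undefined void overloads above turn any mismatched
// combination into a link-time error instead of silently mixing lane widths.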

} // namespace SSE
} // namespace Vc
} // namespace ROOT

#include "undomacros.h"

#endif // SSE_MASK_H