ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
interleavedmemory.h
Go to the documentation of this file.
1 /* This file is part of the Vc library. {{{
2 
3  Copyright (C) 2012 Matthias Kretz <kretz@kde.org>
4 
5  Vc is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as
7  published by the Free Software Foundation, either version 3 of
8  the License, or (at your option) any later version.
9 
10  Vc is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17 
18 }}}*/
19 
20 #ifndef VC_COMMON_INTERLEAVEDMEMORY_H
21 #define VC_COMMON_INTERLEAVEDMEMORY_H
22 
23 #include "macros.h"
24 
25 namespace ROOT {
26 namespace Vc
27 {
28 namespace Common
29 {
30 
namespace Internal
{
/**
 * \internal
 * Primary template: \c Type is simply \p To when \p From is not const-qualified.
 */
template<typename From, typename To>
struct CopyConst
{
    typedef To Type;
};

/**
 * \internal
 * Partial specialization for a const-qualified first argument: \c Type is \c const \p To.
 */
template<typename From, typename To>
struct CopyConst<const From, To>
{
    typedef const To Type;
};

/**
 * \internal
 * Primary template: exposes the third argument as \c Type.
 */
template<typename Struct, typename Index, typename Result>
struct EnableInterleaves
{
    typedef Result Type;
};

/**
 * \internal
 * For a const-qualified first argument the template is only declared here, not defined, so
 * \c Type cannot be named for it.
 */
template<typename Struct, typename Index, typename Result>
struct EnableInterleaves<const Struct, Index, Result>;
} // namespace Internal
39 
40 /**
41  * \internal
42  */
43 template<typename V> struct InterleavedMemoryAccessBase
44 {
45  typedef typename V::EntryType T;
46  typedef typename V::IndexType I;
47  typedef typename V::AsArg VArg;
48  typedef T Ta Vc_MAY_ALIAS;
49  const I m_indexes;
50  Ta *const m_data;
51 
52  Vc_ALWAYS_INLINE InterleavedMemoryAccessBase(typename I::AsArg indexes, Ta *data)
53  : m_indexes(indexes), m_data(data)
54  {
55  }
56 
57  // implementations of the following are in {scalar,sse,avx}/interleavedmemory.tcc
58  void deinterleave(V &v0, V &v1) const;
59  void deinterleave(V &v0, V &v1, V &v2) const;
60  void deinterleave(V &v0, V &v1, V &v2, V &v3) const;
61  void deinterleave(V &v0, V &v1, V &v2, V &v3, V &v4) const;
62  void deinterleave(V &v0, V &v1, V &v2, V &v3, V &v4, V &v5) const;
63  void deinterleave(V &v0, V &v1, V &v2, V &v3, V &v4, V &v5, V &v6) const;
64  void deinterleave(V &v0, V &v1, V &v2, V &v3, V &v4, V &v5, V &v6, V &v7) const;
65 
66  void interleave(VArg v0, VArg v1);
67  void interleave(VArg v0, VArg v1, VArg v2);
68  void interleave(VArg v0, VArg v1, VArg v2, VArg v3);
69  void interleave(VArg v0, VArg v1, VArg v2, VArg v3, VArg v4);
70  void interleave(VArg v0, VArg v1, VArg v2, VArg v3, VArg v4, VArg v5);
71  void interleave(VArg v0, VArg v1, VArg v2, VArg v3, VArg v4, VArg v5, VArg v6);
72  void interleave(VArg v0, VArg v1, VArg v2, VArg v3, VArg v4, VArg v5, VArg v6, VArg v7);
73 };
74 
75 /**
76  * \internal
77  */
78 // delay execution of the deinterleaving gather until operator=
79 template<size_t StructSize, typename V> struct InterleavedMemoryReadAccess : public InterleavedMemoryAccessBase<V>
80 {
82  typedef typename Base::Ta Ta;
83  typedef typename Base::I I;
84 
85  Vc_ALWAYS_INLINE InterleavedMemoryReadAccess(const Ta *data, typename I::AsArg indexes)
86  : Base(indexes * I(StructSize), const_cast<Ta *>(data)) // this needs to be refactored to properly keep the constness
87  {
88  }
89 };
90 
91 /**
92  * \internal
93  */
94 template<size_t StructSize, typename V> struct InterleavedMemoryAccess : public InterleavedMemoryReadAccess<StructSize, V>
95 {
97  typedef typename Base::Ta Ta;
98  typedef typename Base::I I;
99 
100  Vc_ALWAYS_INLINE InterleavedMemoryAccess(Ta *data, typename I::AsArg indexes)
101  : InterleavedMemoryReadAccess<StructSize, V>(data, indexes)
102  {
103  }
104 
105 #define _VC_SCATTER_ASSIGNMENT(LENGTH, parameters) \
106  Vc_ALWAYS_INLINE void operator=(const VectorTuple<LENGTH, V> &rhs) \
107  { \
108  VC_STATIC_ASSERT(LENGTH <= StructSize, You_are_trying_to_scatter_more_data_into_the_struct_than_it_has); \
109  this->interleave parameters ; \
110  } \
111  Vc_ALWAYS_INLINE void operator=(const VectorTuple<LENGTH, const V> &rhs) \
112  { \
113  VC_STATIC_ASSERT(LENGTH <= StructSize, You_are_trying_to_scatter_more_data_into_the_struct_than_it_has); \
114  checkIndexesUnique(); \
115  this->interleave parameters ; \
116  }
117  _VC_SCATTER_ASSIGNMENT(2, (rhs.l, rhs.r))
118  _VC_SCATTER_ASSIGNMENT(3, (rhs.l.l, rhs.l.r, rhs.r));
119  _VC_SCATTER_ASSIGNMENT(4, (rhs.l.l.l, rhs.l.l.r, rhs.l.r, rhs.r));
120  _VC_SCATTER_ASSIGNMENT(5, (rhs.l.l.l.l, rhs.l.l.l.r, rhs.l.l.r, rhs.l.r, rhs.r));
121  _VC_SCATTER_ASSIGNMENT(6, (rhs.l.l.l.l.l, rhs.l.l.l.l.r, rhs.l.l.l.r, rhs.l.l.r, rhs.l.r, rhs.r));
122  _VC_SCATTER_ASSIGNMENT(7, (rhs.l.l.l.l.l.l, rhs.l.l.l.l.l.r, rhs.l.l.l.l.r, rhs.l.l.l.r, rhs.l.l.r, rhs.l.r, rhs.r));
123  _VC_SCATTER_ASSIGNMENT(8, (rhs.l.l.l.l.l.l.l, rhs.l.l.l.l.l.l.r, rhs.l.l.l.l.l.r, rhs.l.l.l.l.r, rhs.l.l.l.r, rhs.l.l.r, rhs.l.r, rhs.r));
124 #undef _VC_SCATTER_ASSIGNMENT
125 
126 private:
127 #ifdef NDEBUG
128  Vc_ALWAYS_INLINE void checkIndexesUnique() const {}
129 #else
130  void checkIndexesUnique() const
131  {
132  const I test = Base::m_indexes.sorted();
133  VC_ASSERT(I::Size == 1 || (test == test.rotated(1)).isEmpty())
134  }
135 #endif
136 };
137 
138 #ifdef DOXYGEN
139 } // namespace Common
140 // in doxygen InterleavedMemoryWrapper should appear in the Vc namespace (see the using statement
141 // below)
142 #endif
143 
144 /**
145  * Wraps a pointer to memory with convenience functions to access it via vectors.
146  *
147  * \param S The type of the struct.
148  * \param V The type of the vector to be returned when read. This should reflect the type of the
149  * members inside the struct.
150  *
151  * \see operator[]
152  * \ingroup Utilities
153  * \headerfile interleavedmemory.h <Vc/Memory>
154  */
155 template<typename S, typename V> class InterleavedMemoryWrapper
156 {
157  typedef typename V::EntryType T;
158  typedef typename V::IndexType I;
159  typedef typename V::AsArg VArg;
160  typedef typename I::AsArg IndexType;
161  typedef InterleavedMemoryAccess<sizeof(S) / sizeof(T), V> Access;
162  typedef InterleavedMemoryReadAccess<sizeof(S) / sizeof(T), V> ReadAccess;
164  Ta *const m_data;
165 
166  VC_STATIC_ASSERT((sizeof(S) / sizeof(T)) * sizeof(T) == sizeof(S), InterleavedMemoryAccess_does_not_support_packed_structs);
167 
168 public:
169  /**
170  * Constructs the wrapper object.
171  *
172  * \param s A pointer to a C-array.
173  */
175  : m_data(reinterpret_cast<Ta *>(s))
176  {
177  }
178 
179  /**
180  * Interleaved scatter/gather access.
181  *
182  * Assuming you have a struct of floats and a vector of \p indexes into the array, this function
183  * can be used to access the struct entries as vectors using the minimal number of store or load
184  * instructions.
185  *
186  * \param indexes Vector of indexes that determine the gather locations.
187  *
188  * \return A special (magic) object that executes the loads and deinterleave on assignment to a
189  * vector tuple.
190  *
191  * Example:
192  * \code
193  * struct Foo {
194  * float x, y, z;
195  * };
196  *
197  * void fillWithBar(Foo *_data, uint_v indexes)
198  * {
199  * Vc::InterleavedMemoryWrapper<Foo, float_v> data(_data);
200  * const float_v x = bar(1);
201  * const float_v y = bar(2);
202  * const float_v z = bar(3);
203  * data[indexes] = (x, y, z);
204  * // it's also possible to just store a subset at the front of the struct:
205  * data[indexes] = (x, y);
206  * // if you want to store a single entry, use scatter:
207  * z.scatter(_data, &Foo::x, indexes);
208  * }
209  *
210  * float_v normalizeStuff(Foo *_data, uint_v indexes)
211  * {
212  * Vc::InterleavedMemoryWrapper<Foo, float_v> data(_data);
213  * float_v x, y, z;
214  * (x, y, z) = data[indexes];
215  * // it is also possible to just load a subset from the front of the struct:
216  * // (x, y) = data[indexes];
217  * return Vc::sqrt(x * x + y * y + z * z);
218  * }
219  * \endcode
220  *
221  * You may think of the gather operation (or scatter as the inverse) like this:
222 \verbatim
223  Memory: {x0 y0 z0 x1 y1 z1 x2 y2 z2 x3 y3 z3 x4 y4 z4 x5 y5 z5 x6 y6 z6 x7 y7 z7 x8 y8 z8}
224  indexes: [5, 0, 1, 7]
225 Result in (x, y, z): ({x5 x0 x1 x7}, {y5 y0 y1 y7}, {z5 z0 z1 z7})
226 \endverbatim
227  *
228  * \warning If \p indexes contains non-unique entries on scatter, the result is undefined. If
229  * \c NDEBUG is not defined the implementation will assert that the \p indexes entries are unique.
230  */
231 #ifdef DOXYGEN
232  Vc_ALWAYS_INLINE Access operator[](IndexType indexes)
233 #else
234  // need to SFINAE disable this for objects that wrap constant data
235  template <typename U>
237  VC_ALIGNED_PARAMETER(U) indexes)
238 #endif
239  {
240  return Access(m_data, indexes);
241  }
242 
243  /// const overload (gathers only) of the above function
245  {
246  return ReadAccess(m_data, indexes);
247  }
248 
249  /// alias of the above function
251  {
252  return operator[](indexes);
253  }
254 
255  //Vc_ALWAYS_INLINE Access scatter(I indexes, VArg v0, VArg v1);
256 };
257 #ifndef DOXYGEN
258 } // namespace Common
259 
260 using Common::InterleavedMemoryWrapper;
261 #endif
262 
263 } // namespace Vc
264 } // namespace ROOT
265 
266 #include "undomacros.h"
267 
268 #endif // VC_COMMON_INTERLEAVEDMEMORY_H
static double B[]
Vc_ALWAYS_INLINE InterleavedMemoryReadAccess(const Ta *data, typename I::AsArg indexes)
const Double_t * v1
Definition: TArcBall.cxx:33
const char * Size
Definition: TXMLSetup.cxx:56
Internal::CopyConst< S, T >::Type Ta Vc_MAY_ALIAS
static double A[]
Vc_ALWAYS_INLINE ReadAccess gather(VC_ALIGNED_PARAMETER(IndexType) indexes) const
alias of the above function
Vc_ALWAYS_INLINE InterleavedMemoryAccess(Ta *data, typename I::AsArg indexes)
Vc_ALWAYS_INLINE Internal::EnableInterleaves< S, U, Access >::Type operator[](VC_ALIGNED_PARAMETER(U) indexes)
Interleaved scatter/gather access.
Vc_ALWAYS_INLINE InterleavedMemoryWrapper(S *s)
Constructs the wrapper object.
InterleavedMemoryAccessBase< V > Base
#define VC_ASSERT(x)
Definition: macros.h:212
InterleavedMemoryAccessBase< V > Base
Vc_ALWAYS_INLINE InterleavedMemoryAccessBase(typename I::AsArg indexes, Ta *data)
#define VC_ALIGNED_PARAMETER(_Type)
Definition: macros.h:368
#define Vc_ALWAYS_INLINE
Definition: macros.h:130
static const float S
Definition: mandel.cpp:113
Vc_ALWAYS_INLINE ReadAccess operator[](VC_ALIGNED_PARAMETER(IndexType) indexes) const
const overload (gathers only) of the above function
_VC_SCATTER_ASSIGNMENT(3,(rhs.l.l, rhs.l.r, rhs.r))
#define VC_STATIC_ASSERT(cond, msg)
Definition: macros.h:246
TCanvas * Ta
Definition: textalign.C:2
InterleavedMemoryAccess< sizeof(S)/sizeof(T), V > Access
Wraps a pointer to memory with convenience functions to access it via vectors.
TRandom3 R
a TMatrixD.
Definition: testIO.cxx:28
InterleavedMemoryReadAccess< sizeof(S)/sizeof(T), V > ReadAccess