ROOT  6.06/09
Reference Guide
global.h
Go to the documentation of this file.
1 /* This file is part of the Vc library.
2 
3  Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
4 
5  Vc is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as
7  published by the Free Software Foundation, either version 3 of
8  the License, or (at your option) any later version.
9 
10  Vc is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17 
18 */
19 
20 #ifndef VC_GLOBAL_H
21 #define VC_GLOBAL_H
22 
23 #ifndef DOXYGEN
24 
25 // Compiler defines: the first matching branch below sets exactly one VC_<compiler> macro.
26 #ifdef __INTEL_COMPILER // VC_ICC carries the compiler build date, not a version number
27 #define VC_ICC __INTEL_COMPILER_BUILD_DATE
28 #elif defined(__OPENCC__)
29 #define VC_OPEN64 1
30 #elif defined(__clang__) // VC_CLANG and VC_GCC encode the version as 0xMMmmpp (major, minor, patchlevel), e.g. 4.4.5 -> 0x40405
31 #define VC_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
32 #elif defined(__GNUC__) // only reached when none of the ICC / Open64 / clang branches matched
33 #define VC_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
34 #elif defined(_MSC_VER)
35 #define VC_MSVC _MSC_FULL_VER
36 #else
37 #define VC_UNSUPPORTED_COMPILER 1
38 #endif
39 
40 // Features/Quirks defines
41 #if defined VC_MSVC && defined _WIN32
42 // the Win32 ABI can't handle function parameters with alignment >= 16
43 #define VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN 1
44 #endif
45 #if defined(__GNUC__) && !defined(VC_NO_INLINE_ASM) // any compiler defining __GNUC__, unless the user opts out via VC_NO_INLINE_ASM
46 #define VC_GNU_ASM 1
47 #endif
48 #if defined(VC_GCC) && (VC_GCC <= 0x40405 || (VC_GCC >= 0x40500 && VC_GCC <= 0x40502)) && !(VC_GCC == 0x40502 && defined(__GNUC_UBUNTU_VERSION__) && __GNUC_UBUNTU_VERSION__ == 0xb0408)
49 // Affected: GCC <= 4.4.5 and 4.5.0 - 4.5.2. GCC 4.6.0 / 4.5.3 / 4.4.6 switched to the interface as defined by ICC
50 // (Ubuntu 11.04 ships a GCC 4.5.2 with the new interface, hence the explicit exclusion above)
51 #define VC_MM256_MASKSTORE_WRONG_MASK_TYPE 1
52 #endif
53 #if defined(VC_GCC) && VC_GCC >= 0x40300 // GCC >= 4.3.0 only; VC_GCC is never set for ICC, Open64 or clang
54 #define VC_HAVE_ATTRIBUTE_ERROR 1
55 #define VC_HAVE_ATTRIBUTE_WARNING 1
56 #endif
57 
58 #if (defined(__GXX_EXPERIMENTAL_CXX0X__) && VC_GCC >= 0x40600) || __cplusplus >= 201103
59 # define VC_CXX11 1
60 # ifdef VC_GCC
61 # if VC_GCC >= 0x40700 // no upper bound is applied (a former "&& VC_GCC < 0x408000)" clause is disabled)
62 // ::max_align_t was introduced with GCC 4.7. std::max_align_t took a bit longer.
63 # define VC_HAVE_MAX_ALIGN_T 1
64 # endif
65 # elif defined(VC_ICC)
66 # define VC_HAVE_MAX_ALIGN_T 1
67 # elif !defined(VC_CLANG)
68 // Clang doesn't provide max_align_t at all, so it gets neither macro; every remaining compiler is taken to have std::max_align_t
69 # define VC_HAVE_STD_MAX_ALIGN_T 1
70 # endif
71 #endif
72 
73 // ICC ships the AVX2 intrinsics inside the AVX1 header.
74 // FIXME: the number 20120731 is too large, but I don't know which one is the right one
75 #if (defined(VC_ICC) && VC_ICC >= 20120731) || (defined(VC_MSVC) && VC_MSVC >= 170000000)
76 #define VC_UNCONDITIONAL_AVX2_INTRINSICS 1
77 #endif
78 
79 /* Define the following names as unique integers, the only type the preprocessor can
80  * compare. This allows using -DVC_IMPL=SSE3: the preprocessor will then consider VC_IMPL and SSE3
81  * to be equal. Of course, it is important to undefine these names again later on!
82  */
83 #define Scalar 0x00100000 // base implementation IDs: distinct values within the bits selected by IMPL_MASK
84 #define SSE 0x00200000
85 #define SSE2 0x00300000
86 #define SSE3 0x00400000
87 #define SSSE3 0x00500000
88 #define SSE4_1 0x00600000
89 #define SSE4_2 0x00700000
90 #define AVX 0x00800000
91 
92 #define XOP 0x00000001 // extension flags: one bit each, all within EXT_MASK, tested individually with (VC_IMPL & flag)
93 #define FMA4 0x00000002
94 #define F16C 0x00000004
95 #define POPCNT 0x00000008
96 #define SSE4a 0x00000010
97 #define FMA 0x00000020
98 
99 #define IMPL_MASK 0xFFF00000 // upper 12 bits: base implementation ID
100 #define EXT_MASK 0x000FFFFF // lower 20 bits: extension flags
101 
102 #ifdef VC_MSVC // under MSVC, derive __SSE__ / __SSE2__ from _M_IX86_FP or _M_AMD64 unless they are already defined
103 # ifdef _M_IX86_FP
104 # if _M_IX86_FP >= 1
105 # ifndef __SSE__
106 # define __SSE__ 1
107 # endif
108 # endif
109 # if _M_IX86_FP >= 2
110 # ifndef __SSE2__
111 # define __SSE2__ 1
112 # endif
113 # endif
114 # elif defined(_M_AMD64)
115 // If the target is x86_64 then SSE2 (and therefore SSE) is guaranteed
116 # ifndef __SSE__
117 # define __SSE__ 1
118 # endif
119 # ifndef __SSE2__
120 # define __SSE2__ 1
121 # endif
122 # endif
123 #endif
124 
125 #ifndef VC_IMPL // no explicit request: derive the implementation from the compiler's target macros
126 
127 # if defined(__AVX__) // AVX target: only VC_IMPL_AVX is set, none of the VC_IMPL_SSE* macros
128 # define VC_IMPL_AVX 1
129 # else
130 # if defined(__SSE4_2__) // each SSE level reported by the compiler is recorded individually
131 # define VC_IMPL_SSE 1
132 # define VC_IMPL_SSE4_2 1
133 # endif
134 # if defined(__SSE4_1__)
135 # define VC_IMPL_SSE 1
136 # define VC_IMPL_SSE4_1 1
137 # endif
138 # if defined(__SSE3__)
139 # define VC_IMPL_SSE 1
140 # define VC_IMPL_SSE3 1
141 # endif
142 # if defined(__SSSE3__)
143 # define VC_IMPL_SSE 1
144 # define VC_IMPL_SSSE3 1
145 # endif
146 # if defined(__SSE2__)
147 # define VC_IMPL_SSE 1
148 # define VC_IMPL_SSE2 1
149 # endif
150 
151 # if defined(VC_IMPL_SSE)
152  // nothing: an SSE level was detected above, so no scalar fallback is needed
153 # else
154 # define VC_IMPL_Scalar 1
155 # endif
156 # endif
157 # if defined(VC_IMPL_AVX) || defined(VC_IMPL_SSE) // extension macros are only picked up for a SIMD (non-scalar) implementation
158 # ifdef __FMA4__
159 # define VC_IMPL_FMA4 1
160 # endif
161 # ifdef __XOP__
162 # define VC_IMPL_XOP 1
163 # endif
164 # ifdef __F16C__
165 # define VC_IMPL_F16C 1
166 # endif
167 # ifdef __POPCNT__
168 # define VC_IMPL_POPCNT 1
169 # endif
170 # ifdef __SSE4A__
171 # define VC_IMPL_SSE4a 1
172 # endif
173 # ifdef __FMA__
174 # define VC_IMPL_FMA 1
175 # endif
176 # endif
177 
178 #else // VC_IMPL was given explicitly (e.g. -DVC_IMPL=SSE3): decode its base ID and extension bits
179 
180 # if (VC_IMPL & IMPL_MASK) == AVX // AVX supersedes SSE
181 # define VC_IMPL_AVX 1
182 # elif (VC_IMPL & IMPL_MASK) == Scalar
183 # define VC_IMPL_Scalar 1
184 # elif (VC_IMPL & IMPL_MASK) == SSE4_2 // an explicit SSE level also enables every lower SSE level
185 # define VC_IMPL_SSE4_2 1
186 # define VC_IMPL_SSE4_1 1
187 # define VC_IMPL_SSSE3 1
188 # define VC_IMPL_SSE3 1
189 # define VC_IMPL_SSE2 1
190 # define VC_IMPL_SSE 1
191 # elif (VC_IMPL & IMPL_MASK) == SSE4_1
192 # define VC_IMPL_SSE4_1 1
193 # define VC_IMPL_SSSE3 1
194 # define VC_IMPL_SSE3 1
195 # define VC_IMPL_SSE2 1
196 # define VC_IMPL_SSE 1
197 # elif (VC_IMPL & IMPL_MASK) == SSSE3
198 # define VC_IMPL_SSSE3 1
199 # define VC_IMPL_SSE3 1
200 # define VC_IMPL_SSE2 1
201 # define VC_IMPL_SSE 1
202 # elif (VC_IMPL & IMPL_MASK) == SSE3
203 # define VC_IMPL_SSE3 1
204 # define VC_IMPL_SSE2 1
205 # define VC_IMPL_SSE 1
206 # elif (VC_IMPL & IMPL_MASK) == SSE2
207 # define VC_IMPL_SSE2 1
208 # define VC_IMPL_SSE 1
209 # elif (VC_IMPL & IMPL_MASK) == SSE // generic SSE request: the exact level is taken from the compiler's target macros
210 # define VC_IMPL_SSE 1
211 # if defined(__SSE4_2__)
212 # define VC_IMPL_SSE4_2 1
213 # endif
214 # if defined(__SSE4_1__)
215 # define VC_IMPL_SSE4_1 1
216 # endif
217 # if defined(__SSE3__)
218 # define VC_IMPL_SSE3 1
219 # endif
220 # if defined(__SSSE3__)
221 # define VC_IMPL_SSSE3 1
222 # endif
223 # if defined(__SSE2__)
224 # define VC_IMPL_SSE2 1
225 # endif
226 # elif (VC_IMPL & IMPL_MASK) == 0 && (VC_IMPL & SSE4a)
227  // this is for backward compatibility only where SSE4a was included in the main
228  // line of available SIMD instruction sets
229 # define VC_IMPL_SSE3 1
230 # define VC_IMPL_SSE2 1
231 # define VC_IMPL_SSE 1
232 # endif
233 # if (VC_IMPL & XOP) // extension bits are decoded independently of the base implementation chosen above
234 # define VC_IMPL_XOP 1
235 # endif
236 # if (VC_IMPL & FMA4)
237 # define VC_IMPL_FMA4 1
238 # endif
239 # if (VC_IMPL & F16C)
240 # define VC_IMPL_F16C 1
241 # endif
242 # if (VC_IMPL & POPCNT)
243 # define VC_IMPL_POPCNT 1
244 # endif
245 # if (VC_IMPL & SSE4a)
246 # define VC_IMPL_SSE4a 1
247 # endif
248 # if (VC_IMPL & FMA)
249 # define VC_IMPL_FMA 1
250 # endif
251 # undef VC_IMPL // VC_IMPL is redefined further down as the Implementation enumerator of the selected implementation
252 
253 #endif // VC_IMPL
254 
255 // If AVX is enabled in the compiler it will use VEX coding for the SIMD instructions.
256 #ifdef __AVX__
257 # define VC_USE_VEX_CODING 1
258 #endif
259 
260 #if defined(VC_GCC) && VC_GCC < 0x40300 && !defined(VC_IMPL_Scalar) // GCC older than 4.3 with a SIMD selection: discard it and force the scalar implementation
261 # ifndef VC_DONT_WARN_OLD_GCC
262 # warning "GCC < 4.3 does not have full support for SSE2 intrinsics. Using scalar types/operations only. Define VC_DONT_WARN_OLD_GCC to silence this warning."
263 # endif
264 # undef VC_IMPL_SSE
265 # undef VC_IMPL_SSE2
266 # undef VC_IMPL_SSE3
267 # undef VC_IMPL_SSE4_1
268 # undef VC_IMPL_SSE4_2
269 # undef VC_IMPL_SSSE3
270 # undef VC_IMPL_AVX
271 # undef VC_IMPL_FMA4
272 # undef VC_IMPL_XOP
273 # undef VC_IMPL_F16C
274 # undef VC_IMPL_POPCNT
275 # undef VC_IMPL_SSE4a
276 # undef VC_IMPL_FMA
277 # undef VC_USE_VEX_CODING
278 # define VC_IMPL_Scalar 1
279 #endif
280 
281 # if !defined(VC_IMPL_Scalar) && !defined(VC_IMPL_SSE) && !defined(VC_IMPL_AVX) // sanity check: one of Scalar / SSE / AVX must have been selected by now
282 # error "No suitable Vc implementation was selected! Probably VC_IMPL was set to an invalid value."
283 # elif defined(VC_IMPL_SSE) && !defined(VC_IMPL_SSE2) // an SSE selection without SSE2 is rejected
284 # error "SSE requested but no SSE2 support. Vc needs at least SSE2!"
285 # endif
286 
287 #undef Scalar // drop the temporary integer aliases that were only needed to decode VC_IMPL above
288 #undef SSE
289 #undef SSE2
290 #undef SSE3
291 #undef SSSE3
292 #undef SSE4_1
293 #undef SSE4_2
294 #undef AVX
295 
296 #undef XOP
297 #undef FMA4
298 #undef F16C
299 #undef POPCNT
300 #undef SSE4a
301 #undef FMA
302 
303 #undef IMPL_MASK
304 #undef EXT_MASK
305 
306 namespace ROOT {
307 namespace Vc {
310 };
313 };
314 enum StreamingAndAlignedFlag { // implies Aligned
316 };
319 };
320 #endif // DOXYGEN
321 
322 /**
323  * \ingroup Utilities
324  *
325  * Enum that specifies the alignment and padding restrictions to use for memory allocation with
326  * Vc::malloc.
327  */
329  /**
330  * Align on boundary of vector sizes (e.g. 16 Bytes on SSE platforms) and pad to allow
331  * vector access to the end. Thus the allocated memory contains a multiple of
332  * VectorAlignment bytes.
333  */
335  /**
336  * Align on boundary of cache line sizes (e.g. 64 Bytes on x86) and pad to allow
337  * full cache line access to the end. Thus the allocated memory contains a multiple of
338  * 64 bytes.
339  */
341  /**
342  * Align on boundary of page sizes (e.g. 4096 Bytes on x86) and pad to allow
343  * full page access to the end. Thus the allocated memory contains a multiple of
344  * 4096 bytes.
345  */
347 };
348 
349 #if __cplusplus >= 201103 /* C++11: use real constexpr; this macro is #undef'd again at the end of the header */
350 #define Vc_CONSTEXPR constexpr
351 #elif defined(__GNUC__) // pre-C++11 fallbacks: no constexpr available, so request forced inlining instead
352 #define Vc_CONSTEXPR inline __attribute__((__always_inline__, __const__))
353 #elif defined(VC_MSVC)
354 #define Vc_CONSTEXPR inline __forceinline
355 #else // any other compiler: plain inline
356 #define Vc_CONSTEXPR inline
357 #endif
362 
367 
368 /**
369  * \ingroup Utilities
370  *
371  * Enum to identify a certain SIMD instruction set.
372  *
373  * You can use \ref VC_IMPL for the currently active implementation.
374  *
375  * \see ExtraInstructions
376  */
378  /// uses only fundamental types
380  /// x86 SSE + SSE2
382  /// x86 SSE + SSE2 + SSE3
384  /// x86 SSE + SSE2 + SSE3 + SSSE3
386  /// x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
388  /// x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
390  /// x86 AVX
392  /// x86 AVX + AVX2
395 };
396 
397 /**
398  * \ingroup Utilities
399  *
400  * The list of available instructions is not easily described by a linear list of instruction sets.
401  * On x86 the following instruction sets always include their predecessors:
402  * SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2
403  *
404  * But there are additional instructions that are not necessarily required by this list. These are
405  * covered in this enum.
406  */
408  //! Support for float16 conversions in hardware
410  //! Support for FMA4 instructions
411  Fma4Instructions = 0x02000,
412  //! Support for XOP instructions
413  XopInstructions = 0x04000,
414  //! Support for the population count instruction
416  //! Support for SSE4a instructions
417  Sse4aInstructions = 0x10000,
418  //! Support for FMA instructions (3 operand variant)
419  FmaInstructions = 0x20000,
420  // PclmulqdqInstructions,
421  // AesInstructions,
422  // RdrandInstructions
423  ExtraInstructionsMask = 0xfffff000u
424 };
425 
426 #ifndef DOXYGEN
427 
428 #ifdef VC_IMPL_Scalar // map the VC_IMPL_* selection to one enumerator: Scalar, then AVX, then SSE levels from highest to lowest
429 #define VC_IMPL ::ROOT::Vc::ScalarImpl
430 #elif defined(VC_IMPL_AVX)
431 #define VC_IMPL ::ROOT::Vc::AVXImpl
432 #elif defined(VC_IMPL_SSE4_2)
433 #define VC_IMPL ::ROOT::Vc::SSE42Impl
434 #elif defined(VC_IMPL_SSE4_1)
435 #define VC_IMPL ::ROOT::Vc::SSE41Impl
436 #elif defined(VC_IMPL_SSSE3)
437 #define VC_IMPL ::ROOT::Vc::SSSE3Impl
438 #elif defined(VC_IMPL_SSE3)
439 #define VC_IMPL ::ROOT::Vc::SSE3Impl
440 #elif defined(VC_IMPL_SSE2)
441 #define VC_IMPL ::ROOT::Vc::SSE2Impl
442 #endif // no #else: the #error checks earlier in this header guarantee Scalar, AVX, or SSE with at least SSE2
443 
444 template<unsigned int Features> struct ImplementationT { enum _Value {
445  Value = Features,
448 }; };
449 
450 typedef ImplementationT<
451 #ifdef VC_USE_VEX_CODING
452  // everything will use VEX coding, so the system has to support AVX even if VC_IMPL_AVX is not set
453  // but AFAIU the OSXSAVE and xgetbv tests do not have to be positive (unless, of course, the
454  // compiler decides to insert an instruction that uses the full register size - so better be on
455  // the safe side)
456  AVXImpl
457 #else
458  VC_IMPL
459 #endif
460 #ifdef VC_IMPL_SSE4a
462 #ifdef VC_IMPL_XOP
464 #ifdef VC_IMPL_FMA4
466 #endif
467 #endif
468 #endif
469 #ifdef VC_IMPL_POPCNT
471 #endif
472 #ifdef VC_IMPL_FMA
474 #endif
476 
477 namespace Internal {
478  template<Implementation Impl> struct HelperImpl; // declared only here; no definition appears in this listing
479  typedef HelperImpl<VC_IMPL> Helper; // HelperImpl instantiated for the implementation selected via VC_IMPL
480 
481  template<typename A> struct FlagObject; // primary template is declared only; the specializations below map a flag type to its value
482  template<> struct FlagObject<AlignedFlag> { static Vc_CONSTEXPR AlignedFlag the() { return Aligned; } };
483  template<> struct FlagObject<UnalignedFlag> { static Vc_CONSTEXPR UnalignedFlag the() { return Unaligned; } };
486 } // namespace Internal
487 
488 namespace Warnings
489 {
491 #ifdef VC_HAVE_ATTRIBUTE_WARNING
492  __attribute__((warning("\n\tUse of Vc::Vector::operator[] to modify scalar entries is known to miscompile with GCC 4.3.x.\n\tPlease upgrade to a more recent GCC or avoid operator[] altogether.\n\t(This warning adds an unnecessary function call to operator[] which should work around the problem at a little extra cost.)")))
493 #endif
494  ;
495 } // namespace Warnings
496 
497 namespace Error
498 {
499  template<typename L, typename R> struct invalid_operands_of_types {}; // empty template carrying two types; NOTE(review): presumably used to surface the operand types in diagnostics - confirm against its users
500 } // namespace Error
501 
502 #endif // DOXYGEN
503 } // namespace Vc
504 } // namespace ROOT
505 
506 #undef Vc_CONSTEXPR
507 #include "version.h"
508 
509 #endif // VC_GLOBAL_H
Support for XOP instructions.
Definition: global.h:413
ImplementationT< VC_IMPL > CurrentImplementation
Definition: global.h:475
#define VC_IMPL
Definition: global.h:429
static Vc_CONSTEXPR UnalignedFlag the()
Definition: global.h:483
Namespace for new ROOT classes and functions.
Definition: ROOT.py:1
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
Definition: global.h:389
static Vc_CONSTEXPR StreamingAndUnalignedFlag the()
Definition: global.h:485
x86 SSE + SSE2
Definition: global.h:381
Align on boundary of cache line sizes (e.g.
Definition: global.h:340
StreamingAndUnalignedFlag
Definition: global.h:317
Support for FMA4 instructions.
Definition: global.h:411
void _operator_bracket_warning()
Definition: const.cpp:512
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
Definition: global.h:387
Support for float16 conversions in hardware.
Definition: global.h:409
Support for FMA instructions (3 operand variant)
Definition: global.h:419
x86 SSE + SSE2 + SSE3
Definition: global.h:383
RooCmdArg Warnings(Bool_t flag=kTRUE)
StreamingAndAlignedFlag
Definition: global.h:314
Align on boundary of vector sizes (e.g.
Definition: global.h:334
static Vc_CONSTEXPR StreamingAndAlignedFlag the()
Definition: global.h:484
HelperImpl< VC_IMPL > Helper
Definition: global.h:478
uses only fundamental types
Definition: global.h:379
Support for SSE4a instructions.
Definition: global.h:417
x86 AVX
Definition: global.h:391
x86 SSE + SSE2 + SSE3 + SSSE3
Definition: global.h:385
#define Vc_CONSTEXPR
Definition: global.h:356
Vc_CONSTEXPR StreamingAndUnalignedFlag operator&(UnalignedFlag, StreamingAndAlignedFlag)
Definition: global.h:360
MallocAlignment
Enum that specifies the alignment and padding restrictions to use for memory allocation with Vc::mall...
Definition: global.h:328
x86 AVX + AVX2
Definition: global.h:393
static Vc_CONSTEXPR AlignedFlag the()
Definition: global.h:482
Support for the population count instruction.
Definition: global.h:415
Implementation
Enum to identify a certain SIMD instruction set.
Definition: global.h:377
AlignedFlag
Definition: global.h:308
Definition: casts.h:28
UnalignedFlag
Definition: global.h:311
Align on boundary of page sizes (e.g.
Definition: global.h:346
Vc_CONSTEXPR StreamingAndUnalignedFlag operator|(UnalignedFlag, StreamingAndAlignedFlag)
Definition: global.h:358
void Error(ErrorHandler_t func, int code, const char *va_(fmt),...)
Write error message and call a handler, if required.
ExtraInstructions
The list of available instructions is not easily described by a linear list of instruction sets...
Definition: global.h:407