ROOT  6.06/09
Reference Guide
support.cpp
Go to the documentation of this file.
1 /* This file is part of the Vc library.
2 
3  Copyright (C) 2010-2012 Matthias Kretz <kretz@kde.org>
4 
5  Vc is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as
7  published by the Free Software Foundation, either version 3 of
8  the License, or (at your option) any later version.
9 
10  Vc is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17 
18 */
19 
20 #include <Vc/global.h>
21 #include <Vc/cpuid.h>
22 #include <Vc/support.h>
23 
24 #ifdef VC_MSVC
25 #include <intrin.h>
26 #endif
27 
28 #if defined(VC_GCC) && VC_GCC >= 0x40400
29 #define VC_TARGET_NO_SIMD __attribute__((target("no-sse2,no-avx")))
30 #else
31 #define VC_TARGET_NO_SIMD
32 #endif
33 
34 namespace ROOT {
35 namespace Vc
36 {
37 
/**
 * Tests whether every bit of \p bits is set in the value reported by the XGETBV instruction.
 *
 * The MSVC branch queries _XCR_XFEATURE_ENABLED_MASK through the _xgetbv intrinsic; the GNU
 * inline-asm branch executes XGETBV with ECX = 0 and inspects the low 32 bits returned in EAX
 * (EDX, which receives the high half, is declared as clobbered).
 *
 * \param bits mask of bits that must all be enabled.
 * \return true if all requested bits are set. If neither the intrinsic nor inline asm is
 *         available the check cannot be performed and true is returned unconditionally.
 *         This now includes an empty mask, which previously returned false in the fallback
 *         branch only, while both real checks return true for it.
 */
static inline bool xgetbvCheck(unsigned int bits)
{
#if defined(VC_MSVC) && VC_MSVC >= 160040219 // MSVC 2010 SP1 introduced _xgetbv
    unsigned long long xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    return (xcrFeatureMask & bits) == bits;
#elif defined(VC_GNU_ASM) && !defined(VC_NO_XGETBV)
    unsigned int eax;
    asm("xgetbv" : "=a"(eax) : "c"(0) : "edx");
    return (eax & bits) == bits;
#else
    // can't check, but if OSXSAVE is true let's assume it'll work
    static_cast<void>(bits); // silences 'warning: unused parameter' without affecting the result
    return true;
#endif
}
53 
// NOTE(review): the signature line of this body is not present in this listing. The
// cross-reference section at the end of the page gives it as
// `VC_TARGET_NO_SIMD bool isImplementationSupported(Vc::Implementation impl)`.
//
// Maps the requested implementation `impl` to the matching CpuId feature query and returns
// whether that implementation can be used.
56 {
// Called before any CpuId::has*() query below.
57  CpuId::init();
58 
59  switch (impl) {
// The scalar implementation is always reported as supported.
60  case ScalarImpl:
61  return true;
62  case SSE2Impl:
63  return CpuId::hasSse2();
64  case SSE3Impl:
65  return CpuId::hasSse3();
66  case SSSE3Impl:
67  return CpuId::hasSsse3();
68  case SSE41Impl:
69  return CpuId::hasSse41();
70  case SSE42Impl:
71  return CpuId::hasSse42();
72  case AVXImpl:
// AVX needs three things: OSXSAVE (checked first, so xgetbvCheck only runs when it is set),
// the AVX CPUID flag, and bits 1 and 2 (mask 0x6) of the XGETBV result.
// NOTE(review): presumably those are the XMM and YMM state-enable bits - confirm against the
// Intel SDM.
73  return CpuId::hasOsxsave() && CpuId::hasAvx() && xgetbvCheck(0x6);
// AVX2 and the ImplementationMask value are always reported as unsupported here.
74  case AVX2Impl:
75  return false;
76  case ImplementationMask:
77  return false;
78  }
// Reached only for values of impl not matched by any case above.
79  return false;
80 }
81 
// NOTE(review): the signature line of this body is not present in this listing. The
// cross-reference section at the end of the page gives it as
// `VC_TARGET_NO_SIMD Vc::Implementation bestImplementationSupported()`.
//
// Walks the SSE feature ladder from the bottom up and returns the implementation just below
// the first missing feature; AVX is only considered once SSE4.2 is present.
84 {
// Called before any CpuId::has*() query below.
85  CpuId::init();
86 
// Each line returns the level below the first feature the CPU lacks.
87  if (!CpuId::hasSse2 ()) return Vc::ScalarImpl;
88  if (!CpuId::hasSse3 ()) return Vc::SSE2Impl;
89  if (!CpuId::hasSsse3()) return Vc::SSE3Impl;
90  if (!CpuId::hasSse41()) return Vc::SSSE3Impl;
91  if (!CpuId::hasSse42()) return Vc::SSE41Impl;
// AVX requires the CPUID flag, OSXSAVE, and bits 1 and 2 (mask 0x6) of the XGETBV result;
// xgetbvCheck is evaluated last, so it only runs when OSXSAVE is reported.
92  if (CpuId::hasAvx() && CpuId::hasOsxsave() && xgetbvCheck(0x6)) {
93  return Vc::AVXImpl;
94  }
// SSE4.2 is available but AVX is not usable. AVX2Impl is never returned by this body.
95  return Vc::SSE42Impl;
96 }
97 
// NOTE(review): the signature line of this body is not present in this listing. The
// cross-reference section at the end of the page gives it as
// `VC_TARGET_NO_SIMD unsigned int extraInstructionsSupported()`.
//
// Builds and returns a bitmask with one Vc::*Instructions flag OR-ed in for every optional
// instruction-set extension that CpuId reports as available.
//
// NOTE(review): listing numbers 102 and 105 are absent from this view. The cross-reference
// section mentions hasF16c() and hasPopcnt(), so two further flag checks may be missing here -
// verify against the real file.
// NOTE(review): no CpuId::init() call is visible in this body - confirm that CpuId has been
// initialised before this runs.
100 {
101  unsigned int flags = 0;
103  if (CpuId::hasFma4()) flags |= Vc::Fma4Instructions;
104  if (CpuId::hasXop ()) flags |= Vc::XopInstructions;
106  if (CpuId::hasSse4a()) flags |= Vc::Sse4aInstructions;
107  if (CpuId::hasFma ()) flags |= Vc::FmaInstructions;
// The three checks below are disabled in the original source.
108  //if (CpuId::hasPclmulqdq()) flags |= Vc::PclmulqdqInstructions;
109  //if (CpuId::hasAes()) flags |= Vc::AesInstructions;
110  //if (CpuId::hasRdrand()) flags |= Vc::RdrandInstructions;
111  return flags;
112 }
113 
114 } // namespace Vc
115 } // namespace ROOT
116 
117 #undef VC_TARGET_NO_SIMD
118 
119 // vim: sw=4 sts=4 et tw=100
Support for XOP instructions.
Definition: global.h:413
static bool hasXop()
Return whether the CPU supports the XOP instructions.
Definition: cpuid.h:159
Namespace for new ROOT classes and functions.
Definition: ROOT.py:1
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
Definition: global.h:389
static bool hasFma()
Return whether the CPU supports FMA extensions using YMM state.
Definition: cpuid.h:95
VC_TARGET_NO_SIMD Vc::Implementation bestImplementationSupported()
Determines the best supported implementation for the current system.
Definition: support.cpp:83
x86 SSE + SSE2
Definition: global.h:381
Support for FMA4 instructions.
Definition: global.h:411
static void init()
Reads the CPU capabilities and stores them for faster subsequent access.
Definition: cpuid.cpp:120
VC_TARGET_NO_SIMD bool isImplementationSupported(Vc::Implementation impl)
Tests whether the given implementation is supported by the system the code is executing on.
Definition: support.cpp:55
static bool hasFma4()
Return whether the CPU supports the FMA4 instructions.
Definition: cpuid.h:161
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
Definition: global.h:387
Support for float16 conversions in hardware.
Definition: global.h:409
static bool hasSse42()
Return whether the CPU supports SSE 4.2.
Definition: cpuid.h:105
Support for FMA instructions (3 operand variant)
Definition: global.h:419
x86 SSE + SSE2 + SSE3
Definition: global.h:383
static bool hasSse2()
Return whether the CPU supports SSE2.
Definition: cpuid.h:150
static bool hasSse41()
Return whether the CPU supports SSE 4.1.
Definition: cpuid.h:103
static bool hasOsxsave()
Return whether the CPU and OS support the XSETBV/XGETBV instructions.
Definition: cpuid.h:115
uses only fundamental types
Definition: global.h:379
Support for SSE4a instructions.
Definition: global.h:417
VC_TARGET_NO_SIMD unsigned int extraInstructionsSupported()
Determines the extra instructions supported by the current CPU.
Definition: support.cpp:99
x86 AVX
Definition: global.h:391
static bool hasF16c()
Return whether the CPU supports 16-bit floating-point conversion instructions.
Definition: cpuid.h:119
static VC_TARGET_NO_SIMD bool xgetbvCheck(unsigned int bits)
Definition: support.cpp:39
static bool hasSse4a()
Return whether the CPU supports SSE4a.
Definition: cpuid.h:153
x86 SSE + SSE2 + SSE3 + SSSE3
Definition: global.h:385
static bool hasPopcnt()
Return whether the CPU supports the POPCNT instruction.
Definition: cpuid.h:109
static bool hasSse3()
Return whether the CPU supports SSE3.
Definition: cpuid.h:79
x86 AVX + AVX2
Definition: global.h:393
#define VC_TARGET_NO_SIMD
Definition: support.cpp:31
Support for the population count instruction.
Definition: global.h:415
Implementation
Enum to identify a certain SIMD instruction set.
Definition: global.h:377
Definition: casts.h:28
static bool hasSsse3()
Return whether the CPU supports SSSE3.
Definition: cpuid.h:93
static bool hasAvx()
Return whether the CPU supports AVX.
Definition: cpuid.h:117