ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
logarithm.h
Go to the documentation of this file.
1 /* This file is part of the Vc library.
2 
3  Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
4 
5  Vc is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as
7  published by the Free Software Foundation, either version 3 of
8  the License, or (at your option) any later version.
9 
10  Vc is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17 
18 */
19 
20 /* The log implementations are based on code from Julien Pommier which carries the following
21  copyright information:
22  */
23 /*
24  Inspired by Intel Approximate Math library, and based on the
25  corresponding algorithms of the cephes math library
26 */
27 /* Copyright (C) 2007 Julien Pommier
28 
29  This software is provided 'as-is', without any express or implied
30  warranty. In no event will the authors be held liable for any damages
31  arising from the use of this software.
32 
33  Permission is granted to anyone to use this software for any purpose,
34  including commercial applications, and to alter it and redistribute it
35  freely, subject to the following restrictions:
36 
37  1. The origin of this software must not be misrepresented; you must not
38  claim that you wrote the original software. If you use this software
39  in a product, an acknowledgment in the product documentation would be
40  appreciated but is not required.
41  2. Altered source versions must be plainly marked as such, and must not be
42  misrepresented as being the original software.
43  3. This notice may not be removed or altered from any source distribution.
44 
45  (this is the zlib license)
46 */
47 
48 #ifndef VC_COMMON_LOGARITHM_H
49 #define VC_COMMON_LOGARITHM_H
50 
51 #include "macros.h"
52 namespace ROOT {
53 namespace Vc
54 {
55 namespace Common
56 {
57 #ifdef VC__USE_NAMESPACE
58 using Vc::VC__USE_NAMESPACE::Const;
59 using Vc::VC__USE_NAMESPACE::Vector;
60 namespace Internal
61 {
62  using namespace Vc::VC__USE_NAMESPACE::Internal;
63 } // namespace Internal
64 #endif
67 };
68 
69 template<LogarithmBase Base>
70 struct LogImpl
71 {
72  template<typename T> static Vc_ALWAYS_INLINE void log_series(Vector<T> &VC_RESTRICT x, typename Vector<T>::AsArg exponent) {
73  typedef Vector<T> V;
74  typedef Const<T> C;
75  // Taylor series around x = 2^exponent
76  // f(x) = ln(x) → exponent * ln(2) → C::ln2_small + C::ln2_large
77  // f'(x) = x⁻¹ → x → 1
78  // f''(x) = - x⁻² → -x² / 2 → C::_1_2()
79  // = 2!x⁻³ → x³ / 3 → C::P(8)
80  // = -3!x⁻⁴ → -x⁴ / 4 → C::P(7)
81  // = 4!x⁻⁵ → x⁵ / 5 → C::P(6)
82  // ...
83  // The high order coefficients are adjusted to reduce the error that occurs from ommission
84  // of higher order terms.
85  // P(0) is the smallest term and |x| < 1 ⇒ |xⁿ| > |xⁿ⁺¹|
86  // The order of additions must go from smallest to largest terms
87  const V x2 = x * x; // 0 → 4
88 #ifdef VC_LOG_ILP
89  V y2 = (C::P(6) * /*4 → 8*/ x2 + /* 8 → 11*/ C::P(7) * /*1 → 5*/ x) + /*11 → 14*/ C::P(8);
90  V y0 = (C::P(0) * /*5 → 9*/ x2 + /* 9 → 12*/ C::P(1) * /*2 → 6*/ x) + /*12 → 15*/ C::P(2);
91  V y1 = (C::P(3) * /*6 → 10*/ x2 + /*10 → 13*/ C::P(4) * /*3 → 7*/ x) + /*13 → 16*/ C::P(5);
92  const V x3 = x2 * x; // 7 → 11
93  const V x6 = x3 * x3; // 11 → 15
94  const V x9 = x6 * x3; // 15 → 19
95  V y = (y0 * /*19 → 23*/ x9 + /*23 → 26*/ y1 * /*16 → 20*/ x6) + /*26 → 29*/ y2 * /*14 → 18*/ x3;
96 #elif defined VC_LOG_ILP2
97  /*
98  * name start done
99  * movaps %xmm0, %xmm1 ; x 0 1
100  * movaps %xmm0, %xmm2 ; x 0 1
101  * mulps %xmm1, %xmm1 ; x2 1 5 *xmm1
102  * movaps <P8>, %xmm15 ; y8 1 2
103  * mulps %xmm1, %xmm2 ; x3 5 9 *xmm2
104  * movaps %xmm1, %xmm3 ; x2 5 6
105  * movaps %xmm1, %xmm4 ; x2 5 6
106  * mulps %xmm3, %xmm3 ; x4 6 10 *xmm3
107  * movaps %xmm2, %xmm5 ; x3 9 10
108  * movaps %xmm2, %xmm6 ; x3 9 10
109  * mulps %xmm2, %xmm4 ; x5 9 13 *xmm4
110  * movaps %xmm3, %xmm7 ; x4 10 11
111  * movaps %xmm3, %xmm8 ; x4 10 11
112  * movaps %xmm3, %xmm9 ; x4 10 11
113  * mulps %xmm5, %xmm5 ; x6 10 14 *xmm5
114  * mulps %xmm3, %xmm6 ; x7 11 15 *xmm6
115  * mulps %xmm7, %xmm7 ; x8 12 16 *xmm7
116  * movaps %xmm4, %xmm10 ; x5 13 14
117  * mulps %xmm4, %xmm8 ; x9 13 17 *xmm8
118  * mulps %xmm5, %xmm10 ; x11 14 18 *xmm10
119  * mulps %xmm5, %xmm9 ; x10 15 19 *xmm9
120  * mulps <P0>, %xmm10 ; y0 18 22
121  * mulps <P1>, %xmm9 ; y1 19 23
122  * mulps <P2>, %xmm8 ; y2 20 24
123  * mulps <P3>, %xmm7 ; y3 21 25
124  * addps %xmm10, %xmm9 ; y 23 26
125  * addps %xmm9, %xmm8 ; y 26 29
126  * addps %xmm8, %xmm7 ; y 29 32
127  */
128  const V x3 = x2 * x; // 4 → 8
129  const V x4 = x2 * x2; // 5 → 9
130  const V x5 = x2 * x3; // 8 → 12
131  const V x6 = x3 * x3; // 9 → 13
132  const V x7 = x4 * x3; //
133  const V x8 = x4 * x4;
134  const V x9 = x5 * x4;
135  const V x10 = x5 * x5;
136  const V x11 = x5 * x6; // 13 → 17
137  V y = C::P(0) * x11 + C::P(1) * x10 + C::P(2) * x9 + C::P(3) * x8 + C::P(4) * x7
138  + C::P(5) * x6 + C::P(6) * x5 + C::P(7) * x4 + C::P(8) * x3;
139 #else
140  V y = C::P(0);
141  unrolled_loop16(i, 1, 9,
142  y = y * x + C::P(i);
143  );
144  y *= x * x2;
145 #endif
146  switch (Base) {
147  case BaseE:
148  // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
149  y += exponent * C::ln2_small();
150  y -= x2 * C::_1_2(); // [0, 0.25[
151  x += y;
152  x += exponent * C::ln2_large();
153  break;
154  case Base10:
155  y += exponent * C::ln2_small();
156  y -= x2 * C::_1_2(); // [0, 0.25[
157  x += y;
158  x += exponent * C::ln2_large();
159  x *= C::log10_e();
160  break;
161  case Base2:
162  {
163  const V x_ = x;
164  x *= C::log2_e();
165  y *= C::log2_e();
166  y -= x_ * x * C::_1_2(); // [0, 0.25[
167  x += y;
168  x += exponent;
169  break;
170  }
171  }
172  }
173 
174  static Vc_ALWAYS_INLINE void log_series(Vector<double> &VC_RESTRICT x, Vector<double>::AsArg exponent) {
175  typedef Vector<double> V;
176  typedef Const<double> C;
177  const V x2 = x * x;
178  V y = C::P(0);
179  V y2 = C::Q(0) + x;
180  unrolled_loop16(i, 1, 5,
181  y = y * x + C::P(i);
182  y2 = y2 * x + C::Q(i);
183  );
184  y2 = x / y2;
185  y = y * x + C::P(5);
186  y = x2 * y * y2;
187  // TODO: refactor the following with the float implementation:
188  switch (Base) {
189  case BaseE:
190  // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
191  y += exponent * C::ln2_small();
192  y -= x2 * C::_1_2(); // [0, 0.25[
193  x += y;
194  x += exponent * C::ln2_large();
195  break;
196  case Base10:
197  y += exponent * C::ln2_small();
198  y -= x2 * C::_1_2(); // [0, 0.25[
199  x += y;
200  x += exponent * C::ln2_large();
201  x *= C::log10_e();
202  break;
203  case Base2:
204  {
205  const V x_ = x;
206  x *= C::log2_e();
207  y *= C::log2_e();
208  y -= x_ * x * C::_1_2(); // [0, 0.25[
209  x += y;
210  x += exponent;
211  break;
212  }
213  }
214  }
215 
216  template<typename T> static inline Vector<T> calc(VC_ALIGNED_PARAMETER(Vector<T>) _x) {
217  typedef Vector<T> V;
218  typedef typename V::Mask M;
219  typedef Const<T> C;
220 
221  V x(_x);
222 
223  const M invalidMask = x < V::Zero();
224  const M infinityMask = x == V::Zero();
225  const M denormal = x <= C::min();
226 
227  x(denormal) *= V(Vc_buildDouble(1, 0, 54)); // 2²⁵
228  V exponent = Internal::exponent(x.data()); // = ⎣log₂(x)⎦
229  exponent(denormal) -= 54;
230 
231  x.setZero(C::exponentMask()); // keep only the fractional part ⇒ x ∈ [1, 2[
232  x |= C::_1_2(); // and set the exponent to 2⁻¹ ⇒ x ∈ [½, 1[
233 
234  // split calculation in two cases:
235  // A: x ∈ [½, √½[
236  // B: x ∈ [√½, 1[
237  // √½ defines the point where Δe(x) := log₂(x) - ⎣log₂(x)⎦ = ½, i.e.
238  // log₂(√½) - ⎣log₂(√½)⎦ = ½ * -1 - ⎣½ * -1⎦ = -½ + 1 = ½
239 
240  const M smallX = x < C::_1_sqrt2();
241  x(smallX) += x; // => x ∈ [√½, 1[ ∪ [1.5, 1 + √½[
242  x -= V::One(); // => x ∈ [√½ - 1, 0[ ∪ [0.5, √½[
243  exponent(!smallX) += V::One();
244 
245  log_series(x, exponent); // A: (ˣ⁄₂ᵉ - 1, e) B: (ˣ⁄₂ᵉ⁺¹ - 1, e + 1)
246 
247  x.setQnan(invalidMask); // x < 0 → NaN
248  x(infinityMask) = C::neginf(); // x = 0 → -∞
249 
250  return x;
251  }
252 };
253 
254 template<typename T> static Vc_ALWAYS_INLINE Vc_CONST Vector<T> log(VC_ALIGNED_PARAMETER(Vector<T>) x) {
255  return LogImpl<BaseE>::calc(x);
256 }
257 template<typename T> static Vc_ALWAYS_INLINE Vc_CONST Vector<T> log10(VC_ALIGNED_PARAMETER(Vector<T>) x) {
258  return LogImpl<Base10>::calc(x);
259 }
260 template<typename T> static Vc_ALWAYS_INLINE Vc_CONST Vector<T> log2(VC_ALIGNED_PARAMETER(Vector<T>) x) {
261  return LogImpl<Base2>::calc(x);
262 }
263 } // namespace Common
264 #ifdef VC__USE_NAMESPACE
265 namespace VC__USE_NAMESPACE
266 {
267  using Vc::Common::log;
268  using Vc::Common::log10;
269  using Vc::Common::log2;
270 } // namespace VC__USE_NAMESPACE
271 #undef VC__USE_NAMESPACE
272 #endif
273 } // namespace Vc
274 } // namespace ROOT
275 #include "undomacros.h"
276 
277 #endif // VC_COMMON_LOGARITHM_H
static Vector< T > calc(VC_ALIGNED_PARAMETER(Vector< T >) _x)
Definition: logarithm.h:216
static Vc_ALWAYS_INLINE int_v min(const int_v &x, const int_v &y)
Definition: vector.h:433
static Vc_ALWAYS_INLINE Vc_CONST Vector< T > log2(VC_ALIGNED_PARAMETER(Vector< T >) x)
Definition: logarithm.h:260
static const float log2_e
Definition: exponential.h:45
static const double x2[5]
Double_t x[n]
Definition: legend1.C:17
static Vc_ALWAYS_INLINE Vc_CONST Vector< T > log(VC_ALIGNED_PARAMETER(Vector< T >) x)
Definition: logarithm.h:254
static const double x4[22]
#define VC_ALIGNED_PARAMETER(_Type)
Definition: macros.h:368
static double C[]
#define Vc_buildDouble(sign, mantissa, exponent)
Definition: macros.h:283
#define Vc_CONST
Definition: macros.h:133
static Vc_ALWAYS_INLINE Vc_CONST Vector< T > log10(VC_ALIGNED_PARAMETER(Vector< T >) x)
Definition: logarithm.h:257
static Vc_ALWAYS_INLINE void log_series(Vector< T > &VC_RESTRICT x, typename Vector< T >::AsArg exponent)
Definition: logarithm.h:72
#define Vc_ALWAYS_INLINE
Definition: macros.h:130
MyComplex< T > P(MyComplex< T > z, T c_real, T c_imag)
[MyComplex]
Definition: mandel.cpp:155
Double_t y[n]
Definition: legend1.C:17
#define VC_RESTRICT
Definition: macros.h:145
Vc_INTRINSIC Vc_CONST m256 exponent(param256 v)
Definition: vectorhelper.h:37
#define unrolled_loop16(_it_, _start_, _end_, _code_)
Definition: macros.h:183
#define VC__USE_NAMESPACE
Definition: math.h:115
static Vc_ALWAYS_INLINE void log_series(Vector< double > &VC_RESTRICT x, Vector< double >::AsArg exponent)
Definition: logarithm.h:174
static double Q[]
static const double x3[11]