NumeRe v1.1.4
NumeRe: Framework für Numerische Rechnungen
float_common.h
Go to the documentation of this file.
1#ifndef FASTFLOAT_FLOAT_COMMON_H
2#define FASTFLOAT_FLOAT_COMMON_H
3
4#include <cfloat>
5#include <cstdint>
6#include <cassert>
7#include <cstring>
8#include <type_traits>
9
10#if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) \
11 || defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) \
12 || defined(__MINGW64__) \
13 || defined(__s390x__) \
14 || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)) )
15#define FASTFLOAT_64BIT
16#elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) \
17 || defined(__arm__) || defined(_M_ARM) \
18 || defined(__MINGW32__) || defined(__EMSCRIPTEN__))
19#define FASTFLOAT_32BIT
20#else
21 // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow.
22 // We can never tell the register width, but the SIZE_MAX is a good approximation.
23 // UINTPTR_MAX and INTPTR_MAX are optional, so avoid them for max portability.
24 #if SIZE_MAX == 0xffff
25 #error Unknown platform (16-bit, unsupported)
26 #elif SIZE_MAX == 0xffffffff
27 #define FASTFLOAT_32BIT
28 #elif SIZE_MAX == 0xffffffffffffffff
29 #define FASTFLOAT_64BIT
30 #else
31 #error Unknown platform (not 32-bit, not 64-bit?)
32 #endif
33#endif
34
35#if ((defined(_WIN32) || defined(_WIN64)) && !defined(__clang__))
36//#include <intrin.h>
37#endif
38
39#if defined(_MSC_VER) && !defined(__clang__)
40#define FASTFLOAT_VISUAL_STUDIO 1
41#endif
42
43#if defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
44#define FASTFLOAT_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
45#elif defined _WIN32
46#define FASTFLOAT_IS_BIG_ENDIAN 0
47#else
48#if defined(__APPLE__) || defined(__FreeBSD__)
49#include <machine/endian.h>
50#elif defined(sun) || defined(__sun)
51#include <sys/byteorder.h>
52#else
53#include <endian.h>
54#endif
55#
56#ifndef __BYTE_ORDER__
57// safe choice
58#define FASTFLOAT_IS_BIG_ENDIAN 0
59#endif
60#
61#ifndef __ORDER_LITTLE_ENDIAN__
62// safe choice
63#define FASTFLOAT_IS_BIG_ENDIAN 0
64#endif
65#
66#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
67#define FASTFLOAT_IS_BIG_ENDIAN 0
68#else
69#define FASTFLOAT_IS_BIG_ENDIAN 1
70#endif
71#endif
72
73#ifdef FASTFLOAT_VISUAL_STUDIO
74#define fastfloat_really_inline __forceinline
75#else
76#define fastfloat_really_inline inline __attribute__((always_inline))
77#endif
78
79#ifndef FASTFLOAT_ASSERT
80#define FASTFLOAT_ASSERT(x) { if (!(x)) abort(); }
81#endif
82
83#ifndef FASTFLOAT_DEBUG_ASSERT
84#include <cassert>
85#define FASTFLOAT_DEBUG_ASSERT(x) assert(x)
86#endif
87
88// rust style `try!()` macro, or `?` operator
89#define FASTFLOAT_TRY(x) { if (!(x)) return false; }
90
91namespace fast_float {
92
93// Compares two ASCII strings in a case insensitive manner.
94inline bool fastfloat_strncasecmp(const char *input1, const char *input2,
95 size_t length) {
96 char running_diff{0};
97 for (size_t i = 0; i < length; i++) {
98 running_diff |= (input1[i] ^ input2[i]);
99 }
100 return (running_diff == 0) || (running_diff == 32);
101}
102
103#ifndef FLT_EVAL_METHOD
104#error "FLT_EVAL_METHOD should be defined, please include cfloat."
105#endif
106
107// a pointer and a length to a contiguous block of memory
108template <typename T>
109struct span {
110 const T* ptr;
111 size_t length;
112 span(const T* _ptr, size_t _length) : ptr(_ptr), length(_length) {}
113 span() : ptr(nullptr), length(0) {}
114
115 constexpr size_t len() const noexcept {
116 return length;
117 }
118
119 const T& operator[](size_t index) const noexcept {
121 return ptr[index];
122 }
123};
124
125struct value128 {
126 uint64_t low;
127 uint64_t high;
128 value128(uint64_t _low, uint64_t _high) : low(_low), high(_high) {}
129 value128() : low(0), high(0) {}
130};
131
132/* result might be undefined when input_num is zero */
134 assert(input_num > 0);
135#ifdef FASTFLOAT_VISUAL_STUDIO
136 #if defined(_M_X64) || defined(_M_ARM64)
137 unsigned long leading_zero = 0;
138 // Search the mask data from most significant bit (MSB)
139 // to least significant bit (LSB) for a set bit (1).
140 _BitScanReverse64(&leading_zero, input_num);
141 return (int)(63 - leading_zero);
142 #else
143 int last_bit = 0;
144 if(input_num & uint64_t(0xffffffff00000000)) input_num >>= 32, last_bit |= 32;
145 if(input_num & uint64_t( 0xffff0000)) input_num >>= 16, last_bit |= 16;
146 if(input_num & uint64_t( 0xff00)) input_num >>= 8, last_bit |= 8;
147 if(input_num & uint64_t( 0xf0)) input_num >>= 4, last_bit |= 4;
148 if(input_num & uint64_t( 0xc)) input_num >>= 2, last_bit |= 2;
149 if(input_num & uint64_t( 0x2)) input_num >>= 1, last_bit |= 1;
150 return 63 - last_bit;
151 #endif
152#else
153 return __builtin_clzll(input_num);
154#endif
155}
156
157#ifdef FASTFLOAT_32BIT
158
159// slow emulation routine for 32-bit
160fastfloat_really_inline uint64_t emulu(uint32_t x, uint32_t y) {
161 return x * (uint64_t)y;
162}
163
164// slow emulation routine for 32-bit
165#if !defined(__MINGW64__)
166fastfloat_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd,
167 uint64_t *hi) {
168 uint64_t ad = emulu((uint32_t)(ab >> 32), (uint32_t)cd);
169 uint64_t bd = emulu((uint32_t)ab, (uint32_t)cd);
170 uint64_t adbc = ad + emulu((uint32_t)ab, (uint32_t)(cd >> 32));
171 uint64_t adbc_carry = !!(adbc < ad);
172 uint64_t lo = bd + (adbc << 32);
173 *hi = emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) +
174 (adbc_carry << 32) + !!(lo < bd);
175 return lo;
176}
177#endif // !__MINGW64__
178
179#endif // FASTFLOAT_32BIT
180
181
182// compute 64-bit a*b
184 uint64_t b) {
185 value128 answer;
186#ifdef _M_ARM64
187 // ARM64 has native support for 64-bit multiplications, no need to emulate
188 answer.high = __umulh(a, b);
189 answer.low = a * b;
190#elif defined(FASTFLOAT_32BIT) || (defined(_WIN64) && !defined(__clang__))
191 answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64
192#elif defined(FASTFLOAT_64BIT)
193 __uint128_t r = ((__uint128_t)a) * b;
194 answer.low = uint64_t(r);
195 answer.high = uint64_t(r >> 64);
196#else
197 #error Not implemented
198#endif
199 return answer;
200}
201
203 uint64_t mantissa{0};
204 int32_t power2{0}; // a negative value indicates an invalid result
205 adjusted_mantissa() = default;
206 bool operator==(const adjusted_mantissa &o) const {
207 return mantissa == o.mantissa && power2 == o.power2;
208 }
209 bool operator!=(const adjusted_mantissa &o) const {
210 return mantissa != o.mantissa || power2 != o.power2;
211 }
212};
213
214// Bias so we can get the real exponent with an invalid adjusted_mantissa.
215constexpr static int32_t invalid_am_bias = -0x8000;
216
217constexpr static double powers_of_ten_double[] = {
218 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11,
219 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
220constexpr static float powers_of_ten_float[] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5,
221 1e6, 1e7, 1e8, 1e9, 1e10};
222
223template <typename T> struct binary_format {
224 using equiv_uint = typename std::conditional<sizeof(T) == 4, uint32_t, uint64_t>::type;
225
226 static inline constexpr int mantissa_explicit_bits();
227 static inline constexpr int minimum_exponent();
228 static inline constexpr int infinite_power();
229 static inline constexpr int sign_index();
230 static inline constexpr int min_exponent_fast_path();
231 static inline constexpr int max_exponent_fast_path();
232 static inline constexpr int max_exponent_round_to_even();
233 static inline constexpr int min_exponent_round_to_even();
234 static inline constexpr uint64_t max_mantissa_fast_path();
235 static inline constexpr int largest_power_of_ten();
236 static inline constexpr int smallest_power_of_ten();
237 static inline constexpr T exact_power_of_ten(int64_t power);
238 static inline constexpr size_t max_digits();
239 static inline constexpr equiv_uint exponent_mask();
240 static inline constexpr equiv_uint mantissa_mask();
241 static inline constexpr equiv_uint hidden_bit_mask();
242};
243
244template <> inline constexpr int binary_format<double>::mantissa_explicit_bits() {
245 return 52;
246}
247template <> inline constexpr int binary_format<float>::mantissa_explicit_bits() {
248 return 23;
249}
250
251template <> inline constexpr int binary_format<double>::max_exponent_round_to_even() {
252 return 23;
253}
254
255template <> inline constexpr int binary_format<float>::max_exponent_round_to_even() {
256 return 10;
257}
258
259template <> inline constexpr int binary_format<double>::min_exponent_round_to_even() {
260 return -4;
261}
262
263template <> inline constexpr int binary_format<float>::min_exponent_round_to_even() {
264 return -17;
265}
266
267template <> inline constexpr int binary_format<double>::minimum_exponent() {
268 return -1023;
269}
270template <> inline constexpr int binary_format<float>::minimum_exponent() {
271 return -127;
272}
273
274template <> inline constexpr int binary_format<double>::infinite_power() {
275 return 0x7FF;
276}
277template <> inline constexpr int binary_format<float>::infinite_power() {
278 return 0xFF;
279}
280
281template <> inline constexpr int binary_format<double>::sign_index() { return 63; }
282template <> inline constexpr int binary_format<float>::sign_index() { return 31; }
283
284template <> inline constexpr int binary_format<double>::min_exponent_fast_path() {
285#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
286 return 0;
287#else
288 return -22;
289#endif
290}
291template <> inline constexpr int binary_format<float>::min_exponent_fast_path() {
292#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
293 return 0;
294#else
295 return -10;
296#endif
297}
298
299template <> inline constexpr int binary_format<double>::max_exponent_fast_path() {
300 return 22;
301}
302template <> inline constexpr int binary_format<float>::max_exponent_fast_path() {
303 return 10;
304}
305
306template <> inline constexpr uint64_t binary_format<double>::max_mantissa_fast_path() {
307 return uint64_t(2) << mantissa_explicit_bits();
308}
309template <> inline constexpr uint64_t binary_format<float>::max_mantissa_fast_path() {
310 return uint64_t(2) << mantissa_explicit_bits();
311}
312
313template <>
314inline constexpr double binary_format<double>::exact_power_of_ten(int64_t power) {
316}
317template <>
318inline constexpr float binary_format<float>::exact_power_of_ten(int64_t power) {
319
321}
322
323
324template <>
326 return 308;
327}
328template <>
330 return 38;
331}
332
333template <>
335 return -342;
336}
337template <>
339 return -65;
340}
341
342template <> inline constexpr size_t binary_format<double>::max_digits() {
343 return 769;
344}
345template <> inline constexpr size_t binary_format<float>::max_digits() {
346 return 114;
347}
348
349template <> inline constexpr binary_format<float>::equiv_uint
351 return 0x7F800000;
352}
353template <> inline constexpr binary_format<double>::equiv_uint
355 return 0x7FF0000000000000;
356}
357
358template <> inline constexpr binary_format<float>::equiv_uint
360 return 0x007FFFFF;
361}
362template <> inline constexpr binary_format<double>::equiv_uint
364 return 0x000FFFFFFFFFFFFF;
365}
366
367template <> inline constexpr binary_format<float>::equiv_uint
369 return 0x00800000;
370}
371template <> inline constexpr binary_format<double>::equiv_uint
373 return 0x0010000000000000;
374}
375
376template<typename T>
377fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &value) {
378 uint64_t word = am.mantissa;
379 word |= uint64_t(am.power2) << binary_format<T>::mantissa_explicit_bits();
380 word = negative
381 ? word | (uint64_t(1) << binary_format<T>::sign_index()) : word;
382#if FASTFLOAT_IS_BIG_ENDIAN == 1
383 if (std::is_same<T, float>::value) {
384 ::memcpy(&value, (char *)&word + 4, sizeof(T)); // extract value at offset 4-7 if float on big-endian
385 } else {
386 ::memcpy(&value, &word, sizeof(T));
387 }
388#else
389 // For little-endian systems:
390 ::memcpy(&value, &word, sizeof(T));
391#endif
392}
393
394} // namespace fast_float
395
396#endif
#define FASTFLOAT_DEBUG_ASSERT(x)
Definition: float_common.h:85
#define fastfloat_really_inline
Definition: float_common.h:76
constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept
fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &value)
Definition: float_common.h:377
fastfloat_really_inline int leading_zeroes(uint64_t input_num)
Definition: float_common.h:133
static constexpr int32_t invalid_am_bias
Definition: float_common.h:215
bool fastfloat_strncasecmp(const char *input1, const char *input2, size_t length)
Definition: float_common.h:94
static constexpr float powers_of_ten_float[]
Definition: float_common.h:220
fastfloat_really_inline value128 full_multiplication(uint64_t a, uint64_t b)
Definition: float_common.h:183
static constexpr double powers_of_ten_double[]
Definition: float_common.h:217
bool operator==(const adjusted_mantissa &o) const
Definition: float_common.h:206
bool operator!=(const adjusted_mantissa &o) const
Definition: float_common.h:209
static constexpr int mantissa_explicit_bits()
static constexpr int min_exponent_round_to_even()
typename std::conditional< sizeof(T)==4, uint32_t, uint64_t >::type equiv_uint
Definition: float_common.h:224
static constexpr int max_exponent_round_to_even()
static constexpr uint64_t max_mantissa_fast_path()
static constexpr equiv_uint hidden_bit_mask()
static constexpr int largest_power_of_ten()
static constexpr int max_exponent_fast_path()
static constexpr int sign_index()
static constexpr equiv_uint exponent_mask()
static constexpr size_t max_digits()
static constexpr equiv_uint mantissa_mask()
static constexpr T exact_power_of_ten(int64_t power)
static constexpr int smallest_power_of_ten()
static constexpr int infinite_power()
static constexpr int min_exponent_fast_path()
static constexpr int minimum_exponent()
const T & operator[](size_t index) const noexcept
Definition: float_common.h:119
constexpr size_t len() const noexcept
Definition: float_common.h:115
span(const T *_ptr, size_t _length)
Definition: float_common.h:112
value128(uint64_t _low, uint64_t _high)
Definition: float_common.h:128