NumeRe v1.1.4
NumeRe: Framework für Numerische Rechnungen
decimal_to_binary.h
Go to the documentation of this file.
1#ifndef FASTFLOAT_DECIMAL_TO_BINARY_H
2#define FASTFLOAT_DECIMAL_TO_BINARY_H
3
4#include "float_common.h"
5#include "fast_table.h"
6#include <cfloat>
7#include <cinttypes>
8#include <cmath>
9#include <cstdint>
10#include <cstdlib>
11#include <cstring>
12
13namespace fast_float {
14
15// This will compute or rather approximate w * 5**q and return a pair of 64-bit words approximating
16// the result, with the "high" part corresponding to the most significant bits and the
17// low part corresponding to the least significant bits.
18//
19template <int bit_precision>
22 const int index = 2 * int(q - powers::smallest_power_of_five);
23 // For small values of q, e.g., q in [0,27], the answer is always exact because
24 // The line value128 firstproduct = full_multiplication(w, power_of_five_128[index]);
25 // gives the exact answer.
27 static_assert((bit_precision >= 0) && (bit_precision <= 64), " precision should be in (0,64]");
28 constexpr uint64_t precision_mask = (bit_precision < 64) ?
29 (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision)
30 : uint64_t(0xFFFFFFFFFFFFFFFF);
31 if((firstproduct.high & precision_mask) == precision_mask) { // could further guard with (lower + w < lower)
32 // regarding the second product, we only need secondproduct.high, but our expectation is that the compiler will optimize this extra work away if needed.
33 value128 secondproduct = full_multiplication(w, powers::power_of_five_128[index + 1]);
34 firstproduct.low += secondproduct.high;
35 if(secondproduct.high > firstproduct.low) {
36 firstproduct.high++;
37 }
38 }
39 return firstproduct;
40}
41
42namespace detail {
58 constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept {
59 return (((152170 + 65536) * q) >> 16) + 63;
60 }
61} // namespace detail
62
63// create an adjusted mantissa, biased by the invalid power2
64// for significant digits already multiplied by 10 ** q.
65template <typename binary>
67adjusted_mantissa compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept {
68 int hilz = int(w >> 63) ^ 1;
69 adjusted_mantissa answer;
70 answer.mantissa = w << hilz;
71 int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent();
72 answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 + invalid_am_bias);
73 return answer;
74}
75
76// w * 10 ** q, without rounding the representation up.
77// the power2 in the exponent will be adjusted by invalid_am_bias.
78template <typename binary>
80adjusted_mantissa compute_error(int64_t q, uint64_t w) noexcept {
81 int lz = leading_zeroes(w);
82 w <<= lz;
83 value128 product = compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
84 return compute_error_scaled<binary>(q, product.high, lz);
85}
86
87// w * 10 ** q
88// The returned value should be a valid ieee64 number that simply need to be packed.
89// However, in some very rare cases, the computation will fail. In such cases, we
90// return an adjusted_mantissa with a negative power of 2: the caller should recompute
91// in such cases.
92template <typename binary>
94adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept {
95 adjusted_mantissa answer;
96 if ((w == 0) || (q < binary::smallest_power_of_ten())) {
97 answer.power2 = 0;
98 answer.mantissa = 0;
99 // result should be zero
100 return answer;
101 }
102 if (q > binary::largest_power_of_ten()) {
103 // we want to get infinity:
104 answer.power2 = binary::infinite_power();
105 answer.mantissa = 0;
106 return answer;
107 }
108 // At this point in time q is in [powers::smallest_power_of_five, powers::largest_power_of_five].
109
110 // We want the most significant bit of i to be 1. Shift if needed.
111 int lz = leading_zeroes(w);
112 w <<= lz;
113
114 // The required precision is binary::mantissa_explicit_bits() + 3 because
115 // 1. We need the implicit bit
116 // 2. We need an extra bit for rounding purposes
117 // 3. We might lose a bit due to the "upperbit" routine (result too small, requiring a shift)
118
119 value128 product = compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
120 if(product.low == 0xFFFFFFFFFFFFFFFF) { // could guard it further
121 // In some very rare cases, this could happen, in which case we might need a more accurate
122 // computation that what we can provide cheaply. This is very, very unlikely.
123 //
124 const bool inside_safe_exponent = (q >= -27) && (q <= 55); // always good because 5**q <2**128 when q>=0,
125 // and otherwise, for q<0, we have 5**-q<2**64 and the 128-bit reciprocal allows for exact computation.
126 if(!inside_safe_exponent) {
127 return compute_error_scaled<binary>(q, product.high, lz);
128 }
129 }
130 // The "compute_product_approximation" function can be slightly slower than a branchless approach:
131 // value128 product = compute_product(q, w);
132 // but in practice, we can win big with the compute_product_approximation if its additional branch
133 // is easily predicted. Which is best is data specific.
134 int upperbit = int(product.high >> 63);
135
136 answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3);
137
138 answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz - binary::minimum_exponent());
139 if (answer.power2 <= 0) { // we have a subnormal?
140 // Here have that answer.power2 <= 0 so -answer.power2 >= 0
141 if(-answer.power2 + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure.
142 answer.power2 = 0;
143 answer.mantissa = 0;
144 // result should be zero
145 return answer;
146 }
147 // next line is safe because -answer.power2 + 1 < 64
148 answer.mantissa >>= -answer.power2 + 1;
149 // Thankfully, we can't have both "round-to-even" and subnormals because
150 // "round-to-even" only occurs for powers close to 0.
151 answer.mantissa += (answer.mantissa & 1); // round up
152 answer.mantissa >>= 1;
153 // There is a weird scenario where we don't have a subnormal but just.
154 // Suppose we start with 2.2250738585072013e-308, we end up
155 // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal
156 // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round
157 // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer
158 // subnormal, but we can only know this after rounding.
159 // So we only declare a subnormal if we are smaller than the threshold.
160 answer.power2 = (answer.mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) ? 0 : 1;
161 return answer;
162 }
163
164 // usually, we round *up*, but if we fall right in between and and we have an
165 // even basis, we need to round down
166 // We are only concerned with the cases where 5**q fits in single 64-bit word.
167 if ((product.low <= 1) && (q >= binary::min_exponent_round_to_even()) && (q <= binary::max_exponent_round_to_even()) &&
168 ((answer.mantissa & 3) == 1) ) { // we may fall between two floats!
169 // To be in-between two floats we need that in doing
170 // answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3);
171 // ... we dropped out only zeroes. But if this happened, then we can go back!!!
172 if((answer.mantissa << (upperbit + 64 - binary::mantissa_explicit_bits() - 3)) == product.high) {
173 answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up
174 }
175 }
176
177 answer.mantissa += (answer.mantissa & 1); // round up
178 answer.mantissa >>= 1;
179 if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) {
180 answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits());
181 answer.power2++; // undo previous addition
182 }
183
184 answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits());
185 if (answer.power2 >= binary::infinite_power()) { // infinity
186 answer.power2 = binary::infinite_power();
187 answer.mantissa = 0;
188 }
189 return answer;
190}
191
192} // namespace fast_float
193
194#endif
#define fastfloat_really_inline
Definition: float_common.h:76
constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept
fastfloat_really_inline int leading_zeroes(uint64_t input_num)
Definition: float_common.h:133
static constexpr int32_t invalid_am_bias
Definition: float_common.h:215
fastfloat_really_inline value128 compute_product_approximation(int64_t q, uint64_t w)
fastfloat_really_inline adjusted_mantissa compute_error(int64_t q, uint64_t w) noexcept
fastfloat_really_inline adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept
fastfloat_really_inline value128 full_multiplication(uint64_t a, uint64_t b)
Definition: float_common.h:183
fastfloat_really_inline adjusted_mantissa compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept
static constexpr int smallest_power_of_five
Definition: fast_table.h:35
static const uint64_t power_of_five_128[number_of_entries]
Definition: fast_table.h:39