NumeRe v1.1.4
NumeRe: Framework für Numerische Rechnungen
ascii_number.h
Go to the documentation of this file.
1#ifndef FASTFLOAT_ASCII_NUMBER_H
2#define FASTFLOAT_ASCII_NUMBER_H
3
4#include <cctype>
5#include <cstdint>
6#include <cstring>
7#include <iterator>
8
9#include "float_common.h"
10
11namespace fast_float {
12
13// Next function can be micro-optimized, but compilers are entirely
14// able to optimize it well.
15fastfloat_really_inline bool is_integer(char c) noexcept { return c >= '0' && c <= '9'; }
16
17fastfloat_really_inline uint64_t byteswap(uint64_t val) {
18 return (val & 0xFF00000000000000) >> 56
19 | (val & 0x00FF000000000000) >> 40
20 | (val & 0x0000FF0000000000) >> 24
21 | (val & 0x000000FF00000000) >> 8
22 | (val & 0x00000000FF000000) << 8
23 | (val & 0x0000000000FF0000) << 24
24 | (val & 0x000000000000FF00) << 40
25 | (val & 0x00000000000000FF) << 56;
26}
27
28fastfloat_really_inline uint64_t read_u64(const char *chars) {
29 uint64_t val;
30 ::memcpy(&val, chars, sizeof(uint64_t));
31#if FASTFLOAT_IS_BIG_ENDIAN == 1
32 // Need to read as-if the number was in little-endian order.
33 val = byteswap(val);
34#endif
35 return val;
36}
37
38fastfloat_really_inline void write_u64(uint8_t *chars, uint64_t val) {
39#if FASTFLOAT_IS_BIG_ENDIAN == 1
40 // Need to read as-if the number was in little-endian order.
41 val = byteswap(val);
42#endif
43 ::memcpy(chars, &val, sizeof(uint64_t));
44}
45
46// credit @aqrit
48 const uint64_t mask = 0x000000FF000000FF;
49 const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
50 const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
51 val -= 0x3030303030303030;
52 val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
53 val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
54 return uint32_t(val);
55}
56
57fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
59}
60
61// credit @aqrit
63 return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
64 0x8080808080808080));
65}
66
67fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept {
69}
70
72
74 int64_t exponent{0};
75 uint64_t mantissa{0};
76 const char *lastmatch{nullptr};
77 bool negative{false};
78 bool valid{false};
79 bool too_many_digits{false};
80 // contains the range of the significant digits
81 byte_span integer{}; // non-nullable
82 byte_span fraction{}; // nullable
83};
84
85// Assuming that you use no more than 19 digits, this will
86// parse an ASCII string.
88parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept {
89 const chars_format fmt = options.format;
90 const char decimal_point = options.decimal_point;
91
93 answer.valid = false;
94 answer.too_many_digits = false;
95 answer.negative = (*p == '-');
96 if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
97 ++p;
98 if (p == pend) {
99 return answer;
100 }
101 if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
102 return answer;
103 }
104 }
105 const char *const start_digits = p;
106
107 uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
108
109 while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
110 i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
111 p += 8;
112 }
113 while ((p != pend) && is_integer(*p)) {
114 // a multiplication by 10 is cheaper than an arbitrary integer
115 // multiplication
116 i = 10 * i +
117 uint64_t(*p - '0'); // might overflow, we will handle the overflow later
118 ++p;
119 }
120 const char *const end_of_integer_part = p;
121 int64_t digit_count = int64_t(end_of_integer_part - start_digits);
122 answer.integer = byte_span(start_digits, size_t(digit_count));
123 int64_t exponent = 0;
124 if ((p != pend) && (*p == decimal_point)) {
125 ++p;
126 const char* before = p;
127 // can occur at most twice without overflowing, but let it occur more, since
128 // for integers with many digits, digit parsing is the primary bottleneck.
129 while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
130 i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
131 p += 8;
132 }
133 while ((p != pend) && is_integer(*p)) {
134 uint8_t digit = uint8_t(*p - '0');
135 ++p;
136 i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
137 }
138 exponent = before - p;
139 answer.fraction = byte_span(before, size_t(p - before));
140 digit_count -= exponent;
141 }
142 // we must have encountered at least one integer!
143 if (digit_count == 0) {
144 return answer;
145 }
146 int64_t exp_number = 0; // explicit exponential part
147 if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) {
148 const char * location_of_e = p;
149 ++p;
150 bool neg_exp = false;
151 if ((p != pend) && ('-' == *p)) {
152 neg_exp = true;
153 ++p;
154 } else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
155 ++p;
156 }
157 if ((p == pend) || !is_integer(*p)) {
158 if(!(fmt & chars_format::fixed)) {
159 // We are in error.
160 return answer;
161 }
162 // Otherwise, we will be ignoring the 'e'.
163 p = location_of_e;
164 } else {
165 while ((p != pend) && is_integer(*p)) {
166 uint8_t digit = uint8_t(*p - '0');
167 if (exp_number < 0x10000000) {
168 exp_number = 10 * exp_number + digit;
169 }
170 ++p;
171 }
172 if(neg_exp) { exp_number = - exp_number; }
173 exponent += exp_number;
174 }
175 } else {
176 // If it scientific and not fixed, we have to bail out.
177 if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
178 }
179 answer.lastmatch = p;
180 answer.valid = true;
181
182 // If we frequently had to deal with long strings of digits,
183 // we could extend our code by using a 128-bit integer instead
184 // of a 64-bit integer. However, this is uncommon.
185 //
186 // We can deal with up to 19 digits.
187 if (digit_count > 19) { // this is uncommon
188 // It is possible that the integer had an overflow.
189 // We have to handle the case where we have 0.0000somenumber.
190 // We need to be mindful of the case where we only have zeroes...
191 // E.g., 0.000000000...000.
192 const char *start = start_digits;
193 while ((start != pend) && (*start == '0' || *start == decimal_point)) {
194 if(*start == '0') { digit_count --; }
195 start++;
196 }
197 if (digit_count > 19) {
198 answer.too_many_digits = true;
199 // Let us start again, this time, avoiding overflows.
200 // We don't need to check if is_integer, since we use the
201 // pre-tokenized spans from above.
202 i = 0;
203 p = answer.integer.ptr;
204 const char* int_end = p + answer.integer.len();
205 const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
206 while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
207 i = i * 10 + uint64_t(*p - '0');
208 ++p;
209 }
210 if (i >= minimal_nineteen_digit_integer) { // We have a big integers
211 exponent = end_of_integer_part - p + exp_number;
212 } else { // We have a value with a fractional component.
213 p = answer.fraction.ptr;
214 const char* frac_end = p + answer.fraction.len();
215 while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
216 i = i * 10 + uint64_t(*p - '0');
217 ++p;
218 }
219 exponent = answer.fraction.ptr - p + exp_number;
220 }
221 // We have now corrected both exponent and i, to a truncated value
222 }
223 }
224 answer.exponent = exponent;
225 answer.mantissa = i;
226 return answer;
227}
228
229} // namespace fast_float
230
231#endif
#define fastfloat_really_inline
Definition: float_common.h:76
fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val)
Definition: ascii_number.h:47
fastfloat_really_inline uint64_t byteswap(uint64_t val)
Definition: ascii_number.h:17
@ scientific
Definition: fast_float.h:8
span< const char > byte_span
Definition: ascii_number.h:71
fastfloat_really_inline void write_u64(uint8_t *chars, uint64_t val)
Definition: ascii_number.h:38
fastfloat_really_inline parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept
Definition: ascii_number.h:88
fastfloat_really_inline uint64_t read_u64(const char *chars)
Definition: ascii_number.h:28
fastfloat_really_inline bool is_integer(char c) noexcept
Definition: ascii_number.h:15
fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept
Definition: ascii_number.h:62
constexpr size_t len() const noexcept
Definition: float_common.h:115