docs/ascii__number_8h_source.html

#ifndef FASTFLOAT_ASCII_NUMBER_H

#define FASTFLOAT_ASCII_NUMBER_H


#include <cctype>

#include <cstdint>

#include <cstring>

#include <iterator>


#include "float_common.h"


namespace fast_float {


// Returns true when `c` is one of the ASCII decimal digits '0'..'9'.
// This could be micro-optimized by hand, but compilers already generate
// good code for the plain range test.
fastfloat_really_inline bool is_integer(char c) noexcept {
  return !(c < '0' || c > '9');
}


// Reverses the order of the eight bytes of `val`
// (byte 0 <-> byte 7, byte 1 <-> byte 6, and so on).
fastfloat_really_inline uint64_t byteswap(uint64_t val) {
  // Stage 1: exchange the two bytes inside every 16-bit group.
  val = ((val & 0x00FF00FF00FF00FFULL) << 8) | ((val >> 8) & 0x00FF00FF00FF00FFULL);
  // Stage 2: exchange the two 16-bit groups inside every 32-bit half.
  val = ((val & 0x0000FFFF0000FFFFULL) << 16) | ((val >> 16) & 0x0000FFFF0000FFFFULL);
  // Stage 3: exchange the two 32-bit halves.
  return (val << 32) | (val >> 32);
}


// Loads sizeof(uint64_t) bytes starting at `chars` into a 64-bit word.
// The bytes are copied with memcpy, so `chars` needs no particular
// alignment. When FASTFLOAT_IS_BIG_ENDIAN is 1 the word is byte-swapped so
// that the result reads as if the bytes had been stored in little-endian
// order (chars[0] ends up in the least-significant byte).
fastfloat_really_inline uint64_t read_u64(const char *chars) {
  uint64_t raw;
  std::memcpy(&raw, chars, sizeof raw);
#if FASTFLOAT_IS_BIG_ENDIAN == 1
  return byteswap(raw);
#else
  return raw;
#endif
}


// Stores the 64-bit word `val` into the sizeof(uint64_t) bytes starting at
// `chars`. When FASTFLOAT_IS_BIG_ENDIAN is 1 the word is byte-swapped first,
// so the bytes are written as if the value were stored in little-endian
// order (least-significant byte at chars[0]).
fastfloat_really_inline void write_u64(uint8_t *chars, uint64_t val) {
#if FASTFLOAT_IS_BIG_ENDIAN == 1
  const uint64_t stored = byteswap(val);
#else
  const uint64_t stored = val;
#endif
  std::memcpy(chars, &stored, sizeof stored);
}


// credit @aqrit
//
// Converts eight ASCII decimal digits packed into `val` (first digit in the
// least-significant byte) into the number they spell, using 64-bit
// arithmetic instead of a per-digit loop. Every byte of `val` is expected to
// be an ASCII digit; callers in this file test that with
// is_made_of_eight_digits_fast before calling.
fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) {
  const uint64_t pair_mask = 0x000000FF000000FF;
  const uint64_t weights_even = 0x000F424000000064; // 100 + (1000000ULL << 32)
  const uint64_t weights_odd = 0x0000271000000001;  // 1 + (10000ULL << 32)
  // Turn every ASCII byte into its digit value 0..9.
  val -= 0x3030303030303030;
  // Byte k becomes 10 * digit[k] + digit[k + 1], so bytes 0, 2, 4 and 6 each
  // hold a two-digit number. Same as (val * 2561) >> 8.
  val = (val * 10) + (val >> 8);
  // Bytes 0 and 4 are weighted by 1000000 and 100, bytes 2 and 6 by 10000
  // and 1; the weighted sum accumulates in the upper 32 bits.
  const uint64_t even_pairs = (val & pair_mask) * weights_even;
  const uint64_t odd_pairs = ((val >> 16) & pair_mask) * weights_odd;
  return uint32_t((even_pairs + odd_pairs) >> 32);
}


fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars)  noexcept  {

  return parse_eight_digits_unrolled(read_u64(chars));

}


// credit @aqrit
//
// Tests, without a per-byte loop, whether the eight bytes packed in `val`
// are ASCII decimal digits ('0' = 0x30 .. '9' = 0x39).
fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
  // Adding 0x46 sets the top bit of any byte greater than '9' (0x3A + 0x46 = 0x80).
  const uint64_t too_high = val + 0x4646464646464646;
  // Subtracting 0x30 borrows, setting the top bit, for any byte below '0'.
  const uint64_t too_low = val - 0x3030303030303030;
  // No top bit set in either word means no byte was flagged.
  return ((too_high | too_low) & 0x8080808080808080) == 0;
}


fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars)  noexcept  {

  return is_made_of_eight_digits_fast(read_u64(chars));

}


// Non-owning view over a run of input characters (pointer plus length).
using byte_span = span<const char>;


// Result of parse_number_string: the pieces of a decimal number after
// tokenizing, before any conversion to a floating-point value.
struct parsed_number_string {
  // Power of ten to apply to `mantissa`.
  int64_t exponent = 0;
  // Digits of the number accumulated as an integer (truncated when
  // `too_many_digits` is set).
  uint64_t mantissa = 0;
  // One past the last character consumed; set only on a successful parse.
  const char *lastmatch = nullptr;
  // True when the input started with '-'.
  bool negative = false;
  // True when the input was accepted as a number.
  bool valid = false;
  // True when the number has more than 19 significant digits, so
  // `mantissa` holds only a truncated prefix of them.
  bool too_many_digits = false;
  // Ranges of the digits on either side of the decimal point.
  byte_span integer{};  // non-nullable
  byte_span fraction{}; // nullable
};


// Tokenizes the decimal number at the start of [p, pend) into a sign, a
// 64-bit decimal significand and a power-of-ten exponent. No floating-point
// value is produced here.
//
// Accepted shape: an optional leading '-' (a leading '+' is not accepted),
// digits, optionally options.decimal_point followed by more digits, and,
// when options.format has chars_format::scientific set, an 'e'/'E' exponent
// with an optional sign. At least one digit must be present.
//
// On success `valid` is true, `lastmatch` points one past the last consumed
// character, and the parsed value is mantissa * 10^exponent (negated when
// `negative` is set). `integer` and `fraction` record the digit runs before
// and after the decimal point. The fast path handles up to 19 significant
// digits; with more, `mantissa` holds a truncated prefix of the digits,
// `exponent` is adjusted to match and `too_many_digits` is set.
// On failure the result is returned with `valid` == false.
//
// NOTE(review): *p is read before any comparison with pend, so the range is
// expected to be non-empty; presumably callers check this -- confirm.

fastfloat_really_inline

parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept {

  const chars_format fmt = options.format;

  const char decimal_point = options.decimal_point;


  parsed_number_string answer;

  answer.valid = false;

  answer.too_many_digits = false;

  answer.negative = (*p == '-');

  if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here

    ++p;

    if (p == pend) {

      return answer;

    }

    if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by a digit or the decimal point

      return answer;

    }

  }

  const char *const start_digits = p;


  uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)


  // Integer part, fast path: consume eight digits per iteration.
  while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {

    i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok

    p += 8;

  }

  // Integer part, remaining digits one at a time.
  while ((p != pend) && is_integer(*p)) {

    // a multiplication by 10 is cheaper than an arbitrary integer

    // multiplication

    i = 10 * i +

        uint64_t(*p - '0'); // might overflow, we will handle the overflow later

    ++p;

  }

  const char *const end_of_integer_part = p;

  // Number of digits seen so far (leading zeros included).
  int64_t digit_count = int64_t(end_of_integer_part - start_digits);

  answer.integer = byte_span(start_digits, size_t(digit_count));

  int64_t exponent = 0;

  if ((p != pend) && (*p == decimal_point)) {

    ++p;

    const char* before = p;

    // can occur at most twice without overflowing, but let it occur more, since

    // for integers with many digits, digit parsing is the primary bottleneck.

    while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {

      i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok

      p += 8;

    }

    while ((p != pend) && is_integer(*p)) {

      uint8_t digit = uint8_t(*p - '0');

      ++p;

      i = i * 10 + digit; // in rare cases, this will overflow, but that's ok

    }

    // Fraction digits were folded into i, so the decimal exponent is minus
    // the number of fraction digits.
    exponent = before - p;

    answer.fraction = byte_span(before, size_t(p - before));

    // exponent is <= 0 here, so this adds the fraction digits to the count.
    digit_count -= exponent;

  }

  // we must have encountered at least one digit!

  if (digit_count == 0) {

    return answer;

  }

  int64_t exp_number = 0;            // explicit exponential part

  if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) {

    // Remembered so the 'e' can be left unconsumed if no exponent digits follow.
    const char * location_of_e = p;

    ++p;

    bool neg_exp = false;

    if ((p != pend) && ('-' == *p)) {

      neg_exp = true;

      ++p;

    } else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)

      ++p;

    }

    if ((p == pend) || !is_integer(*p)) {

      if(!(fmt & chars_format::fixed)) {

        // We are in error.

        return answer;

      }

      // Otherwise, we will be ignoring the 'e'.

      p = location_of_e;

    } else {

      while ((p != pend) && is_integer(*p)) {

        uint8_t digit = uint8_t(*p - '0');

        // Stop accumulating once exp_number reaches 0x10000000 so it cannot
        // overflow; the remaining exponent digits are still consumed.
        if (exp_number < 0x10000000) {

          exp_number = 10 * exp_number + digit;

        }

        ++p;

      }

      if(neg_exp) { exp_number = - exp_number; }

      exponent += exp_number;

    }

  } else {

    // If the format is scientific and not fixed, an exponent is required:
    // we have to bail out.

    if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }

  }

  answer.lastmatch = p;

  answer.valid = true;


  // If we frequently had to deal with long strings of digits,

  // we could extend our code by using a 128-bit integer instead

  // of a 64-bit integer. However, this is uncommon.

  //

  // We can deal with up to 19 digits.

  if (digit_count > 19) { // this is uncommon

    // It is possible that the integer had an overflow.

    // We have to handle the case where we have 0.0000somenumber.

    // We need to be mindful of the case where we only have zeroes...

    // E.g., 0.000000000...000.

    // Discount leading zeros (stepping over the decimal point): they do not
    // count towards the 19-digit limit.
    const char *start = start_digits;

    while ((start != pend) && (*start == '0' || *start == decimal_point)) {

      if(*start == '0') { digit_count --; }

      start++;

    }

    if (digit_count > 19) {

      answer.too_many_digits = true;

      // Let us start again, this time, avoiding overflows.

      // We don't need to check if is_integer, since we use the

      // pre-tokenized spans from above.

      // From here on p is reused as a scratch cursor; answer.lastmatch was
      // already recorded above.
      i = 0;

      p = answer.integer.ptr;

      const char* int_end = p + answer.integer.len();

      const uint64_t minimal_nineteen_digit_integer{1000000000000000000};

      // Accumulate digits only while i is below 10^18.
      while((i < minimal_nineteen_digit_integer) && (p != int_end)) {

        i = i * 10 + uint64_t(*p - '0');

        ++p;

      }

      if (i >= minimal_nineteen_digit_integer) { // We have a big integer

        // The integer digits that were not folded into i become a positive
        // power of ten.
        exponent = end_of_integer_part - p + exp_number;

      } else { // We have a value with a fractional component.

          p = answer.fraction.ptr;

          const char* frac_end = p + answer.fraction.len();

          while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {

            i = i * 10 + uint64_t(*p - '0');

            ++p;

          }

          // Only the fraction digits actually folded into i lower the exponent.
          exponent = answer.fraction.ptr - p + exp_number;

      }

      // We have now corrected both exponent and i, to a truncated value

    }

  }

  answer.exponent = exponent;

  answer.mantissa = i;

  return answer;

}


} // namespace fast_float


#endif

float_common.h

fastfloat_really_inline
#define fastfloat_really_inline
Definition: float_common.h:76

fast_float
Definition: ascii_number.h:11

fast_float::parse_eight_digits_unrolled
fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val)
Definition: ascii_number.h:47

fast_float::byteswap
fastfloat_really_inline uint64_t byteswap(uint64_t val)
Definition: ascii_number.h:17

fast_float::chars_format
chars_format
Definition: fast_float.h:7

fast_float::scientific
@ scientific
Definition: fast_float.h:8

fast_float::fixed
@ fixed
Definition: fast_float.h:9

fast_float::byte_span
span< const char > byte_span
Definition: ascii_number.h:71

fast_float::write_u64
fastfloat_really_inline void write_u64(uint8_t *chars, uint64_t val)
Definition: ascii_number.h:38

fast_float::parse_number_string
fastfloat_really_inline parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept
Definition: ascii_number.h:88

fast_float::read_u64
fastfloat_really_inline uint64_t read_u64(const char *chars)
Definition: ascii_number.h:28

fast_float::is_integer
fastfloat_really_inline bool is_integer(char c) noexcept
Definition: ascii_number.h:15

fast_float::is_made_of_eight_digits_fast
fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept
Definition: ascii_number.h:62

fast_float::parse_options
Definition: fast_float.h:20

fast_float::parsed_number_string
Definition: ascii_number.h:73

fast_float::parsed_number_string::mantissa
uint64_t mantissa
Definition: ascii_number.h:75

fast_float::parsed_number_string::integer
byte_span integer
Definition: ascii_number.h:81

fast_float::parsed_number_string::lastmatch
const char * lastmatch
Definition: ascii_number.h:76

fast_float::parsed_number_string::fraction
byte_span fraction
Definition: ascii_number.h:82

fast_float::parsed_number_string::valid
bool valid
Definition: ascii_number.h:78

fast_float::parsed_number_string::too_many_digits
bool too_many_digits
Definition: ascii_number.h:79

fast_float::parsed_number_string::negative
bool negative
Definition: ascii_number.h:77

fast_float::parsed_number_string::exponent
int64_t exponent
Definition: ascii_number.h:74

fast_float::span< const char >

fast_float::span::ptr
const T * ptr
Definition: float_common.h:110

fast_float::span::len
constexpr size_t len() const noexcept
Definition: float_common.h:115