mirror of
https://codeberg.org/ziglang/zig.git
synced 2025-12-06 22:04:21 +00:00
293 lines
9.2 KiB
Zig
293 lines
9.2 KiB
Zig
const std = @import("std");
|
|
const common = @import("common.zig");
|
|
const FloatStream = @import("FloatStream.zig");
|
|
const isEightDigits = common.isEightDigits;
|
|
const Number = common.Number;
|
|
|
|
/// Convert eight ASCII decimal digits, packed into a `u64` with the first
/// digit of the text in the lowest byte, into the number they spell.
///
/// The caller must ensure every byte is in [0x30, 0x39]. Under that
/// precondition the conversion needs only 3 multiplications, much
/// faster than the normal 8.
///
/// This is based off the algorithm described in "Fast numeric string to
/// int", available here: <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>.
fn parse8Digits(v_: u64) u64 {
    // Keeps byte 0 and byte 4 of a word.
    const lane_mask: u64 = 0x0000_00ff_0000_00ff;
    // Upper half: 1_000_000, lower half: 100.
    const scale_even: u64 = 0x000f_4240_0000_0064;
    // Upper half: 10_000, lower half: 1.
    const scale_odd: u64 = 0x0000_2710_0000_0001;

    // Strip the ASCII '0' bias from all eight bytes at once.
    const digits = v_ - 0x3030_3030_3030_3030;
    // Byte i becomes 10 * digit[i] + digit[i + 1], so bytes 0, 2, 4 and 6
    // each hold one two-digit pair.
    const pairs = (digits * 10) + (digits >> 8); // will not overflow, fits in 63 bits
    const even_sum = (pairs & lane_mask) *% scale_even;
    const odd_sum = ((pairs >> 16) & lane_mask) *% scale_odd;
    // The weighted pairs accumulate in the upper 32 bits of the sum.
    const combined: u32 = @truncate((even_sum +% odd_sum) >> 32);
    return @as(u64, combined);
}
|
|
|
|
/// Consume digits of `base` from `stream` until a non-digit character is
/// found, accumulating them into `x.*` with wrapping arithmetic.
fn tryParseDigits(comptime T: type, stream: *FloatStream, x: *T, comptime base: u8) void {
    // Fast path: take 8 digits at a time, using an optimized algorithm.
    // This only supports decimal digits.
    if (base == 10) {
        while (stream.hasLen(8)) {
            const chunk = stream.readU64Unchecked();
            if (isEightDigits(chunk)) {
                x.* = x.* *% 1_0000_0000 +% parse8Digits(chunk);
                stream.advance(8);
            } else {
                break;
            }
        }
    }

    // Slow path: whatever is left, one digit at a time.
    while (stream.scanDigit(base)) |digit| {
        x.* = x.* *% base +% digit;
    }
}
|
|
|
|
/// Return 10^(digit_count - 1), the smallest decimal integer that has
/// `digit_count` digits. A `digit_count` of 0 or 1 yields 1.
fn min_n_digit_int(comptime T: type, digit_count: usize) T {
    var result: T = 1;
    var remaining: usize = digit_count;
    while (remaining > 1) : (remaining -= 1) {
        result *= 10;
    }
    return result;
}
|
|
|
|
/// Parse up to N digits.
///
/// Digits of `base` are consumed from `stream` and accumulated into `x.*`
/// (with wrapping arithmetic) until either the stream yields no further digit
/// or `x.*` reaches `min_n_digit_int(T, n)`.
fn tryParseNDigits(comptime T: type, stream: *FloatStream, x: *T, comptime base: u8, comptime n: usize) void {
    // `T` and `n` are comptime-known, so the bound is evaluated once at
    // compile time instead of being recomputed on every loop iteration.
    // NOTE(review): the bound is a power of ten even when `base` is 16 —
    // confirm this is intended for the hex path.
    const threshold = comptime min_n_digit_int(T, n);

    while (x.* < threshold) {
        const digit = stream.scanDigit(base) orelse break;
        x.* *%= base;
        x.* +%= digit;
    }
}
|
|
|
|
/// Parse the scientific notation component of a float: an optional sign
/// followed by at least one decimal digit.
///
/// Returns the signed exponent, or null when no digit follows the optional
/// sign. The magnitude stops growing once it reaches 0x1000_0000; remaining
/// digits are still consumed from the stream.
fn parseScientific(stream: *FloatStream) ?i64 {
    var is_negative = false;
    if (stream.first()) |sign| {
        switch (sign) {
            '-' => {
                is_negative = true;
                stream.advance(1);
            },
            '+' => stream.advance(1),
            else => {},
        }
    }

    if (!stream.firstIsDigit(10)) return null;

    var magnitude: i64 = 0;
    while (stream.scanDigit(10)) |digit| {
        // no overflows here, saturate well before overflow
        if (magnitude < 0x1000_0000) {
            magnitude = 10 * magnitude + digit;
        }
    }

    if (is_negative) return -magnitude;
    return magnitude;
}
|
|
|
|
/// Per-base settings that drive `parsePartialNumberBase`.
const ParseInfo = struct {
    /// Radix of the digits: 10 or 16.
    base: u8,
    /// Largest digit count handled by the single-pass fast path; numbers with
    /// more digits are re-parsed. For a `u64` mantissa this is 19 in base 10
    /// and 16 in base 16.
    max_mantissa_digits: usize,
    /// Lowercase exponent marker, e.g. e or p (E and P also checked).
    exp_char_lower: u8,
};
|
|
|
|
/// Parse the digits, optional fraction and optional exponent of a number
/// written in `info.base`, starting at the stream's current position.
///
/// On success returns the mantissa/exponent representation and stores the
/// number of bytes consumed in `n.*`. Returns null when there are no digits,
/// when the exponent marker is not followed by a valid exponent, or when the
/// input contains misplaced underscores. `n.*` is left untouched in the first
/// two of those cases.
fn parsePartialNumberBase(comptime T: type, stream: *FloatStream, negative: bool, n: *usize, comptime info: ParseInfo) ?Number(T) {
    const MantissaT = common.mantissaType(T);
    const is_hex = info.base == 16;

    // Integer part: the digits before any dot.
    var mantissa: MantissaT = 0;
    tryParseDigits(MantissaT, stream, &mantissa, info.base);
    const integer_end = stream.offsetTrue();
    var digit_count = @as(isize, @intCast(integer_end));
    // the base being 16 implies a 0x prefix, which shouldn't be included in the digit count
    if (is_hex) digit_count -= 2;

    // Fractional part: a dot with the digits that follow it.
    var exponent: i64 = 0;
    if (stream.firstIs('.')) {
        stream.advance(1);
        const fraction_start = stream.offsetTrue();
        tryParseDigits(MantissaT, stream, &mantissa, info.base);
        const fraction_len = stream.offsetTrue() - fraction_start;
        exponent = -@as(i64, @intCast(fraction_len));
        digit_count += @as(isize, @intCast(fraction_len));
    }

    // Each base-16 digit is worth 4 bits (2^4) of shift.
    if (is_hex) exponent *= 4;

    if (digit_count == 0) return null;

    // Explicit exponent (scientific format).
    var explicit_exponent: i64 = 0;
    if (stream.firstIsLower(info.exp_char_lower)) {
        stream.advance(1);
        explicit_exponent = parseScientific(stream) orelse return null;
        exponent += explicit_exponent;
    }

    // The reported length must be the complete parsed length.
    n.* = stream.offset;

    if (stream.underscore_count > 0 and !validUnderscores(stream.slice, info.base)) return null;

    // common case with not many digits
    if (digit_count <= info.max_mantissa_digits) {
        return Number(T){
            .exponent = exponent,
            .mantissa = mantissa,
            .negative = negative,
            .many_digits = false,
            .hex = is_hex,
        };
    }

    // Too many characters were counted as digits. Discount leading zeros
    // before deciding whether the mantissa really overflowed.
    digit_count -= info.max_mantissa_digits;
    stream.reset(); // re-parse from beginning
    // NOTE(review): whether `reset` rewinds past a "0x" prefix is not visible
    // here — confirm the hex path behaves as intended from this point on.
    while (stream.firstIs3('0', '.', '_')) {
        const next = stream.firstUnchecked();
        if (next == '_') {
            stream.underscore_count += 1;
        } else {
            // '0' = '.' + 2, so the saturating subtraction yields 1 for '0'
            // and 0 for '.'.
            digit_count -= @as(isize, @intCast(next -| ('0' - 1)));
        }
        stream.advance(1);
    }

    const many_digits = digit_count > 0;
    if (many_digits) {
        // at this point we have more than max_mantissa_digits significant digits, let's try again
        mantissa = 0;
        stream.reset();
        tryParseNDigits(MantissaT, stream, &mantissa, info.base, info.max_mantissa_digits);

        // NOTE(review): unlike the first pass, the exponent recomputed below
        // is not multiplied by 4 when the base is 16 — confirm.
        if (mantissa >= min_n_digit_int(MantissaT, info.max_mantissa_digits)) {
            // big int: the mantissa filled up inside the integer part, so the
            // unread integer digits become a positive exponent.
            exponent = @as(i64, @intCast(integer_end)) - @as(i64, @intCast(stream.offsetTrue()));
        } else {
            // the next byte must be present and be '.'
            // We know this is true because we had more than 19
            // digits previously, so we overflowed a 64-bit integer,
            // but parsing only the integral digits produced less
            // than 19 digits. That means we must have a decimal
            // point, and at least 1 fractional digit.
            stream.advance(1);
            const fraction_start = stream.offsetTrue();
            tryParseNDigits(MantissaT, stream, &mantissa, info.base, info.max_mantissa_digits);
            exponent = @as(i64, @intCast(fraction_start)) - @as(i64, @intCast(stream.offsetTrue()));
        }
        // add back the explicit part
        exponent += explicit_exponent;
    }

    return Number(T){
        .exponent = exponent,
        .mantissa = mantissa,
        .negative = negative,
        .many_digits = many_digits,
        .hex = is_hex,
    };
}
|
|
|
|
/// Parse a partial, non-special floating point number.
///
/// This creates a representation of the float as the
/// significant digits and the decimal exponent.
///
/// Input starting with "0x"/"0X" is parsed as base 16 with a 'p' exponent
/// marker; anything else is parsed as base 10 with an 'e' marker. `s` must
/// not be empty.
fn parsePartialNumber(comptime T: type, s: []const u8, negative: bool, n: *usize) ?Number(T) {
    std.debug.assert(s.len != 0);
    var stream = FloatStream.init(s);
    const wide_mantissa = common.mantissaType(T) != u64;

    const has_hex_prefix = stream.hasLen(2) and
        stream.atUnchecked(0) == '0' and
        std.ascii.toLower(stream.atUnchecked(1)) == 'x';

    if (has_hex_prefix) {
        // Step over the prefix; the base-16 parser starts at the first digit.
        stream.advance(2);
        return parsePartialNumberBase(T, &stream, negative, n, .{
            .base = 16,
            .max_mantissa_digits = if (wide_mantissa) 32 else 16,
            .exp_char_lower = 'p',
        });
    }

    return parsePartialNumberBase(T, &stream, negative, n, .{
        .base = 10,
        .max_mantissa_digits = if (wide_mantissa) 38 else 19,
        .exp_char_lower = 'e',
    });
}
|
|
|
|
/// Parse `s` as a non-special floating point number.
///
/// Returns null when `s` is not a valid number or when any trailing bytes
/// remain after the number.
pub fn parseNumber(comptime T: type, s: []const u8, negative: bool) ?Number(T) {
    var consumed: usize = 0;
    const number = parsePartialNumber(T, s, negative, &consumed) orelse return null;
    // must consume entire float (no trailing data)
    return if (consumed == s.len) number else null;
}
|
|
|
|
/// Recognize a case-insensitive "inf", "infinity" or "nan" prefix of `s`.
///
/// On a match, returns the corresponding value of type `T` and stores the
/// number of bytes matched in `n.*`. `negative` flips the sign of infinity;
/// it has no effect on the NaN result. Returns null, leaving `n.*` untouched,
/// when nothing matches.
fn parsePartialInfOrNan(comptime T: type, s: []const u8, negative: bool, n: *usize) ?T {
    const startsWith = std.ascii.startsWithIgnoreCase;

    // inf/infinity; infxxx should only consume inf.
    if (startsWith(s, "inf")) {
        n.* = if (startsWith(s[3..], "inity")) 8 else 3;
        return if (negative) -std.math.inf(T) else std.math.inf(T);
    }

    if (startsWith(s, "nan")) {
        n.* = 3;
        return std.math.nan(T);
    }

    return null;
}
|
|
|
|
/// Parse `s` as exactly "inf", "infinity" or "nan" (case-insensitive).
///
/// Returns null when `s` is none of these or carries any trailing bytes.
pub fn parseInfOrNan(comptime T: type, s: []const u8, negative: bool) ?T {
    var consumed: usize = 0;
    const special = parsePartialInfOrNan(T, s, negative, &consumed) orelse return null;
    // The match must cover the whole input.
    return if (consumed == s.len) special else null;
}
|
|
|
|
/// Report whether every underscore in `s` sits directly between two digits
/// of `base`.
///
/// An underscore at the start or end of `s`, or one whose neighbor on either
/// side is not a digit (including another underscore), makes the input
/// invalid.
pub fn validUnderscores(s: []const u8, comptime base: u8) bool {
    for (s, 0..) |char, idx| {
        if (char != '_') continue;

        // underscore at start or end
        if (idx == 0 or idx + 1 == s.len) return false;

        // Both neighbors must be digits; this also rejects consecutive underscores.
        const digit_before = common.isDigit(s[idx - 1], base);
        const digit_after = common.isDigit(s[idx + 1], base);
        if (!(digit_before and digit_after)) return false;
    }

    return true;
}
|