Merge pull request #4741 from momumi/master

allow `_` separators in number literals (stage 1)
This commit is contained in:
Andrew Kelley 2020-03-23 00:54:54 -04:00 committed by GitHub
commit 13d04f9963
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 903 additions and 160 deletions

View file

@ -885,6 +885,12 @@ const hex_int = 0xff;
const another_hex_int = 0xFF; const another_hex_int = 0xFF;
const octal_int = 0o755; const octal_int = 0o755;
const binary_int = 0b11110000; const binary_int = 0b11110000;
// underscores may be placed between two digits as a visual separator
const one_billion = 1_000_000_000;
const binary_mask = 0b1_1111_1111;
const permissions = 0o7_5_5;
const big_address = 0xFF80_0000_0000_0000;
{#code_end#} {#code_end#}
{#header_close#} {#header_close#}
{#header_open|Runtime Integer Values#} {#header_open|Runtime Integer Values#}
@ -947,6 +953,11 @@ const yet_another = 123.0e+77;
const hex_floating_point = 0x103.70p-5; const hex_floating_point = 0x103.70p-5;
const another_hex_float = 0x103.70; const another_hex_float = 0x103.70;
const yet_another_hex_float = 0x103.70P-5; const yet_another_hex_float = 0x103.70P-5;
// underscores may be placed between two digits as a visual separator
const lightspeed = 299_792_458.000_000;
const nanosecond = 0.000_000_001;
const more_hex = 0x1234_5678.9ABC_CDEFp-10;
{#code_end#} {#code_end#}
<p> <p>
There is no syntax for NaN, infinity, or negative infinity. For these special values, There is no syntax for NaN, infinity, or negative infinity. For these special values,

View file

@ -373,6 +373,7 @@ pub const Int = struct {
const d = switch (ch) { const d = switch (ch) {
'0'...'9' => ch - '0', '0'...'9' => ch - '0',
'a'...'f' => (ch - 'a') + 0xa, 'a'...'f' => (ch - 'a') + 0xa,
'A'...'F' => (ch - 'A') + 0xa,
else => return error.InvalidCharForDigit, else => return error.InvalidCharForDigit,
}; };
@ -393,8 +394,9 @@ pub const Int = struct {
/// Set self from the string representation `value`. /// Set self from the string representation `value`.
/// ///
/// value must contain only digits <= `base`. Base prefixes are not allowed (e.g. 0x43 should /// `value` must contain only digits <= `base` and is case insensitive. Base prefixes are
/// simply be 43). /// not allowed (e.g. 0x43 should simply be 43). Underscores in the input string are
/// ignored and can be used as digit separators.
/// ///
/// Returns an error if memory could not be allocated or `value` has invalid digits for the /// Returns an error if memory could not be allocated or `value` has invalid digits for the
/// requested base. /// requested base.
@ -415,6 +417,9 @@ pub const Int = struct {
try self.set(0); try self.set(0);
for (value[i..]) |ch| { for (value[i..]) |ch| {
if (ch == '_') {
continue;
}
const d = try charToDigit(ch, base); const d = try charToDigit(ch, base);
const ap_d = Int.initFixed(([_]Limb{d})[0..]); const ap_d = Int.initFixed(([_]Limb{d})[0..]);
@ -1582,6 +1587,22 @@ test "big.int string negative" {
testing.expect((try a.to(i32)) == -1023); testing.expect((try a.to(i32)) == -1023);
} }
test "big.int string set number with underscores" {
    // Separators appear at the start, at the end and in runs; the parsed
    // value must equal the same digits with every `_` removed.
    const digits = "__1_2_0_3_1_7_2_4_1_2_0_____9_1__2__4_7_8_1_2_4_1_2_9_0_8_4_7_1_2_4___";
    const expected: u128 = 120317241209124781241290847124;

    var parsed = try Int.init(testing.allocator);
    defer parsed.deinit();

    try parsed.setString(10, digits);
    const actual = try parsed.to(u128);
    testing.expect(actual == expected);
}
test "big.int string set case insensitive number" {
    // Mixed-case hex digits, with separators, must parse to the same value
    // as the all-lowercase literal.
    const digits = "aB_cD_eF";
    const expected: u32 = 0xabcdef;

    var parsed = try Int.init(testing.allocator);
    defer parsed.deinit();

    try parsed.setString(16, digits);
    const actual = try parsed.to(u32);
    testing.expect(actual == expected);
}
test "big.int string set bad char error" { test "big.int string set bad char error" {
var a = try Int.init(testing.allocator); var a = try Int.init(testing.allocator);
defer a.deinit(); defer a.deinit();

View file

@ -69,23 +69,23 @@ test "floatundisf" {
test__floatundisf(0, 0.0); test__floatundisf(0, 0.0);
test__floatundisf(1, 1.0); test__floatundisf(1, 1.0);
test__floatundisf(2, 2.0); test__floatundisf(2, 2.0);
test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62F); test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62);
test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62F); test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62);
test__floatundisf(0x8000008000000000, 0x1p+63F); test__floatundisf(0x8000008000000000, 0x1p+63);
test__floatundisf(0x8000010000000000, 0x1.000002p+63F); test__floatundisf(0x8000010000000000, 0x1.000002p+63);
test__floatundisf(0x8000000000000000, 0x1p+63F); test__floatundisf(0x8000000000000000, 0x1p+63);
test__floatundisf(0x8000000000000001, 0x1p+63F); test__floatundisf(0x8000000000000001, 0x1p+63);
test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64F); test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64);
test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64F); test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64);
test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50F); test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50F); test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50F); test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50F); test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50F); test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50);
test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50F); test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50F); test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50F); test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50F); test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50F); test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50);
test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50F); test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50);
} }

View file

@ -2815,6 +2815,75 @@ test "zig fmt: extern without container keyword returns error" {
); );
} }
test "zig fmt: integer literals with underscore separators" {
    // Unformatted input: one declaration spread over several lines, using
    // separated literals in decimal, binary, octal and hex.
    const source =
        \\const
        \\ x =
        \\ 1_234_567
        \\ +(0b0_1-0o7_0+0xff_FF ) + 0_0;
    ;
    // The formatter must keep every literal's text intact.
    const expected =
        \\const x = 1_234_567 + (0b0_1 - 0o7_0 + 0xff_FF) + 0_0;
        \\
    ;
    try testTransform(source, expected);
}
test "zig fmt: hex literals with underscore separators" {
    // Unformatted input: separated literals used as array lengths, slice
    // bounds, repeat counts and bit masks.
    const source =
        \\pub fn orMask(a: [ 1_000 ]u64, b: [ 1_000] u64) [1_000]u64 {
        \\ var c: [1_000]u64 = [1]u64{ 0xFFFF_FFFF_FFFF_FFFF}**1_000;
        \\ for (c [ 0_0 .. ]) |_, i| {
        \\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA;
        \\ }
        \\ return c;
        \\}
        \\
        \\
    ;
    // Only the spacing around the literals changes; their text is preserved.
    const expected =
        \\pub fn orMask(a: [1_000]u64, b: [1_000]u64) [1_000]u64 {
        \\ var c: [1_000]u64 = [1]u64{0xFFFF_FFFF_FFFF_FFFF} ** 1_000;
        \\ for (c[0_0..]) |_, i| {
        \\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA;
        \\ }
        \\ return c;
        \\}
        \\
    ;
    try testTransform(source, expected);
}
test "zig fmt: decimal float literals with underscore separators" {
    // Unformatted input: decimal floats with separators in the integer part,
    // the fraction and next to signed exponents, packed without spaces.
    const source =
        \\pub fn main() void {
        \\ const a:f64=(10.0e-0+(10.e+0))+10_00.00_00e-2+00_00.00_10e+4;
        \\ const b:f64=010.0--0_10.+0_1_0.0_0+1e2;
        \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
        \\}
    ;
    // Operators get spaced out; the sign inside an exponent stays attached.
    const expected =
        \\pub fn main() void {
        \\ const a: f64 = (10.0e-0 + (10.e+0)) + 10_00.00_00e-2 + 00_00.00_10e+4;
        \\ const b: f64 = 010.0 - -0_10. + 0_1_0.0_0 + 1e2;
        \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
        \\}
        \\
    ;
    try testTransform(source, expected);
}
// Formats a function whose hex float literals carry `_` separators in the
// integer part and the fraction, next to signed `p` exponents. The expected
// output spaces out the binary operators while leaving each literal's text,
// including the sign inside its exponent, unchanged.
test "zig fmt: hexadecimal float literals with underscore separators" {
    try testTransform(
        \\pub fn main() void {
        \\ const a: f64 = (0x10.0p-0+(0x10.p+0))+0x10_00.00_00p-8+0x00_00.00_10p+16;
        \\ const b: f64 = 0x0010.0--0x00_10.+0x10.00+0x1p4;
        \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
        \\}
    ,
        \\pub fn main() void {
        \\ const a: f64 = (0x10.0p-0 + (0x10.p+0)) + 0x10_00.00_00p-8 + 0x00_00.00_10p+16;
        \\ const b: f64 = 0x0010.0 - -0x00_10. + 0x10.00 + 0x1p4;
        \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b });
        \\}
        \\
    );
}
const std = @import("std"); const std = @import("std");
const mem = std.mem; const mem = std.mem;
const warn = std.debug.warn; const warn = std.debug.warn;

View file

@ -387,17 +387,23 @@ pub const Tokenizer = struct {
DocComment, DocComment,
ContainerDocComment, ContainerDocComment,
Zero, Zero,
IntegerLiteral, IntegerLiteralDec,
IntegerLiteralWithRadix, IntegerLiteralDecNoUnderscore,
IntegerLiteralWithRadixHex, IntegerLiteralBin,
NumberDot, IntegerLiteralBinNoUnderscore,
IntegerLiteralOct,
IntegerLiteralOctNoUnderscore,
IntegerLiteralHex,
IntegerLiteralHexNoUnderscore,
NumberDotDec,
NumberDotHex, NumberDotHex,
FloatFraction, FloatFractionDec,
FloatFractionDecNoUnderscore,
FloatFractionHex, FloatFractionHex,
FloatFractionHexNoUnderscore,
FloatExponentUnsigned, FloatExponentUnsigned,
FloatExponentUnsignedHex,
FloatExponentNumber, FloatExponentNumber,
FloatExponentNumberHex, FloatExponentNumberNoUnderscore,
Ampersand, Ampersand,
Caret, Caret,
Percent, Percent,
@ -412,6 +418,10 @@ pub const Tokenizer = struct {
SawAtSign, SawAtSign,
}; };
/// Reports whether `char` is an underscore or an ASCII letter or digit.
/// The number-literal states use this to decide whether the character that
/// ends a literal should turn the token into `Invalid`.
fn isIdentifierChar(char: u8) bool {
    return switch (char) {
        '_' => true,
        else => std.ascii.isAlNum(char),
    };
}
pub fn next(self: *Tokenizer) Token { pub fn next(self: *Tokenizer) Token {
if (self.pending_invalid_token) |token| { if (self.pending_invalid_token) |token| {
self.pending_invalid_token = null; self.pending_invalid_token = null;
@ -550,7 +560,7 @@ pub const Tokenizer = struct {
result.id = Token.Id.IntegerLiteral; result.id = Token.Id.IntegerLiteral;
}, },
'1'...'9' => { '1'...'9' => {
state = State.IntegerLiteral; state = State.IntegerLiteralDec;
result.id = Token.Id.IntegerLiteral; result.id = Token.Id.IntegerLiteral;
}, },
else => { else => {
@ -1048,55 +1058,145 @@ pub const Tokenizer = struct {
else => self.checkLiteralCharacter(), else => self.checkLiteralCharacter(),
}, },
State.Zero => switch (c) { State.Zero => switch (c) {
'b', 'o' => { 'b' => {
state = State.IntegerLiteralWithRadix; state = State.IntegerLiteralBinNoUnderscore;
},
'o' => {
state = State.IntegerLiteralOctNoUnderscore;
}, },
'x' => { 'x' => {
state = State.IntegerLiteralWithRadixHex; state = State.IntegerLiteralHexNoUnderscore;
},
'0'...'9', '_', '.', 'e', 'E' => {
// reinterpret as a decimal number
self.index -= 1;
state = State.IntegerLiteralDec;
}, },
else => { else => {
// reinterpret as a normal number if (isIdentifierChar(c)) {
self.index -= 1; result.id = Token.Id.Invalid;
state = State.IntegerLiteral; }
break;
}, },
}, },
State.IntegerLiteral => switch (c) { State.IntegerLiteralBinNoUnderscore => switch (c) {
'0'...'1' => {
state = State.IntegerLiteralBin;
},
else => {
result.id = Token.Id.Invalid;
break;
},
},
State.IntegerLiteralBin => switch (c) {
'_' => {
state = State.IntegerLiteralBinNoUnderscore;
},
'0'...'1' => {},
else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
},
State.IntegerLiteralOctNoUnderscore => switch (c) {
'0'...'7' => {
state = State.IntegerLiteralOct;
},
else => {
result.id = Token.Id.Invalid;
break;
},
},
State.IntegerLiteralOct => switch (c) {
'_' => {
state = State.IntegerLiteralOctNoUnderscore;
},
'0'...'7' => {},
else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
},
State.IntegerLiteralDecNoUnderscore => switch (c) {
'0'...'9' => {
state = State.IntegerLiteralDec;
},
else => {
result.id = Token.Id.Invalid;
break;
},
},
State.IntegerLiteralDec => switch (c) {
'_' => {
state = State.IntegerLiteralDecNoUnderscore;
},
'.' => { '.' => {
state = State.NumberDot; state = State.NumberDotDec;
result.id = Token.Id.FloatLiteral;
}, },
'p', 'P', 'e', 'E' => { 'e', 'E' => {
state = State.FloatExponentUnsigned; state = State.FloatExponentUnsigned;
result.id = Token.Id.FloatLiteral;
}, },
'0'...'9' => {}, '0'...'9' => {},
else => break, else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
}, },
State.IntegerLiteralWithRadix => switch (c) {
'.' => {
state = State.NumberDot;
}, },
'0'...'9' => {}, State.IntegerLiteralHexNoUnderscore => switch (c) {
else => break, '0'...'9', 'a'...'f', 'A'...'F' => {
state = State.IntegerLiteralHex;
},
else => {
result.id = Token.Id.Invalid;
break;
},
},
State.IntegerLiteralHex => switch (c) {
'_' => {
state = State.IntegerLiteralHexNoUnderscore;
}, },
State.IntegerLiteralWithRadixHex => switch (c) {
'.' => { '.' => {
state = State.NumberDotHex; state = State.NumberDotHex;
result.id = Token.Id.FloatLiteral;
}, },
'p', 'P' => { 'p', 'P' => {
state = State.FloatExponentUnsignedHex; state = State.FloatExponentUnsigned;
result.id = Token.Id.FloatLiteral;
}, },
'0'...'9', 'a'...'f', 'A'...'F' => {}, '0'...'9', 'a'...'f', 'A'...'F' => {},
else => break, else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
}, },
State.NumberDot => switch (c) { },
State.NumberDotDec => switch (c) {
'.' => { '.' => {
self.index -= 1; self.index -= 1;
state = State.Start; state = State.Start;
break; break;
}, },
else => { 'e', 'E' => {
self.index -= 1; state = State.FloatExponentUnsigned;
},
'0'...'9' => {
result.id = Token.Id.FloatLiteral; result.id = Token.Id.FloatLiteral;
state = State.FloatFraction; state = State.FloatFractionDec;
},
else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
}, },
}, },
State.NumberDotHex => switch (c) { State.NumberDotHex => switch (c) {
@ -1105,65 +1205,112 @@ pub const Tokenizer = struct {
state = State.Start; state = State.Start;
break; break;
}, },
else => { 'p', 'P' => {
self.index -= 1; state = State.FloatExponentUnsigned;
},
'0'...'9', 'a'...'f', 'A'...'F' => {
result.id = Token.Id.FloatLiteral; result.id = Token.Id.FloatLiteral;
state = State.FloatFractionHex; state = State.FloatFractionHex;
}, },
else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
},
State.FloatFractionDecNoUnderscore => switch (c) {
'0'...'9' => {
state = State.FloatFractionDec;
},
else => {
result.id = Token.Id.Invalid;
break;
},
},
State.FloatFractionDec => switch (c) {
'_' => {
state = State.FloatFractionDecNoUnderscore;
}, },
State.FloatFraction => switch (c) {
'e', 'E' => { 'e', 'E' => {
state = State.FloatExponentUnsigned; state = State.FloatExponentUnsigned;
}, },
'0'...'9' => {}, '0'...'9' => {},
else => break, else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
},
State.FloatFractionHexNoUnderscore => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {
state = State.FloatFractionHex;
},
else => {
result.id = Token.Id.Invalid;
break;
},
}, },
State.FloatFractionHex => switch (c) { State.FloatFractionHex => switch (c) {
'_' => {
state = State.FloatFractionHexNoUnderscore;
},
'p', 'P' => { 'p', 'P' => {
state = State.FloatExponentUnsignedHex; state = State.FloatExponentUnsigned;
}, },
'0'...'9', 'a'...'f', 'A'...'F' => {}, '0'...'9', 'a'...'f', 'A'...'F' => {},
else => break, else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
},
}, },
State.FloatExponentUnsigned => switch (c) { State.FloatExponentUnsigned => switch (c) {
'+', '-' => { '+', '-' => {
state = State.FloatExponentNumber; state = State.FloatExponentNumberNoUnderscore;
}, },
else => { else => {
// reinterpret as a normal exponent number // reinterpret as a normal exponent number
self.index -= 1; self.index -= 1;
state = State.FloatExponentNumberNoUnderscore;
},
},
State.FloatExponentNumberNoUnderscore => switch (c) {
'0'...'9' => {
state = State.FloatExponentNumber; state = State.FloatExponentNumber;
}, },
},
State.FloatExponentUnsignedHex => switch (c) {
'+', '-' => {
state = State.FloatExponentNumberHex;
},
else => { else => {
// reinterpret as a normal exponent number result.id = Token.Id.Invalid;
self.index -= 1; break;
state = State.FloatExponentNumberHex;
}, },
}, },
State.FloatExponentNumber => switch (c) { State.FloatExponentNumber => switch (c) {
'0'...'9' => {}, '_' => {
else => break, state = State.FloatExponentNumberNoUnderscore;
},
'0'...'9' => {},
else => {
if (isIdentifierChar(c)) {
result.id = Token.Id.Invalid;
}
break;
}, },
State.FloatExponentNumberHex => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {},
else => break,
}, },
} }
} else if (self.index == self.buffer.len) { } else if (self.index == self.buffer.len) {
switch (state) { switch (state) {
State.Start, State.Start,
State.IntegerLiteral, State.IntegerLiteralDec,
State.IntegerLiteralWithRadix, State.IntegerLiteralBin,
State.IntegerLiteralWithRadixHex, State.IntegerLiteralOct,
State.FloatFraction, State.IntegerLiteralHex,
State.NumberDotDec,
State.NumberDotHex,
State.FloatFractionDec,
State.FloatFractionHex, State.FloatFractionHex,
State.FloatExponentNumber, State.FloatExponentNumber,
State.FloatExponentNumberHex,
State.StringLiteral, // find this error later State.StringLiteral, // find this error later
State.MultilineStringLiteralLine, State.MultilineStringLiteralLine,
State.Builtin, State.Builtin,
@ -1184,10 +1331,14 @@ pub const Tokenizer = struct {
result.id = Token.Id.ContainerDocComment; result.id = Token.Id.ContainerDocComment;
}, },
State.NumberDot, State.IntegerLiteralDecNoUnderscore,
State.NumberDotHex, State.IntegerLiteralBinNoUnderscore,
State.IntegerLiteralOctNoUnderscore,
State.IntegerLiteralHexNoUnderscore,
State.FloatFractionDecNoUnderscore,
State.FloatFractionHexNoUnderscore,
State.FloatExponentNumberNoUnderscore,
State.FloatExponentUnsigned, State.FloatExponentUnsigned,
State.FloatExponentUnsignedHex,
State.SawAtSign, State.SawAtSign,
State.Backslash, State.Backslash,
State.CharLiteral, State.CharLiteral,
@ -1585,6 +1736,236 @@ test "correctly parse pointer assignment" {
}); });
} }
// Pins the token ids produced for decimal integer and float literals,
// covering `_` digit separators and a range of malformed inputs.
test "tokenizer - number literals decimal" {
// each single digit is an integer literal on its own
testTokenize("0", &[_]Token.Id{.IntegerLiteral});
testTokenize("1", &[_]Token.Id{.IntegerLiteral});
testTokenize("2", &[_]Token.Id{.IntegerLiteral});
testTokenize("3", &[_]Token.Id{.IntegerLiteral});
testTokenize("4", &[_]Token.Id{.IntegerLiteral});
testTokenize("5", &[_]Token.Id{.IntegerLiteral});
testTokenize("6", &[_]Token.Id{.IntegerLiteral});
testTokenize("7", &[_]Token.Id{.IntegerLiteral});
testTokenize("8", &[_]Token.Id{.IntegerLiteral});
testTokenize("9", &[_]Token.Id{.IntegerLiteral});
// a letter directly after the digits: expect Invalid, then the rest as an Identifier
testTokenize("0a", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("9b", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1z", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1z_1", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("9z3", &[_]Token.Id{ .Invalid, .Identifier });
// leading zeros and single underscores between digits are accepted
testTokenize("0_0", &[_]Token.Id{.IntegerLiteral});
testTokenize("0001", &[_]Token.Id{.IntegerLiteral});
testTokenize("01234567890", &[_]Token.Id{.IntegerLiteral});
testTokenize("012_345_6789_0", &[_]Token.Id{.IntegerLiteral});
testTokenize("0_1_2_3_4_5_6_7_8_9_0", &[_]Token.Id{.IntegerLiteral});
// a separator must sit between two digits: trailing or doubled `_`, or a
// letter or punctuation after `_`, is Invalid
testTokenize("00_", &[_]Token.Id{.Invalid});
testTokenize("0_0_", &[_]Token.Id{.Invalid});
testTokenize("0__0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0_0f", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0_0_f", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0_0_f_00", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1_,", &[_]Token.Id{ .Invalid, .Comma });
// decimal floats: optional fraction, optional `e` exponent with optional
// sign, separators allowed inside every digit run
testTokenize("1.", &[_]Token.Id{.FloatLiteral});
testTokenize("0.0", &[_]Token.Id{.FloatLiteral});
testTokenize("1.0", &[_]Token.Id{.FloatLiteral});
testTokenize("10.0", &[_]Token.Id{.FloatLiteral});
testTokenize("0e0", &[_]Token.Id{.FloatLiteral});
testTokenize("1e0", &[_]Token.Id{.FloatLiteral});
testTokenize("1e100", &[_]Token.Id{.FloatLiteral});
testTokenize("1.e100", &[_]Token.Id{.FloatLiteral});
testTokenize("1.0e100", &[_]Token.Id{.FloatLiteral});
testTokenize("1.0e+100", &[_]Token.Id{.FloatLiteral});
testTokenize("1.0e-100", &[_]Token.Id{.FloatLiteral});
testTokenize("1_0_0_0.0_0_0_0_0_1e1_0_0_0", &[_]Token.Id{.FloatLiteral});
// a float ending in `.` is complete; the operator after it is its own token
testTokenize("1.+", &[_]Token.Id{ .FloatLiteral, .Plus });
// exponent with no digits, letters inside the exponent, or a `p` exponent
// on a decimal float: all Invalid
testTokenize("1e", &[_]Token.Id{.Invalid});
testTokenize("1.0e1f0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.0p100", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.0p-100", &[_]Token.Id{ .Invalid, .Identifier, .Minus, .IntegerLiteral });
testTokenize("1.0p1f0", &[_]Token.Id{ .Invalid, .Identifier });
// separators next to the `.`, and letters right after the `.`, are Invalid
testTokenize("1.0_,", &[_]Token.Id{ .Invalid, .Comma });
testTokenize("1_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral });
testTokenize("1._", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.a", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.z", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1._0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1._+", &[_]Token.Id{ .Invalid, .Identifier, .Plus });
testTokenize("1._e", &[_]Token.Id{ .Invalid, .Identifier });
// the exponent needs at least one digit and may not start or end with `_`
testTokenize("1.0e", &[_]Token.Id{.Invalid});
testTokenize("1.0e,", &[_]Token.Id{ .Invalid, .Comma });
testTokenize("1.0e_", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.0e+_", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.0e-_", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("1.0e0_+", &[_]Token.Id{ .Invalid, .Plus });
}
// Pins the token ids produced for `0b` binary integer literals, covering
// `_` digit separators and a range of malformed inputs.
test "tokenizer - number literals binary" {
// only 0 and 1 are binary digits
testTokenize("0b0", &[_]Token.Id{.IntegerLiteral});
testTokenize("0b1", &[_]Token.Id{.IntegerLiteral});
// a decimal digit 2-9 after the prefix: the prefix is Invalid and the digit
// is lexed as a separate integer literal
testTokenize("0b2", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0b3", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0b4", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0b5", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0b6", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0b7", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0b8", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0b9", &[_]Token.Id{ .Invalid, .IntegerLiteral });
// a letter after the prefix: Invalid, then the letter as an Identifier
testTokenize("0ba", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0bb", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0bc", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0bd", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0be", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0bf", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0bz", &[_]Token.Id{ .Invalid, .Identifier });
// single underscores between binary digits are accepted
testTokenize("0b0000_0000", &[_]Token.Id{.IntegerLiteral});
testTokenize("0b1111_1111", &[_]Token.Id{.IntegerLiteral});
testTokenize("0b10_10_10_10", &[_]Token.Id{.IntegerLiteral});
testTokenize("0b0_1_0_1_0_1_0_1", &[_]Token.Id{.IntegerLiteral});
// no binary float form: the `.` is tokenized separately as a Period
testTokenize("0b1.", &[_]Token.Id{ .IntegerLiteral, .Period });
testTokenize("0b1.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
// an uppercase `B` is not a radix prefix
testTokenize("0B0", &[_]Token.Id{ .Invalid, .Identifier });
// a separator must sit between two digits: not right after the prefix,
// not trailing, not doubled
testTokenize("0b_", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b_0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b1_", &[_]Token.Id{.Invalid});
testTokenize("0b0__1", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b0_1_", &[_]Token.Id{.Invalid});
// exponent markers are not accepted on binary literals
testTokenize("0b1e", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b1p", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b1e0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0b1p0", &[_]Token.Id{ .Invalid, .Identifier });
// a trailing separator followed by punctuation: Invalid, then the Comma
testTokenize("0b1_,", &[_]Token.Id{ .Invalid, .Comma });
}
// Pins the token ids produced for `0o` octal integer literals, covering
// `_` digit separators and a range of malformed inputs.
test "tokenizer - number literals octal" {
// 0 through 7 are the octal digits
testTokenize("0o0", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o1", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o2", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o3", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o4", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o5", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o6", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o7", &[_]Token.Id{.IntegerLiteral});
// 8 or 9 after the prefix: the prefix is Invalid and the digit is lexed as
// a separate integer literal
testTokenize("0o8", &[_]Token.Id{ .Invalid, .IntegerLiteral });
testTokenize("0o9", &[_]Token.Id{ .Invalid, .IntegerLiteral });
// a letter after the prefix: Invalid, then the letter as an Identifier
testTokenize("0oa", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0ob", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0oc", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0od", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0oe", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0of", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0oz", &[_]Token.Id{ .Invalid, .Identifier });
// single underscores between octal digits are accepted
testTokenize("0o01234567", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o0123_4567", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o01_23_45_67", &[_]Token.Id{.IntegerLiteral});
testTokenize("0o0_1_2_3_4_5_6_7", &[_]Token.Id{.IntegerLiteral});
// no octal float form: the `.` is tokenized separately as a Period
testTokenize("0o7.", &[_]Token.Id{ .IntegerLiteral, .Period });
testTokenize("0o7.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
// an uppercase `O` is not a radix prefix
testTokenize("0O0", &[_]Token.Id{ .Invalid, .Identifier });
// a separator must sit between two digits: not right after the prefix,
// not trailing, not doubled
testTokenize("0o_", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o_0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o1_", &[_]Token.Id{.Invalid});
testTokenize("0o0__1", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o0_1_", &[_]Token.Id{.Invalid});
// exponent markers are not accepted on octal literals
testTokenize("0o1e", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o1p", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o1e0", &[_]Token.Id{ .Invalid, .Identifier });
testTokenize("0o1p0", &[_]Token.Id{ .Invalid, .Identifier });
// a separator right after the prefix, followed by punctuation
testTokenize("0o_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma });
}
// Pins the token ids produced for `0x` hexadecimal integer and float
// literals, covering `_` digit separators, `p` exponents and a range of
// malformed inputs.
test "tokenizer - number literals hexadecimal" {
    // every hex digit, in either case, is accepted after the prefix
    testTokenize("0x0", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0x1", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0x2", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0x3", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0x4", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0x5", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0x6", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0x7", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0x8", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0x9", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0xa", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0xb", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0xc", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0xd", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0xe", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0xf", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0xA", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0xB", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0xC", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0xD", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0xE", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0xF", &[_]Token.Id{.IntegerLiteral});
    // a non-hex letter: Invalid, then the letter as an Identifier
    testTokenize("0x0z", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0xz", &[_]Token.Id{ .Invalid, .Identifier });

    // single underscores between hex digits are accepted
    testTokenize("0x0123456789ABCDEF", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0x0123_4567_89AB_CDEF", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0x01_23_45_67_89AB_CDE_F", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0x0_1_2_3_4_5_6_7_8_9_A_B_C_D_E_F", &[_]Token.Id{.IntegerLiteral});

    // an uppercase `X` is not a radix prefix
    testTokenize("0X0", &[_]Token.Id{ .Invalid, .Identifier });
    // a separator must sit between two digits: not right after the prefix,
    // not trailing, not doubled
    testTokenize("0x_", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0x_1", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0x1_", &[_]Token.Id{.Invalid});
    testTokenize("0x0__1", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0x0_1_", &[_]Token.Id{.Invalid});
    testTokenize("0x_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma });

    // hex floats: optional fraction, optional `p`/`P` exponent
    testTokenize("0x1.", &[_]Token.Id{.FloatLiteral});
    testTokenize("0x1.0", &[_]Token.Id{.FloatLiteral});
    testTokenize("0xF.", &[_]Token.Id{.FloatLiteral});
    testTokenize("0xF.0", &[_]Token.Id{.FloatLiteral});
    testTokenize("0xF.F", &[_]Token.Id{.FloatLiteral});
    testTokenize("0xF.Fp0", &[_]Token.Id{.FloatLiteral});
    testTokenize("0xF.FP0", &[_]Token.Id{.FloatLiteral});
    testTokenize("0x1p0", &[_]Token.Id{.FloatLiteral});
    testTokenize("0xfp0", &[_]Token.Id{.FloatLiteral});
    // a float ending in `.` is complete; the operator after it is its own token
    testTokenize("0x1.+0xF.", &[_]Token.Id{ .FloatLiteral, .Plus, .FloatLiteral });

    // separators inside the integer part and the fraction
    testTokenize("0x0123456.789ABCDEF", &[_]Token.Id{.FloatLiteral});
    testTokenize("0x0_123_456.789_ABC_DEF", &[_]Token.Id{.FloatLiteral});
    testTokenize("0x0_1_2_3_4_5_6.7_8_9_A_B_C_D_E_F", &[_]Token.Id{.FloatLiteral});
    // exponents, with optional sign and separators between exponent digits
    testTokenize("0x0p0", &[_]Token.Id{.FloatLiteral});
    testTokenize("0x0.0p0", &[_]Token.Id{.FloatLiteral});
    testTokenize("0xff.ffp10", &[_]Token.Id{.FloatLiteral});
    testTokenize("0xff.ffP10", &[_]Token.Id{.FloatLiteral});
    testTokenize("0xff.p10", &[_]Token.Id{.FloatLiteral});
    testTokenize("0xffp10", &[_]Token.Id{.FloatLiteral});
    testTokenize("0xff_ff.ff_ffp1_0_0_0", &[_]Token.Id{.FloatLiteral});
    testTokenize("0xf_f_f_f.f_f_f_fp+1_000", &[_]Token.Id{.FloatLiteral});
    testTokenize("0xf_f_f_f.f_f_f_fp-1_00_0", &[_]Token.Id{.FloatLiteral});

    // in a hex literal `e` is a digit, not an exponent marker
    testTokenize("0x1e", &[_]Token.Id{.IntegerLiteral});
    testTokenize("0x1e0", &[_]Token.Id{.IntegerLiteral});
    // the exponent needs at least one decimal digit; hex digits are not
    // accepted inside it
    testTokenize("0x1p", &[_]Token.Id{.Invalid});
    testTokenize("0xfp0z1", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0xff.ffpff", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0x0.p", &[_]Token.Id{.Invalid});
    // non-hex letters and separators next to the `.`, the `p` or the end of
    // the literal are Invalid
    testTokenize("0x0.z", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0x0._", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0x0_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral });
    testTokenize("0x0_.0.0", &[_]Token.Id{ .Invalid, .Period, .FloatLiteral });
    testTokenize("0x0._0", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0x0.0_", &[_]Token.Id{.Invalid});
    testTokenize("0x0_p0", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0x0_.p0", &[_]Token.Id{ .Invalid, .Period, .Identifier });
    testTokenize("0x0._p0", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0x0.0_p0", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0x0._0p0", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0x0.0p_0", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0x0.0p+_0", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0x0.0p-_0", &[_]Token.Id{ .Invalid, .Identifier });
    testTokenize("0x0.0p0_", &[_]Token.Id{ .Invalid, .Eof });
}
fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void { fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
var tokenizer = Tokenizer.init(source); var tokenizer = Tokenizer.init(source);
for (expected_tokens) |expected_token_id| { for (expected_tokens) |expected_token_id| {

View file

@ -1311,13 +1311,16 @@ pub const Builder = struct {
var base: u8 = undefined; var base: u8 = undefined;
var rest: []const u8 = undefined; var rest: []const u8 = undefined;
if (int_token.len >= 3 and int_token[0] == '0') { if (int_token.len >= 3 and int_token[0] == '0') {
base = switch (int_token[1]) {
'b' => 2,
'o' => 8,
'x' => 16,
else => unreachable,
};
rest = int_token[2..]; rest = int_token[2..];
switch (int_token[1]) {
'b' => base = 2,
'o' => base = 8,
'x' => base = 16,
else => {
base = 10;
rest = int_token;
},
}
} else { } else {
base = 10; base = 10;
rest = int_token; rest = int_token;

View file

@ -172,15 +172,29 @@ static long long scanexp(struct MuslFILE *f, int pok)
c = shgetc(f); c = shgetc(f);
if (c-'0'>=10U && pok) shunget(f); if (c-'0'>=10U && pok) shunget(f);
} }
if (c-'0'>=10U) { if (c-'0'>=10U && c!='_') {
shunget(f); shunget(f);
return LLONG_MIN; return LLONG_MIN;
} }
for (x=0; c-'0'<10U && x<INT_MAX/10; c = shgetc(f)) for (x=0; ; c = shgetc(f)) {
if (c=='_') {
continue;
} else if (c-'0'<10U && x<INT_MAX/10) {
x = 10*x + c-'0'; x = 10*x + c-'0';
for (y=x; c-'0'<10U && y<LLONG_MAX/100; c = shgetc(f)) } else {
break;
}
}
for (y=x; ; c = shgetc(f)) {
if (c=='_') {
continue;
} else if (c-'0'<10U && y<LLONG_MAX/100) {
y = 10*y + c-'0'; y = 10*y + c-'0';
for (; c-'0'<10U; c = shgetc(f)); } else {
break;
}
}
for (; c-'0'<10U || c=='_'; c = shgetc(f));
shunget(f); shunget(f);
return neg ? -y : y; return neg ? -y : y;
} }
@ -450,16 +464,36 @@ static float128_t decfloat(struct MuslFILE *f, int c, int bits, int emin, int si
j=0; j=0;
k=0; k=0;
/* Don't let leading zeros consume buffer space */ /* Don't let leading zeros/underscores consume buffer space */
for (; c=='0'; c = shgetc(f)) gotdig=1; for (; ; c = shgetc(f)) {
if (c=='_') {
continue;
} else if (c=='0') {
gotdig=1;
} else {
break;
}
}
if (c=='.') { if (c=='.') {
gotrad = 1; gotrad = 1;
for (c = shgetc(f); c=='0'; c = shgetc(f)) gotdig=1, lrp--; for (c = shgetc(f); ; c = shgetc(f)) {
if (c == '_') {
continue;
} else if (c=='0') {
gotdig=1;
lrp--;
} else {
break;
}
}
} }
x[0] = 0; x[0] = 0;
for (; c-'0'<10U || c=='.'; c = shgetc(f)) { for (; c-'0'<10U || c=='.' || c=='_'; c = shgetc(f)) {
if (c == '.') { if (c == '_') {
continue;
} else if (c == '.') {
if (gotrad) break; if (gotrad) break;
gotrad = 1; gotrad = 1;
lrp = dc; lrp = dc;
@ -773,18 +807,29 @@ static float128_t hexfloat(struct MuslFILE *f, int bits, int emin, int sign, int
c = shgetc(f); c = shgetc(f);
/* Skip leading zeros */ /* Skip leading zeros/underscores */
for (; c=='0'; c = shgetc(f)) gotdig = 1; for (; c=='0' || c=='_'; c = shgetc(f)) gotdig = 1;
if (c=='.') { if (c=='.') {
gotrad = 1; gotrad = 1;
c = shgetc(f); c = shgetc(f);
/* Count zeros after the radix point before significand */ /* Count zeros after the radix point before significand */
for (rp=0; c=='0'; c = shgetc(f), rp--) gotdig = 1; for (rp=0; ; c = shgetc(f)) {
if (c == '_') {
continue;
} else if (c == '0') {
gotdig = 1;
rp--;
} else {
break;
}
}
} }
for (; c-'0'<10U || (c|32)-'a'<6U || c=='.'; c = shgetc(f)) { for (; c-'0'<10U || (c|32)-'a'<6U || c=='.' || c=='_'; c = shgetc(f)) {
if (c=='.') { if (c=='_') {
continue;
} else if (c=='.') {
if (gotrad) break; if (gotrad) break;
rp = dc; rp = dc;
gotrad = 1; gotrad = 1;

View file

@ -177,10 +177,13 @@ enum TokenizeState {
TokenizeStateSymbol, TokenizeStateSymbol,
TokenizeStateZero, // "0", which might lead to "0x" TokenizeStateZero, // "0", which might lead to "0x"
TokenizeStateNumber, // "123", "0x123" TokenizeStateNumber, // "123", "0x123"
TokenizeStateNumberNoUnderscore, // "12_", "0x12_" next char must be digit
TokenizeStateNumberDot, TokenizeStateNumberDot,
TokenizeStateFloatFraction, // "123.456", "0x123.456" TokenizeStateFloatFraction, // "123.456", "0x123.456"
TokenizeStateFloatFractionNoUnderscore, // "123.45_", "0x123.45_"
TokenizeStateFloatExponentUnsigned, // "123.456e", "123e", "0x123p" TokenizeStateFloatExponentUnsigned, // "123.456e", "123e", "0x123p"
TokenizeStateFloatExponentNumber, // "123.456e-", "123.456e5", "123.456e5e-5" TokenizeStateFloatExponentNumber, // "123.456e7", "123.456e+7", "123.456e-7"
TokenizeStateFloatExponentNumberNoUnderscore, // "123.456e7_", "123.456e+7_", "123.456e-7_"
TokenizeStateString, TokenizeStateString,
TokenizeStateStringEscape, TokenizeStateStringEscape,
TokenizeStateStringEscapeUnicodeStart, TokenizeStateStringEscapeUnicodeStart,
@ -233,14 +236,10 @@ struct Tokenize {
Token *cur_tok; Token *cur_tok;
Tokenization *out; Tokenization *out;
uint32_t radix; uint32_t radix;
int32_t exp_add_amt; bool is_trailing_underscore;
bool is_exp_negative;
size_t char_code_index; size_t char_code_index;
bool unicode; bool unicode;
uint32_t char_code; uint32_t char_code;
int exponent_in_bin_or_dec;
BigInt specified_exponent;
BigInt significand;
size_t remaining_code_units; size_t remaining_code_units;
}; };
@ -426,20 +425,16 @@ void tokenize(Buf *buf, Tokenization *out) {
case '0': case '0':
t.state = TokenizeStateZero; t.state = TokenizeStateZero;
begin_token(&t, TokenIdIntLiteral); begin_token(&t, TokenIdIntLiteral);
t.is_trailing_underscore = false;
t.radix = 10; t.radix = 10;
t.exp_add_amt = 1;
t.exponent_in_bin_or_dec = 0;
bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, 0); bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, 0);
bigint_init_unsigned(&t.specified_exponent, 0);
break; break;
case DIGIT_NON_ZERO: case DIGIT_NON_ZERO:
t.state = TokenizeStateNumber; t.state = TokenizeStateNumber;
begin_token(&t, TokenIdIntLiteral); begin_token(&t, TokenIdIntLiteral);
t.is_trailing_underscore = false;
t.radix = 10; t.radix = 10;
t.exp_add_amt = 1;
t.exponent_in_bin_or_dec = 0;
bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, get_digit_value(c)); bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, get_digit_value(c));
bigint_init_unsigned(&t.specified_exponent, 0);
break; break;
case '"': case '"':
begin_token(&t, TokenIdStringLiteral); begin_token(&t, TokenIdStringLiteral);
@ -1189,17 +1184,15 @@ void tokenize(Buf *buf, Tokenization *out) {
switch (c) { switch (c) {
case 'b': case 'b':
t.radix = 2; t.radix = 2;
t.state = TokenizeStateNumber; t.state = TokenizeStateNumberNoUnderscore;
break; break;
case 'o': case 'o':
t.radix = 8; t.radix = 8;
t.exp_add_amt = 3; t.state = TokenizeStateNumberNoUnderscore;
t.state = TokenizeStateNumber;
break; break;
case 'x': case 'x':
t.radix = 16; t.radix = 16;
t.exp_add_amt = 4; t.state = TokenizeStateNumberNoUnderscore;
t.state = TokenizeStateNumber;
break; break;
default: default:
// reinterpret as normal number // reinterpret as normal number
@ -1208,9 +1201,27 @@ void tokenize(Buf *buf, Tokenization *out) {
continue; continue;
} }
break; break;
case TokenizeStateNumberNoUnderscore:
if (c == '_') {
invalid_char_error(&t, c);
break;
} else if (get_digit_value(c) < t.radix) {
t.is_trailing_underscore = false;
t.state = TokenizeStateNumber;
}
// fall through
case TokenizeStateNumber: case TokenizeStateNumber:
{ {
if (c == '_') {
t.is_trailing_underscore = true;
t.state = TokenizeStateNumberNoUnderscore;
break;
}
if (c == '.') { if (c == '.') {
if (t.is_trailing_underscore) {
invalid_char_error(&t, c);
break;
}
if (t.radix != 16 && t.radix != 10) { if (t.radix != 16 && t.radix != 10) {
invalid_char_error(&t, c); invalid_char_error(&t, c);
} }
@ -1218,17 +1229,26 @@ void tokenize(Buf *buf, Tokenization *out) {
break; break;
} }
if (is_exponent_signifier(c, t.radix)) { if (is_exponent_signifier(c, t.radix)) {
if (t.is_trailing_underscore) {
invalid_char_error(&t, c);
break;
}
if (t.radix != 16 && t.radix != 10) { if (t.radix != 16 && t.radix != 10) {
invalid_char_error(&t, c); invalid_char_error(&t, c);
} }
t.state = TokenizeStateFloatExponentUnsigned; t.state = TokenizeStateFloatExponentUnsigned;
t.radix = 10; // exponent is always base 10
assert(t.cur_tok->id == TokenIdIntLiteral); assert(t.cur_tok->id == TokenIdIntLiteral);
bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint);
set_token_id(&t, t.cur_tok, TokenIdFloatLiteral); set_token_id(&t, t.cur_tok, TokenIdFloatLiteral);
break; break;
} }
uint32_t digit_value = get_digit_value(c); uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) { if (digit_value >= t.radix) {
if (t.is_trailing_underscore) {
invalid_char_error(&t, c);
break;
}
if (is_symbol_char(c)) { if (is_symbol_char(c)) {
invalid_char_error(&t, c); invalid_char_error(&t, c);
} }
@ -1259,20 +1279,41 @@ void tokenize(Buf *buf, Tokenization *out) {
continue; continue;
} }
t.pos -= 1; t.pos -= 1;
t.state = TokenizeStateFloatFraction; t.state = TokenizeStateFloatFractionNoUnderscore;
assert(t.cur_tok->id == TokenIdIntLiteral); assert(t.cur_tok->id == TokenIdIntLiteral);
bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint);
set_token_id(&t, t.cur_tok, TokenIdFloatLiteral); set_token_id(&t, t.cur_tok, TokenIdFloatLiteral);
continue; continue;
} }
case TokenizeStateFloatFractionNoUnderscore:
    // Entered right after the radix point or after a '_' separator inside the
    // fraction: the next character has to be a digit of the current radix.
    if (c == '_') {
        invalid_char_error(&t, c);
        // Stop handling this character here, exactly as
        // TokenizeStateNumberNoUnderscore does. Without the break, the
        // fall-through below would accept the same '_' as a separator and
        // overwrite t.state after the error was reported.
        break;
    } else if (get_digit_value(c) < t.radix) {
        t.is_trailing_underscore = false;
        t.state = TokenizeStateFloatFraction;
    }
    // fall through
case TokenizeStateFloatFraction: case TokenizeStateFloatFraction:
{ {
if (c == '_') {
t.is_trailing_underscore = true;
t.state = TokenizeStateFloatFractionNoUnderscore;
break;
}
if (is_exponent_signifier(c, t.radix)) { if (is_exponent_signifier(c, t.radix)) {
if (t.is_trailing_underscore) {
invalid_char_error(&t, c);
break;
}
t.state = TokenizeStateFloatExponentUnsigned; t.state = TokenizeStateFloatExponentUnsigned;
t.radix = 10; // exponent is always base 10
break; break;
} }
uint32_t digit_value = get_digit_value(c); uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) { if (digit_value >= t.radix) {
if (t.is_trailing_underscore) {
invalid_char_error(&t, c);
break;
}
if (is_symbol_char(c)) { if (is_symbol_char(c)) {
invalid_char_error(&t, c); invalid_char_error(&t, c);
} }
@ -1282,46 +1323,47 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateStart; t.state = TokenizeStateStart;
continue; continue;
} }
t.exponent_in_bin_or_dec -= t.exp_add_amt;
if (t.radix == 10) { // we use parse_f128 to generate the float literal, so just
// For now we use strtod to parse decimal floats, so we just have to get to the // need to get to the end of the token
// end of the token.
break;
} }
BigInt digit_value_bi;
bigint_init_unsigned(&digit_value_bi, digit_value);
BigInt radix_bi;
bigint_init_unsigned(&radix_bi, t.radix);
BigInt multiplied;
bigint_mul(&multiplied, &t.significand, &radix_bi);
bigint_add(&t.significand, &multiplied, &digit_value_bi);
break; break;
}
case TokenizeStateFloatExponentUnsigned: case TokenizeStateFloatExponentUnsigned:
switch (c) { switch (c) {
case '+': case '+':
t.is_exp_negative = false; t.state = TokenizeStateFloatExponentNumberNoUnderscore;
t.state = TokenizeStateFloatExponentNumber;
break; break;
case '-': case '-':
t.is_exp_negative = true; t.state = TokenizeStateFloatExponentNumberNoUnderscore;
t.state = TokenizeStateFloatExponentNumber;
break; break;
default: default:
// reinterpret as normal exponent number // reinterpret as normal exponent number
t.pos -= 1; t.pos -= 1;
t.is_exp_negative = false; t.state = TokenizeStateFloatExponentNumberNoUnderscore;
t.state = TokenizeStateFloatExponentNumber;
continue; continue;
} }
break; break;
case TokenizeStateFloatExponentNumberNoUnderscore:
    // Entered after the exponent marker (and its optional sign) or after a
    // '_' separator inside the exponent: the next character has to be a digit.
    if (c == '_') {
        invalid_char_error(&t, c);
        // Stop handling this character here, exactly as
        // TokenizeStateNumberNoUnderscore does. Without the break, the
        // fall-through below would accept the same '_' as a separator and
        // overwrite t.state after the error was reported.
        break;
    } else if (get_digit_value(c) < t.radix) {
        t.is_trailing_underscore = false;
        t.state = TokenizeStateFloatExponentNumber;
    }
    // fall through
case TokenizeStateFloatExponentNumber: case TokenizeStateFloatExponentNumber:
{ {
if (c == '_') {
t.is_trailing_underscore = true;
t.state = TokenizeStateFloatExponentNumberNoUnderscore;
break;
}
uint32_t digit_value = get_digit_value(c); uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) { if (digit_value >= t.radix) {
if (t.is_trailing_underscore) {
invalid_char_error(&t, c);
break;
}
if (is_symbol_char(c)) { if (is_symbol_char(c)) {
invalid_char_error(&t, c); invalid_char_error(&t, c);
} }
@ -1331,21 +1373,9 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateStart; t.state = TokenizeStateStart;
continue; continue;
} }
if (t.radix == 10) {
// For now we use strtod to parse decimal floats, so we just have to get to the
// end of the token.
break;
}
BigInt digit_value_bi;
bigint_init_unsigned(&digit_value_bi, digit_value);
BigInt radix_bi; // we use parse_f128 to generate the float literal, so just
bigint_init_unsigned(&radix_bi, 10); // need to get to the end of the token
BigInt multiplied;
bigint_mul(&multiplied, &t.specified_exponent, &radix_bi);
bigint_add(&t.specified_exponent, &multiplied, &digit_value_bi);
} }
break; break;
case TokenizeStateSawDash: case TokenizeStateSawDash:
@ -1399,6 +1429,9 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateStart: case TokenizeStateStart:
case TokenizeStateError: case TokenizeStateError:
break; break;
case TokenizeStateNumberNoUnderscore:
case TokenizeStateFloatFractionNoUnderscore:
case TokenizeStateFloatExponentNumberNoUnderscore:
case TokenizeStateNumberDot: case TokenizeStateNumberDot:
tokenize_error(&t, "unterminated number literal"); tokenize_error(&t, "unterminated number literal");
break; break;

View file

@ -395,11 +395,163 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
\\ var bad_float :f32 = 0.0; \\ var bad_float :f32 = 0.0;
\\ bad_float = bad_float + .20; \\ bad_float = bad_float + .20;
\\ std.debug.assert(bad_float < 1.0); \\ std.debug.assert(bad_float < 1.0);
\\}) \\}
, &[_][]const u8{ , &[_][]const u8{
"tmp.zig:5:29: error: invalid token: '.'", "tmp.zig:5:29: error: invalid token: '.'",
}); });
cases.add("invalid exponent in float literal - 1",
\\fn main() void {
\\ var bad: f128 = 0x1.0p1ab1;
\\}
, &[_][]const u8{
"tmp.zig:2:28: error: invalid character: 'a'",
});
cases.add("invalid exponent in float literal - 2",
\\fn main() void {
\\ var bad: f128 = 0x1.0p50F;
\\}
, &[_][]const u8{
"tmp.zig:2:29: error: invalid character: 'F'",
});
cases.add("invalid underscore placement in float literal - 1",
\\fn main() void {
\\ var bad: f128 = 0._0;
\\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 2",
\\fn main() void {
\\ var bad: f128 = 0_.0;
\\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: '.'",
});
cases.add("invalid underscore placement in float literal - 3",
\\fn main() void {
\\ var bad: f128 = 0.0_;
\\}
, &[_][]const u8{
"tmp.zig:2:25: error: invalid character: ';'",
});
cases.add("invalid underscore placement in float literal - 4",
\\fn main() void {
\\ var bad: f128 = 1.0e_1;
\\}
, &[_][]const u8{
"tmp.zig:2:25: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 5",
\\fn main() void {
\\ var bad: f128 = 1.0e+_1;
\\}
, &[_][]const u8{
"tmp.zig:2:26: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 6",
\\fn main() void {
\\ var bad: f128 = 1.0e-_1;
\\}
, &[_][]const u8{
"tmp.zig:2:26: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 7",
\\fn main() void {
\\ var bad: f128 = 1.0e-1_;
\\}
, &[_][]const u8{
"tmp.zig:2:28: error: invalid character: ';'",
});
cases.add("invalid underscore placement in float literal - 9",
\\fn main() void {
\\ var bad: f128 = 1__0.0e-1;
\\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 10",
\\fn main() void {
\\ var bad: f128 = 1.0__0e-1;
\\}
, &[_][]const u8{
"tmp.zig:2:25: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 11",
\\fn main() void {
\\ var bad: f128 = 1.0e-1__0;
\\}
, &[_][]const u8{
"tmp.zig:2:28: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 12",
\\fn main() void {
\\ var bad: f128 = 0_x0.0;
\\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: 'x'",
});
cases.add("invalid underscore placement in float literal - 13",
\\fn main() void {
\\ var bad: f128 = 0x_0.0;
\\}
, &[_][]const u8{
"tmp.zig:2:23: error: invalid character: '_'",
});
cases.add("invalid underscore placement in float literal - 14",
\\fn main() void {
\\ var bad: f128 = 0x0.0_p1;
\\}
, &[_][]const u8{
"tmp.zig:2:27: error: invalid character: 'p'",
});
cases.add("invalid underscore placement in int literal - 1",
\\fn main() void {
\\ var bad: u128 = 0010_;
\\}
, &[_][]const u8{
"tmp.zig:2:26: error: invalid character: ';'",
});
cases.add("invalid underscore placement in int literal - 2",
\\fn main() void {
\\ var bad: u128 = 0b0010_;
\\}
, &[_][]const u8{
"tmp.zig:2:28: error: invalid character: ';'",
});
cases.add("invalid underscore placement in int literal - 3",
\\fn main() void {
\\ var bad: u128 = 0o0010_;
\\}
, &[_][]const u8{
"tmp.zig:2:28: error: invalid character: ';'",
});
cases.add("invalid underscore placement in int literal - 4",
\\fn main() void {
\\ var bad: u128 = 0x0010_;
\\}
, &[_][]const u8{
"tmp.zig:2:28: error: invalid character: ';'",
});
cases.add("var args without c calling conv", cases.add("var args without c calling conv",
\\fn foo(args: ...) void {} \\fn foo(args: ...) void {}
\\comptime { \\comptime {

View file

@ -411,6 +411,34 @@ test "quad hex float literal parsing accurate" {
comptime S.doTheTest(); comptime S.doTheTest();
} }
// Checks that `_` digit separators never change the value of a numeric
// literal: each assertion compares a literal written with separators against
// the same literal written without them.
test "underscore separator parsing" {
// Decimal integers, including leading zeros and a separator after every digit.
expect(0_0_0_0 == 0);
expect(1_234_567 == 1234567);
expect(001_234_567 == 1234567);
expect(0_0_1_2_3_4_5_6_7 == 1234567);
// Binary integers (`0b` prefix).
expect(0b0_0_0_0 == 0);
expect(0b1010_1010 == 0b10101010);
expect(0b0000_1010_1010 == 0b10101010);
expect(0b1_0_1_0_1_0_1_0 == 0b10101010);
// Octal integers (`0o` prefix).
expect(0o0_0_0_0 == 0);
expect(0o1010_1010 == 0o10101010);
expect(0o0000_1010_1010 == 0o10101010);
expect(0o1_0_1_0_1_0_1_0 == 0o10101010);
// Hexadecimal integers (`0x` prefix).
expect(0x0_0_0_0 == 0);
expect(0x1010_1010 == 0x10101010);
expect(0x0000_1010_1010 == 0x10101010);
expect(0x1_0_1_0_1_0_1_0 == 0x10101010);
// Decimal floats: separators in the integer part, the fraction and the exponent.
expect(123_456.789_000e1_0 == 123456.789000e10);
expect(0_1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0 == 123456.789000e10);
// Hexadecimal floats: separators in the significand and in the `p` exponent.
expect(0x1234_5678.9ABC_DEF0p-1_0 == 0x12345678.9ABCDEF0p-10);
expect(0x1_2_3_4_5_6_7_8.9_A_B_C_D_E_F_0p-0_0_0_1_0 == 0x12345678.9ABCDEF0p-10);
}
test "hex float literal within range" { test "hex float literal within range" {
const a = 0x1.0p16383; const a = 0x1.0p16383;
const b = 0x0.1p16387; const b = 0x0.1p16387;