diff --git a/lib/std/json.zig b/lib/std/json.zig
index 011463faef..10449cdace 100644
--- a/lib/std/json.zig
+++ b/lib/std/json.zig
@@ -1,2818 +1,59 @@
-// JSON parser conforming to RFC8259.
-//
-// https://tools.ietf.org/html/rfc8259
+//! JSON parsing and stringification conforming to RFC 8259. https://datatracker.ietf.org/doc/html/rfc8259
+//!
+//! The low-level `Scanner` API reads from an input slice or from successive slices of input.
+//! The `Reader` API connects a `std.io.Reader` to a `Scanner`.
+//!
+//! The high-level `parseFromSlice` and `parseFromTokenSource` deserialize a JSON document into a Zig type.
+//! The high-level `Parser` parses any JSON document into a dynamically typed `ValueTree` that has its own memory arena.
+//!
+//! The low-level `writeStream` emits syntax-conformant JSON tokens to a `std.io.Writer`.
+//! The high-level `stringify` serializes a Zig type into JSON.
-const builtin = @import("builtin");
-const std = @import("std.zig");
-const debug = std.debug;
-const assert = debug.assert;
-const testing = std.testing;
-const mem = std.mem;
-const maxInt = std.math.maxInt;
+pub const ValueTree = @import("json/dynamic.zig").ValueTree;
+pub const ObjectMap = @import("json/dynamic.zig").ObjectMap;
+pub const Array = @import("json/dynamic.zig").Array;
+pub const Value = @import("json/dynamic.zig").Value;
+pub const Parser = @import("json/dynamic.zig").Parser;
+
+pub const validate = @import("json/scanner.zig").validate;
+pub const Error = @import("json/scanner.zig").Error;
+pub const reader = @import("json/scanner.zig").reader;
+pub const default_buffer_size = @import("json/scanner.zig").default_buffer_size;
+pub const Token = @import("json/scanner.zig").Token;
+pub const TokenType = @import("json/scanner.zig").TokenType;
+pub const Diagnostics = @import("json/scanner.zig").Diagnostics;
+pub const AllocWhen = @import("json/scanner.zig").AllocWhen;
+pub const default_max_value_len = @import("json/scanner.zig").default_max_value_len;
+pub const Reader = @import("json/scanner.zig").Reader;
+pub const Scanner = @import("json/scanner.zig").Scanner;
+pub const isNumberFormattedLikeAnInteger = @import("json/scanner.zig").isNumberFormattedLikeAnInteger;
+
+pub const ParseOptions = @import("json/static.zig").ParseOptions;
+pub const parseFromSlice = @import("json/static.zig").parseFromSlice;
+pub const parseFromTokenSource = @import("json/static.zig").parseFromTokenSource;
+pub const ParseError = @import("json/static.zig").ParseError;
+pub const parseFree = @import("json/static.zig").parseFree;
+
+pub const StringifyOptions = @import("json/stringify.zig").StringifyOptions;
+pub const encodeJsonString = @import("json/stringify.zig").encodeJsonString;
+pub const encodeJsonStringChars = @import("json/stringify.zig").encodeJsonStringChars;
+pub const stringify = @import("json/stringify.zig").stringify;
+pub const stringifyAlloc = @import("json/stringify.zig").stringifyAlloc;
 pub const WriteStream = @import("json/write_stream.zig").WriteStream;
 pub const writeStream = @import("json/write_stream.zig").writeStream;
-const StringEscapes = union(enum) {
-    None,
-
-    Some: struct {
-        size_diff: isize,
-    },
-};
-
-/// Checks to see if a string matches what it would be as a json-encoded string
-/// Assumes that `encoded` is a well-formed json string
-fn encodesTo(decoded: []const u8, encoded: []const u8) bool {
-    var i: usize = 0;
-    var j: usize = 0;
-    while (i < decoded.len) {
-        if (j >= encoded.len) return false;
-        if (encoded[j] != '\\') {
-            if (decoded[i] != encoded[j]) return false;
-            j
+= 1; - i += 1; - } else { - const escape_type = encoded[j + 1]; - if (escape_type != 'u') { - const t: u8 = switch (escape_type) { - '\\' => '\\', - '/' => '/', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - 'f' => 12, - 'b' => 8, - '"' => '"', - else => unreachable, - }; - if (decoded[i] != t) return false; - j += 2; - i += 1; - } else { - var codepoint = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable; - j += 6; - if (codepoint >= 0xD800 and codepoint < 0xDC00) { - // surrogate pair - assert(encoded[j] == '\\'); - assert(encoded[j + 1] == 'u'); - const low_surrogate = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable; - codepoint = 0x10000 + (((codepoint & 0x03ff) << 10) | (low_surrogate & 0x03ff)); - j += 6; - } - var buf: [4]u8 = undefined; - const len = std.unicode.utf8Encode(codepoint, &buf) catch unreachable; - if (i + len > decoded.len) return false; - if (!mem.eql(u8, decoded[i..][0..len], buf[0..len])) return false; - i += len; - } - } - } - assert(i == decoded.len); - assert(j == encoded.len); - return true; -} - -/// A single token slice into the parent string. -/// -/// Use `token.slice()` on the input at the current position to get the current slice. -pub const Token = union(enum) { - ObjectBegin, - ObjectEnd, - ArrayBegin, - ArrayEnd, - String: struct { - /// How many bytes the token is. - count: usize, - - /// Whether string contains an escape sequence and cannot be zero-copied - escapes: StringEscapes, - - pub fn decodedLength(self: @This()) usize { - return self.count +% switch (self.escapes) { - .None => 0, - .Some => |s| @bitCast(usize, s.size_diff), - }; - } - - /// Slice into the underlying input string. - pub fn slice(self: @This(), input: []const u8, i: usize) []const u8 { - return input[i - self.count .. i]; - } - }, - Number: struct { - /// How many bytes the token is. - count: usize, - - /// Whether number is simple and can be represented by an integer (i.e. no `.` or `e`) - is_integer: bool, - - /// Slice into the underlying input string. - pub fn slice(self: @This(), input: []const u8, i: usize) []const u8 { - return input[i - self.count .. i]; - } - }, - True, - False, - Null, -}; - -const AggregateContainerType = enum(u1) { object, array }; - -// A LIFO bit-stack. Tracks which container-types have been entered during parse. 
-fn AggregateContainerStack(comptime n: usize) type { - return struct { - const Self = @This(); - - const element_bitcount = 8 * @sizeOf(usize); - const element_count = n / element_bitcount; - const ElementType = @Type(.{ .Int = .{ .signedness = .unsigned, .bits = element_bitcount } }); - const ElementShiftAmountType = std.math.Log2Int(ElementType); - - comptime { - std.debug.assert(n % element_bitcount == 0); - } - - memory: [element_count]ElementType, - len: usize, - - pub fn init(self: *Self) void { - self.memory = [_]ElementType{0} ** element_count; - self.len = 0; - } - - pub fn push(self: *Self, ty: AggregateContainerType) ?void { - if (self.len >= n) { - return null; - } - - const index = self.len / element_bitcount; - const sub_index = @intCast(ElementShiftAmountType, self.len % element_bitcount); - const clear_mask = ~(@as(ElementType, 1) << sub_index); - const set_bits = @as(ElementType, @enumToInt(ty)) << sub_index; - - self.memory[index] &= clear_mask; - self.memory[index] |= set_bits; - self.len += 1; - } - - pub fn peek(self: *Self) ?AggregateContainerType { - if (self.len == 0) { - return null; - } - - const bit_to_extract = self.len - 1; - const index = bit_to_extract / element_bitcount; - const sub_index = @intCast(ElementShiftAmountType, bit_to_extract % element_bitcount); - const bit = @intCast(u1, (self.memory[index] >> sub_index) & 1); - return @intToEnum(AggregateContainerType, bit); - } - - pub fn pop(self: *Self) ?AggregateContainerType { - if (self.peek()) |ty| { - self.len -= 1; - return ty; - } - - return null; - } - }; -} - -/// A small streaming JSON parser. This accepts input one byte at a time and returns tokens as -/// they are encountered. No copies or allocations are performed during parsing and the entire -/// parsing state requires ~40-50 bytes of stack space. -/// -/// Conforms strictly to RFC8259. -/// -/// For a non-byte based wrapper, consider using TokenStream instead. -pub const StreamingParser = struct { - const default_max_nestings = 256; - - // Current state - state: State, - // How many bytes we have counted for the current token - count: usize, - // What state to follow after parsing a string (either property or value string) - after_string_state: State, - // What state to follow after parsing a value (either top-level or value end) - after_value_state: State, - // If we stopped now, would the complete parsed string to now be a valid json string - complete: bool, - // Current token flags to pass through to the next generated, see Token. - string_escapes: StringEscapes, - // When in .String states, was the previous character a high surrogate? - string_last_was_high_surrogate: bool, - // Used inside of StringEscapeHexUnicode* states - string_unicode_codepoint: u21, - // The first byte needs to be stored to validate 3- and 4-byte sequences. - sequence_first_byte: u8 = undefined, - // When in .Number states, is the number a (still) valid integer? - number_is_integer: bool, - // Bit-stack for nested object/map literals (max 256 nestings). 
- stack: AggregateContainerStack(default_max_nestings), - - pub fn init() StreamingParser { - var p: StreamingParser = undefined; - p.reset(); - return p; - } - - pub fn reset(p: *StreamingParser) void { - p.state = .TopLevelBegin; - p.count = 0; - // Set before ever read in main transition function - p.after_string_state = undefined; - p.after_value_state = .ValueEnd; // handle end of values normally - p.stack.init(); - p.complete = false; - p.string_escapes = undefined; - p.string_last_was_high_surrogate = undefined; - p.string_unicode_codepoint = undefined; - p.number_is_integer = undefined; - } - - pub const State = enum(u8) { - // These must be first with these explicit values as we rely on them for indexing the - // bit-stack directly and avoiding a branch. - ObjectSeparator = 0, - ValueEnd = 1, - - TopLevelBegin, - TopLevelEnd, - - ValueBegin, - ValueBeginNoClosing, - - String, - StringUtf8Byte2Of2, - StringUtf8Byte2Of3, - StringUtf8Byte3Of3, - StringUtf8Byte2Of4, - StringUtf8Byte3Of4, - StringUtf8Byte4Of4, - StringEscapeCharacter, - StringEscapeHexUnicode4, - StringEscapeHexUnicode3, - StringEscapeHexUnicode2, - StringEscapeHexUnicode1, - - Number, - NumberMaybeDotOrExponent, - NumberMaybeDigitOrDotOrExponent, - NumberFractionalRequired, - NumberFractional, - NumberMaybeExponent, - NumberExponent, - NumberExponentDigitsRequired, - NumberExponentDigits, - - TrueLiteral1, - TrueLiteral2, - TrueLiteral3, - - FalseLiteral1, - FalseLiteral2, - FalseLiteral3, - FalseLiteral4, - - NullLiteral1, - NullLiteral2, - NullLiteral3, - - // Given an aggregate container type, return the state which should be entered after - // processing a complete value type. - pub fn fromAggregateContainerType(ty: AggregateContainerType) State { - comptime { - std.debug.assert(@enumToInt(AggregateContainerType.object) == @enumToInt(State.ObjectSeparator)); - std.debug.assert(@enumToInt(AggregateContainerType.array) == @enumToInt(State.ValueEnd)); - } - - return @intToEnum(State, @enumToInt(ty)); - } - }; - - pub const Error = error{ - InvalidTopLevel, - TooManyNestedItems, - TooManyClosingItems, - InvalidValueBegin, - InvalidValueEnd, - UnbalancedBrackets, - UnbalancedBraces, - UnexpectedClosingBracket, - UnexpectedClosingBrace, - InvalidNumber, - InvalidSeparator, - InvalidLiteral, - InvalidEscapeCharacter, - InvalidUnicodeHexSymbol, - InvalidUtf8Byte, - InvalidTopLevelTrailing, - InvalidControlCharacter, - }; - - /// Give another byte to the parser and obtain any new tokens. This may (rarely) return two - /// tokens. token2 is always null if token1 is null. - /// - /// There is currently no error recovery on a bad stream. - pub fn feed(p: *StreamingParser, c: u8, token1: *?Token, token2: *?Token) Error!void { - token1.* = null; - token2.* = null; - p.count += 1; - - // unlikely - if (try p.transition(c, token1)) { - _ = try p.transition(c, token2); - } - } - - // Perform a single transition on the state machine and return any possible token. 
- fn transition(p: *StreamingParser, c: u8, token: *?Token) Error!bool { - switch (p.state) { - .TopLevelBegin => switch (c) { - '{' => { - p.stack.push(.object) orelse return error.TooManyNestedItems; - p.state = .ValueBegin; - p.after_string_state = .ObjectSeparator; - - token.* = Token.ObjectBegin; - }, - '[' => { - p.stack.push(.array) orelse return error.TooManyNestedItems; - p.state = .ValueBegin; - p.after_string_state = .ValueEnd; - - token.* = Token.ArrayBegin; - }, - '-' => { - p.number_is_integer = true; - p.state = .Number; - p.after_value_state = .TopLevelEnd; - p.count = 0; - }, - '0' => { - p.number_is_integer = true; - p.state = .NumberMaybeDotOrExponent; - p.after_value_state = .TopLevelEnd; - p.count = 0; - }, - '1'...'9' => { - p.number_is_integer = true; - p.state = .NumberMaybeDigitOrDotOrExponent; - p.after_value_state = .TopLevelEnd; - p.count = 0; - }, - '"' => { - p.state = .String; - p.after_value_state = .TopLevelEnd; - // We don't actually need the following since after_value_state should override. - p.after_string_state = .ValueEnd; - p.string_escapes = .None; - p.string_last_was_high_surrogate = false; - p.count = 0; - }, - 't' => { - p.state = .TrueLiteral1; - p.after_value_state = .TopLevelEnd; - p.count = 0; - }, - 'f' => { - p.state = .FalseLiteral1; - p.after_value_state = .TopLevelEnd; - p.count = 0; - }, - 'n' => { - p.state = .NullLiteral1; - p.after_value_state = .TopLevelEnd; - p.count = 0; - }, - 0x09, 0x0A, 0x0D, 0x20 => { - // whitespace - }, - else => { - return error.InvalidTopLevel; - }, - }, - - .TopLevelEnd => switch (c) { - 0x09, 0x0A, 0x0D, 0x20 => { - // whitespace - }, - else => { - return error.InvalidTopLevelTrailing; - }, - }, - - .ValueBegin => switch (c) { - // NOTE: These are shared in ValueEnd as well, think we can reorder states to - // be a bit clearer and avoid this duplication. 
- '}' => { - const last_type = p.stack.peek() orelse return error.TooManyClosingItems; - - if (last_type != .object) { - return error.UnexpectedClosingBrace; - } - - _ = p.stack.pop(); - p.state = .ValueBegin; - p.after_string_state = State.fromAggregateContainerType(last_type); - - switch (p.stack.len) { - 0 => { - p.complete = true; - p.state = .TopLevelEnd; - }, - else => { - p.state = .ValueEnd; - }, - } - - token.* = Token.ObjectEnd; - }, - ']' => { - const last_type = p.stack.peek() orelse return error.TooManyClosingItems; - - if (last_type != .array) { - return error.UnexpectedClosingBracket; - } - - _ = p.stack.pop(); - p.state = .ValueBegin; - p.after_string_state = State.fromAggregateContainerType(last_type); - - switch (p.stack.len) { - 0 => { - p.complete = true; - p.state = .TopLevelEnd; - }, - else => { - p.state = .ValueEnd; - }, - } - - token.* = Token.ArrayEnd; - }, - '{' => { - p.stack.push(.object) orelse return error.TooManyNestedItems; - - p.state = .ValueBegin; - p.after_string_state = .ObjectSeparator; - - token.* = Token.ObjectBegin; - }, - '[' => { - p.stack.push(.array) orelse return error.TooManyNestedItems; - - p.state = .ValueBegin; - p.after_string_state = .ValueEnd; - - token.* = Token.ArrayBegin; - }, - '-' => { - p.number_is_integer = true; - p.state = .Number; - p.count = 0; - }, - '0' => { - p.number_is_integer = true; - p.state = .NumberMaybeDotOrExponent; - p.count = 0; - }, - '1'...'9' => { - p.number_is_integer = true; - p.state = .NumberMaybeDigitOrDotOrExponent; - p.count = 0; - }, - '"' => { - p.state = .String; - p.string_escapes = .None; - p.string_last_was_high_surrogate = false; - p.count = 0; - }, - 't' => { - p.state = .TrueLiteral1; - p.count = 0; - }, - 'f' => { - p.state = .FalseLiteral1; - p.count = 0; - }, - 'n' => { - p.state = .NullLiteral1; - p.count = 0; - }, - 0x09, 0x0A, 0x0D, 0x20 => { - // whitespace - }, - else => { - return error.InvalidValueBegin; - }, - }, - - // TODO: A bit of duplication here and in the following state, redo. 
- .ValueBeginNoClosing => switch (c) { - '{' => { - p.stack.push(.object) orelse return error.TooManyNestedItems; - - p.state = .ValueBegin; - p.after_string_state = .ObjectSeparator; - - token.* = Token.ObjectBegin; - }, - '[' => { - p.stack.push(.array) orelse return error.TooManyNestedItems; - - p.state = .ValueBegin; - p.after_string_state = .ValueEnd; - - token.* = Token.ArrayBegin; - }, - '-' => { - p.number_is_integer = true; - p.state = .Number; - p.count = 0; - }, - '0' => { - p.number_is_integer = true; - p.state = .NumberMaybeDotOrExponent; - p.count = 0; - }, - '1'...'9' => { - p.number_is_integer = true; - p.state = .NumberMaybeDigitOrDotOrExponent; - p.count = 0; - }, - '"' => { - p.state = .String; - p.string_escapes = .None; - p.string_last_was_high_surrogate = false; - p.count = 0; - }, - 't' => { - p.state = .TrueLiteral1; - p.count = 0; - }, - 'f' => { - p.state = .FalseLiteral1; - p.count = 0; - }, - 'n' => { - p.state = .NullLiteral1; - p.count = 0; - }, - 0x09, 0x0A, 0x0D, 0x20 => { - // whitespace - }, - else => { - return error.InvalidValueBegin; - }, - }, - - .ValueEnd => switch (c) { - ',' => { - const last_type = p.stack.peek() orelse unreachable; - p.after_string_state = State.fromAggregateContainerType(last_type); - p.state = .ValueBeginNoClosing; - }, - ']' => { - const last_type = p.stack.peek() orelse return error.TooManyClosingItems; - - if (last_type != .array) { - return error.UnexpectedClosingBracket; - } - - _ = p.stack.pop(); - p.state = .ValueEnd; - p.after_string_state = State.fromAggregateContainerType(last_type); - - if (p.stack.len == 0) { - p.complete = true; - p.state = .TopLevelEnd; - } - - token.* = Token.ArrayEnd; - }, - '}' => { - const last_type = p.stack.peek() orelse return error.TooManyClosingItems; - - if (last_type != .object) { - return error.UnexpectedClosingBrace; - } - - _ = p.stack.pop(); - p.state = .ValueEnd; - p.after_string_state = State.fromAggregateContainerType(last_type); - - if (p.stack.len == 0) { - p.complete = true; - p.state = .TopLevelEnd; - } - - token.* = Token.ObjectEnd; - }, - 0x09, 0x0A, 0x0D, 0x20 => { - // whitespace - }, - else => { - return error.InvalidValueEnd; - }, - }, - - .ObjectSeparator => switch (c) { - ':' => { - p.state = .ValueBeginNoClosing; - p.after_string_state = .ValueEnd; - }, - 0x09, 0x0A, 0x0D, 0x20 => { - // whitespace - }, - else => { - return error.InvalidSeparator; - }, - }, - - .String => switch (c) { - 0x00...0x1F => { - return error.InvalidControlCharacter; - }, - '"' => { - p.state = p.after_string_state; - if (p.after_value_state == .TopLevelEnd) { - p.state = .TopLevelEnd; - p.complete = true; - } - - token.* = .{ - .String = .{ - .count = p.count - 1, - .escapes = p.string_escapes, - }, - }; - p.string_escapes = undefined; - p.string_last_was_high_surrogate = undefined; - }, - '\\' => { - p.state = .StringEscapeCharacter; - switch (p.string_escapes) { - .None => { - p.string_escapes = .{ .Some = .{ .size_diff = 0 } }; - }, - .Some => {}, - } - }, - 0x20, 0x21, 0x23...0x5B, 0x5D...0x7F => { - // non-control ascii - p.string_last_was_high_surrogate = false; - }, - 0xC2...0xDF => { - p.state = .StringUtf8Byte2Of2; - }, - 0xE0...0xEF => { - p.state = .StringUtf8Byte2Of3; - p.sequence_first_byte = c; - }, - 0xF0...0xF4 => { - p.state = .StringUtf8Byte2Of4; - p.sequence_first_byte = c; - }, - else => { - return error.InvalidUtf8Byte; - }, - }, - - .StringUtf8Byte2Of2 => switch (c >> 6) { - 0b10 => p.state = .String, - else => return error.InvalidUtf8Byte, - }, - .StringUtf8Byte2Of3 => 
{ - switch (p.sequence_first_byte) { - 0xE0 => switch (c) { - 0xA0...0xBF => {}, - else => return error.InvalidUtf8Byte, - }, - 0xE1...0xEF => switch (c) { - 0x80...0xBF => {}, - else => return error.InvalidUtf8Byte, - }, - else => return error.InvalidUtf8Byte, - } - p.state = .StringUtf8Byte3Of3; - }, - .StringUtf8Byte3Of3 => switch (c) { - 0x80...0xBF => p.state = .String, - else => return error.InvalidUtf8Byte, - }, - .StringUtf8Byte2Of4 => { - switch (p.sequence_first_byte) { - 0xF0 => switch (c) { - 0x90...0xBF => {}, - else => return error.InvalidUtf8Byte, - }, - 0xF1...0xF3 => switch (c) { - 0x80...0xBF => {}, - else => return error.InvalidUtf8Byte, - }, - 0xF4 => switch (c) { - 0x80...0x8F => {}, - else => return error.InvalidUtf8Byte, - }, - else => return error.InvalidUtf8Byte, - } - p.state = .StringUtf8Byte3Of4; - }, - .StringUtf8Byte3Of4 => switch (c) { - 0x80...0xBF => p.state = .StringUtf8Byte4Of4, - else => return error.InvalidUtf8Byte, - }, - .StringUtf8Byte4Of4 => switch (c) { - 0x80...0xBF => p.state = .String, - else => return error.InvalidUtf8Byte, - }, - - .StringEscapeCharacter => switch (c) { - // NOTE: '/' is allowed as an escaped character but it also is allowed - // as unescaped according to the RFC. There is a reported errata which suggests - // removing the non-escaped variant but it makes more sense to simply disallow - // it as an escape code here. - // - // The current JSONTestSuite tests rely on both of this behaviour being present - // however, so we default to the status quo where both are accepted until this - // is further clarified. - '"', '\\', '/', 'b', 'f', 'n', 'r', 't' => { - p.string_escapes.Some.size_diff -= 1; - p.state = .String; - p.string_last_was_high_surrogate = false; - }, - 'u' => { - p.state = .StringEscapeHexUnicode4; - }, - else => { - return error.InvalidEscapeCharacter; - }, - }, - - .StringEscapeHexUnicode4 => { - var codepoint: u21 = undefined; - switch (c) { - else => return error.InvalidUnicodeHexSymbol, - '0'...'9' => { - codepoint = c - '0'; - }, - 'A'...'F' => { - codepoint = c - 'A' + 10; - }, - 'a'...'f' => { - codepoint = c - 'a' + 10; - }, - } - p.state = .StringEscapeHexUnicode3; - p.string_unicode_codepoint = codepoint << 12; - }, - - .StringEscapeHexUnicode3 => { - var codepoint: u21 = undefined; - switch (c) { - else => return error.InvalidUnicodeHexSymbol, - '0'...'9' => { - codepoint = c - '0'; - }, - 'A'...'F' => { - codepoint = c - 'A' + 10; - }, - 'a'...'f' => { - codepoint = c - 'a' + 10; - }, - } - p.state = .StringEscapeHexUnicode2; - p.string_unicode_codepoint |= codepoint << 8; - }, - - .StringEscapeHexUnicode2 => { - var codepoint: u21 = undefined; - switch (c) { - else => return error.InvalidUnicodeHexSymbol, - '0'...'9' => { - codepoint = c - '0'; - }, - 'A'...'F' => { - codepoint = c - 'A' + 10; - }, - 'a'...'f' => { - codepoint = c - 'a' + 10; - }, - } - p.state = .StringEscapeHexUnicode1; - p.string_unicode_codepoint |= codepoint << 4; - }, - - .StringEscapeHexUnicode1 => { - var codepoint: u21 = undefined; - switch (c) { - else => return error.InvalidUnicodeHexSymbol, - '0'...'9' => { - codepoint = c - '0'; - }, - 'A'...'F' => { - codepoint = c - 'A' + 10; - }, - 'a'...'f' => { - codepoint = c - 'a' + 10; - }, - } - p.state = .String; - p.string_unicode_codepoint |= codepoint; - if (p.string_unicode_codepoint < 0xD800 or p.string_unicode_codepoint >= 0xE000) { - // not part of surrogate pair - p.string_escapes.Some.size_diff -= @as(isize, 6 - 
(std.unicode.utf8CodepointSequenceLength(p.string_unicode_codepoint) catch unreachable)); - p.string_last_was_high_surrogate = false; - } else if (p.string_unicode_codepoint < 0xDC00) { - // 'high' surrogate - // takes 3 bytes to encode a half surrogate pair into wtf8 - p.string_escapes.Some.size_diff -= 6 - 3; - p.string_last_was_high_surrogate = true; - } else { - // 'low' surrogate - p.string_escapes.Some.size_diff -= 6; - if (p.string_last_was_high_surrogate) { - // takes 4 bytes to encode a full surrogate pair into utf8 - // 3 bytes are already reserved by high surrogate - p.string_escapes.Some.size_diff -= -1; - } else { - // takes 3 bytes to encode a half surrogate pair into wtf8 - p.string_escapes.Some.size_diff -= -3; - } - p.string_last_was_high_surrogate = false; - } - p.string_unicode_codepoint = undefined; - }, - - .Number => { - p.complete = p.after_value_state == .TopLevelEnd; - switch (c) { - '0' => { - p.state = .NumberMaybeDotOrExponent; - }, - '1'...'9' => { - p.state = .NumberMaybeDigitOrDotOrExponent; - }, - else => { - return error.InvalidNumber; - }, - } - }, - - .NumberMaybeDotOrExponent => { - p.complete = p.after_value_state == .TopLevelEnd; - switch (c) { - '.' => { - p.number_is_integer = false; - p.state = .NumberFractionalRequired; - }, - 'e', 'E' => { - p.number_is_integer = false; - p.state = .NumberExponent; - }, - else => { - p.state = p.after_value_state; - token.* = .{ - .Number = .{ - .count = p.count, - .is_integer = p.number_is_integer, - }, - }; - p.number_is_integer = undefined; - return true; - }, - } - }, - - .NumberMaybeDigitOrDotOrExponent => { - p.complete = p.after_value_state == .TopLevelEnd; - switch (c) { - '.' => { - p.number_is_integer = false; - p.state = .NumberFractionalRequired; - }, - 'e', 'E' => { - p.number_is_integer = false; - p.state = .NumberExponent; - }, - '0'...'9' => { - // another digit - }, - else => { - p.state = p.after_value_state; - token.* = .{ - .Number = .{ - .count = p.count, - .is_integer = p.number_is_integer, - }, - }; - return true; - }, - } - }, - - .NumberFractionalRequired => { - p.complete = p.after_value_state == .TopLevelEnd; - switch (c) { - '0'...'9' => { - p.state = .NumberFractional; - }, - else => { - return error.InvalidNumber; - }, - } - }, - - .NumberFractional => { - p.complete = p.after_value_state == .TopLevelEnd; - switch (c) { - '0'...'9' => { - // another digit - }, - 'e', 'E' => { - p.number_is_integer = false; - p.state = .NumberExponent; - }, - else => { - p.state = p.after_value_state; - token.* = .{ - .Number = .{ - .count = p.count, - .is_integer = p.number_is_integer, - }, - }; - return true; - }, - } - }, - - .NumberMaybeExponent => { - p.complete = p.after_value_state == .TopLevelEnd; - switch (c) { - 'e', 'E' => { - p.number_is_integer = false; - p.state = .NumberExponent; - }, - else => { - p.state = p.after_value_state; - token.* = .{ - .Number = .{ - .count = p.count, - .is_integer = p.number_is_integer, - }, - }; - return true; - }, - } - }, - - .NumberExponent => switch (c) { - '-', '+' => { - p.complete = false; - p.state = .NumberExponentDigitsRequired; - }, - '0'...'9' => { - p.complete = p.after_value_state == .TopLevelEnd; - p.state = .NumberExponentDigits; - }, - else => { - return error.InvalidNumber; - }, - }, - - .NumberExponentDigitsRequired => switch (c) { - '0'...'9' => { - p.complete = p.after_value_state == .TopLevelEnd; - p.state = .NumberExponentDigits; - }, - else => { - return error.InvalidNumber; - }, - }, - - .NumberExponentDigits => { - p.complete = 
p.after_value_state == .TopLevelEnd; - switch (c) { - '0'...'9' => { - // another digit - }, - else => { - p.state = p.after_value_state; - token.* = .{ - .Number = .{ - .count = p.count, - .is_integer = p.number_is_integer, - }, - }; - return true; - }, - } - }, - - .TrueLiteral1 => switch (c) { - 'r' => p.state = .TrueLiteral2, - else => return error.InvalidLiteral, - }, - - .TrueLiteral2 => switch (c) { - 'u' => p.state = .TrueLiteral3, - else => return error.InvalidLiteral, - }, - - .TrueLiteral3 => switch (c) { - 'e' => { - p.state = p.after_value_state; - p.complete = p.state == .TopLevelEnd; - token.* = Token.True; - }, - else => { - return error.InvalidLiteral; - }, - }, - - .FalseLiteral1 => switch (c) { - 'a' => p.state = .FalseLiteral2, - else => return error.InvalidLiteral, - }, - - .FalseLiteral2 => switch (c) { - 'l' => p.state = .FalseLiteral3, - else => return error.InvalidLiteral, - }, - - .FalseLiteral3 => switch (c) { - 's' => p.state = .FalseLiteral4, - else => return error.InvalidLiteral, - }, - - .FalseLiteral4 => switch (c) { - 'e' => { - p.state = p.after_value_state; - p.complete = p.state == .TopLevelEnd; - token.* = Token.False; - }, - else => { - return error.InvalidLiteral; - }, - }, - - .NullLiteral1 => switch (c) { - 'u' => p.state = .NullLiteral2, - else => return error.InvalidLiteral, - }, - - .NullLiteral2 => switch (c) { - 'l' => p.state = .NullLiteral3, - else => return error.InvalidLiteral, - }, - - .NullLiteral3 => switch (c) { - 'l' => { - p.state = p.after_value_state; - p.complete = p.state == .TopLevelEnd; - token.* = Token.Null; - }, - else => { - return error.InvalidLiteral; - }, - }, - } - - return false; - } -}; - -/// A small wrapper over a StreamingParser for full slices. Returns a stream of json Tokens. -pub const TokenStream = struct { - i: usize, - slice: []const u8, - parser: StreamingParser, - token: ?Token, - - pub const Error = StreamingParser.Error || error{UnexpectedEndOfJson}; - - pub fn init(slice: []const u8) TokenStream { - return TokenStream{ - .i = 0, - .slice = slice, - .parser = StreamingParser.init(), - .token = null, - }; - } - - fn stackUsed(self: *TokenStream) usize { - return self.parser.stack.len + if (self.token != null) @as(usize, 1) else 0; - } - - pub fn next(self: *TokenStream) Error!?Token { - if (self.token) |token| { - self.token = null; - return token; - } - - var t1: ?Token = undefined; - var t2: ?Token = undefined; - - while (self.i < self.slice.len) { - try self.parser.feed(self.slice[self.i], &t1, &t2); - self.i += 1; - - if (t1) |token| { - self.token = t2; - return token; - } - } - - // Without this a bare number fails, the streaming parser doesn't know the input ended - try self.parser.feed(' ', &t1, &t2); - self.i += 1; - - if (t1) |token| { - return token; - } else if (self.parser.complete) { - return null; - } else { - return error.UnexpectedEndOfJson; - } - } -}; - -/// Validate a JSON string. This does not limit number precision so a decoder may not necessarily -/// be able to decode the string even if this returns true. 
-pub fn validate(s: []const u8) bool { - var p = StreamingParser.init(); - - for (s) |c| { - var token1: ?Token = undefined; - var token2: ?Token = undefined; - - p.feed(c, &token1, &token2) catch { - return false; - }; - } - - return p.complete; -} - -const Allocator = std.mem.Allocator; -const ArenaAllocator = std.heap.ArenaAllocator; -const ArrayList = std.ArrayList; -const StringArrayHashMap = std.StringArrayHashMap; - -pub const ValueTree = struct { - arena: *ArenaAllocator, - root: Value, - - pub fn deinit(self: *ValueTree) void { - self.arena.deinit(); - self.arena.child_allocator.destroy(self.arena); - } -}; - -pub const ObjectMap = StringArrayHashMap(Value); -pub const Array = ArrayList(Value); - -/// Represents a JSON value -/// Currently only supports numbers that fit into i64 or f64. -pub const Value = union(enum) { - Null, - Bool: bool, - Integer: i64, - Float: f64, - NumberString: []const u8, - String: []const u8, - Array: Array, - Object: ObjectMap, - - pub fn jsonStringify( - value: @This(), - options: StringifyOptions, - out_stream: anytype, - ) @TypeOf(out_stream).Error!void { - switch (value) { - .Null => try stringify(null, options, out_stream), - .Bool => |inner| try stringify(inner, options, out_stream), - .Integer => |inner| try stringify(inner, options, out_stream), - .Float => |inner| try stringify(inner, options, out_stream), - .NumberString => |inner| try out_stream.writeAll(inner), - .String => |inner| try stringify(inner, options, out_stream), - .Array => |inner| try stringify(inner.items, options, out_stream), - .Object => |inner| { - try out_stream.writeByte('{'); - var field_output = false; - var child_options = options; - if (child_options.whitespace) |*child_whitespace| { - child_whitespace.indent_level += 1; - } - var it = inner.iterator(); - while (it.next()) |entry| { - if (!field_output) { - field_output = true; - } else { - try out_stream.writeByte(','); - } - if (child_options.whitespace) |child_whitespace| { - try child_whitespace.outputIndent(out_stream); - } - - try stringify(entry.key_ptr.*, options, out_stream); - try out_stream.writeByte(':'); - if (child_options.whitespace) |child_whitespace| { - if (child_whitespace.separator) { - try out_stream.writeByte(' '); - } - } - try stringify(entry.value_ptr.*, child_options, out_stream); - } - if (field_output) { - if (options.whitespace) |whitespace| { - try whitespace.outputIndent(out_stream); - } - } - try out_stream.writeByte('}'); - }, - } - } - - pub fn dump(self: Value) void { - std.debug.getStderrMutex().lock(); - defer std.debug.getStderrMutex().unlock(); - - const stderr = std.io.getStdErr().writer(); - std.json.stringify(self, std.json.StringifyOptions{ .whitespace = null }, stderr) catch return; - } -}; - -/// parse tokens from a stream, returning `false` if they do not decode to `value` -fn parsesTo(comptime T: type, value: T, tokens: *TokenStream, options: ParseOptions) !bool { - // TODO: should be able to write this function to not require an allocator - const tmp = try parse(T, tokens, options); - defer parseFree(T, tmp, options); - - return parsedEqual(tmp, value); -} - -/// Returns if a value returned by `parse` is deep-equal to another value -fn parsedEqual(a: anytype, b: @TypeOf(a)) bool { - switch (@typeInfo(@TypeOf(a))) { - .Optional => { - if (a == null and b == null) return true; - if (a == null or b == null) return false; - return parsedEqual(a.?, b.?); - }, - .Union => |info| { - if (info.tag_type) |UnionTag| { - const tag_a = std.meta.activeTag(a); - const tag_b = 
std.meta.activeTag(b); - if (tag_a != tag_b) return false; - - inline for (info.fields) |field_info| { - if (@field(UnionTag, field_info.name) == tag_a) { - return parsedEqual(@field(a, field_info.name), @field(b, field_info.name)); - } - } - return false; - } else { - unreachable; - } - }, - .Array => { - for (a, 0..) |e, i| - if (!parsedEqual(e, b[i])) return false; - return true; - }, - .Struct => |info| { - inline for (info.fields) |field_info| { - if (!parsedEqual(@field(a, field_info.name), @field(b, field_info.name))) return false; - } - return true; - }, - .Pointer => |ptrInfo| switch (ptrInfo.size) { - .One => return parsedEqual(a.*, b.*), - .Slice => { - if (a.len != b.len) return false; - for (a, 0..) |e, i| - if (!parsedEqual(e, b[i])) return false; - return true; - }, - .Many, .C => unreachable, - }, - else => return a == b, - } - unreachable; -} - -pub const ParseOptions = struct { - allocator: ?Allocator = null, - - /// Behaviour when a duplicate field is encountered. - duplicate_field_behavior: enum { - UseFirst, - Error, - UseLast, - } = .Error, - - /// If false, finding an unknown field returns an error. - ignore_unknown_fields: bool = false, - - allow_trailing_data: bool = false, -}; - -const SkipValueError = error{UnexpectedJsonDepth} || TokenStream.Error; - -fn skipValue(tokens: *TokenStream) SkipValueError!void { - const original_depth = tokens.stackUsed(); - - // Return an error if no value is found - _ = try tokens.next(); - if (tokens.stackUsed() < original_depth) return error.UnexpectedJsonDepth; - if (tokens.stackUsed() == original_depth) return; - - while (try tokens.next()) |_| { - if (tokens.stackUsed() == original_depth) return; - } -} - -fn ParseInternalError(comptime T: type) type { - // `inferred_types` is used to avoid infinite recursion for recursive type definitions. 
- const inferred_types = [_]type{}; - return ParseInternalErrorImpl(T, &inferred_types); -} - -fn ParseInternalErrorImpl(comptime T: type, comptime inferred_types: []const type) type { - for (inferred_types) |ty| { - if (T == ty) return error{}; - } - - switch (@typeInfo(T)) { - .Bool => return error{UnexpectedToken}, - .Float, .ComptimeFloat => return error{UnexpectedToken} || std.fmt.ParseFloatError, - .Int, .ComptimeInt => { - return error{ UnexpectedToken, InvalidNumber, Overflow } || - std.fmt.ParseIntError || std.fmt.ParseFloatError; - }, - .Optional => |optionalInfo| { - return ParseInternalErrorImpl(optionalInfo.child, inferred_types ++ [_]type{T}); - }, - .Enum => return error{ UnexpectedToken, InvalidEnumTag } || std.fmt.ParseIntError || - std.meta.IntToEnumError || std.meta.IntToEnumError, - .Union => |unionInfo| { - if (unionInfo.tag_type) |_| { - var errors = error{NoUnionMembersMatched}; - for (unionInfo.fields) |u_field| { - errors = errors || ParseInternalErrorImpl(u_field.type, inferred_types ++ [_]type{T}); - } - return errors; - } else { - @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'"); - } - }, - .Struct => |structInfo| { - var errors = error{ - DuplicateJSONField, - UnexpectedEndOfJson, - UnexpectedToken, - UnexpectedValue, - UnknownField, - MissingField, - } || SkipValueError || TokenStream.Error; - for (structInfo.fields) |field| { - errors = errors || ParseInternalErrorImpl(field.type, inferred_types ++ [_]type{T}); - } - return errors; - }, - .Array => |arrayInfo| { - return error{ UnexpectedEndOfJson, UnexpectedToken, LengthMismatch } || TokenStream.Error || - UnescapeValidStringError || - ParseInternalErrorImpl(arrayInfo.child, inferred_types ++ [_]type{T}); - }, - .Vector => |vecInfo| { - return error{ UnexpectedEndOfJson, UnexpectedToken, LengthMismatch } || TokenStream.Error || - UnescapeValidStringError || - ParseInternalErrorImpl(vecInfo.child, inferred_types ++ [_]type{T}); - }, - .Pointer => |ptrInfo| { - var errors = error{AllocatorRequired} || std.mem.Allocator.Error; - switch (ptrInfo.size) { - .One => { - return errors || ParseInternalErrorImpl(ptrInfo.child, inferred_types ++ [_]type{T}); - }, - .Slice => { - return errors || error{ UnexpectedEndOfJson, UnexpectedToken } || - ParseInternalErrorImpl(ptrInfo.child, inferred_types ++ [_]type{T}) || - UnescapeValidStringError || TokenStream.Error; - }, - else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"), - } - }, - else => return error{}, - } - unreachable; -} - -fn parseInternalArray( - comptime T: type, - comptime Elt: type, - comptime arr_len: usize, - tokens: *TokenStream, - options: ParseOptions, -) ParseInternalError(T)!T { - var r: T = undefined; - var i: usize = 0; - var child_options = options; - child_options.allow_trailing_data = true; - errdefer { - // Without the r.len check `r[i]` is not allowed - if (arr_len > 0) while (true) : (i -= 1) { - parseFree(Elt, r[i], options); - if (i == 0) break; - }; - } - if (arr_len > 0) while (i < arr_len) : (i += 1) { - r[i] = try parse(Elt, tokens, child_options); - }; - const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson; - switch (tok) { - .ArrayEnd => {}, - else => return error.UnexpectedToken, - } - return r; -} - -fn parseInternal( - comptime T: type, - token: Token, - tokens: *TokenStream, - options: ParseOptions, -) ParseInternalError(T)!T { - switch (@typeInfo(T)) { - .Bool => { - return switch (token) { - .True => true, - .False => false, - else => error.UnexpectedToken, 
- }; - }, - .Float, .ComptimeFloat => { - switch (token) { - .Number => |numberToken| return try std.fmt.parseFloat(T, numberToken.slice(tokens.slice, tokens.i - 1)), - .String => |stringToken| return try std.fmt.parseFloat(T, stringToken.slice(tokens.slice, tokens.i - 1)), - else => return error.UnexpectedToken, - } - }, - .Int, .ComptimeInt => { - switch (token) { - .Number => |numberToken| { - if (numberToken.is_integer) - return try std.fmt.parseInt(T, numberToken.slice(tokens.slice, tokens.i - 1), 10); - const float = try std.fmt.parseFloat(f128, numberToken.slice(tokens.slice, tokens.i - 1)); - if (@round(float) != float) return error.InvalidNumber; - if (float > std.math.maxInt(T) or float < std.math.minInt(T)) return error.Overflow; - return @floatToInt(T, float); - }, - .String => |stringToken| { - return std.fmt.parseInt(T, stringToken.slice(tokens.slice, tokens.i - 1), 10) catch |err| { - switch (err) { - error.Overflow => return err, - error.InvalidCharacter => { - const float = try std.fmt.parseFloat(f128, stringToken.slice(tokens.slice, tokens.i - 1)); - if (@round(float) != float) return error.InvalidNumber; - if (float > std.math.maxInt(T) or float < std.math.minInt(T)) return error.Overflow; - return @floatToInt(T, float); - }, - } - }; - }, - else => return error.UnexpectedToken, - } - }, - .Optional => |optionalInfo| { - if (token == .Null) { - return null; - } else { - return try parseInternal(optionalInfo.child, token, tokens, options); - } - }, - .Enum => |enumInfo| { - switch (token) { - .Number => |numberToken| { - if (!numberToken.is_integer) return error.UnexpectedToken; - const n = try std.fmt.parseInt(enumInfo.tag_type, numberToken.slice(tokens.slice, tokens.i - 1), 10); - return try std.meta.intToEnum(T, n); - }, - .String => |stringToken| { - const source_slice = stringToken.slice(tokens.slice, tokens.i - 1); - switch (stringToken.escapes) { - .None => return std.meta.stringToEnum(T, source_slice) orelse return error.InvalidEnumTag, - .Some => { - inline for (enumInfo.fields) |field| { - if (field.name.len == stringToken.decodedLength() and encodesTo(field.name, source_slice)) { - return @field(T, field.name); - } - } - return error.InvalidEnumTag; - }, - } - }, - else => return error.UnexpectedToken, - } - }, - .Union => |unionInfo| { - if (unionInfo.tag_type) |_| { - // try each of the union fields until we find one that matches - inline for (unionInfo.fields) |u_field| { - // take a copy of tokens so we can withhold mutations until success - var tokens_copy = tokens.*; - if (parseInternal(u_field.type, token, &tokens_copy, options)) |value| { - tokens.* = tokens_copy; - return @unionInit(T, u_field.name, value); - } else |err| { - // Bubble up error.OutOfMemory - // Parsing some types won't have OutOfMemory in their - // error-sets, for the condition to be valid, merge it in. 
- if (@as(@TypeOf(err) || error{OutOfMemory}, err) == error.OutOfMemory) return err; - // Bubble up AllocatorRequired, as it indicates missing option - if (@as(@TypeOf(err) || error{AllocatorRequired}, err) == error.AllocatorRequired) return err; - // otherwise continue through the `inline for` - } - } - return error.NoUnionMembersMatched; - } else { - @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'"); - } - }, - .Struct => |structInfo| { - if (structInfo.is_tuple) { - switch (token) { - .ArrayBegin => {}, - else => return error.UnexpectedToken, - } - var r: T = undefined; - var child_options = options; - child_options.allow_trailing_data = true; - var fields_seen: usize = 0; - errdefer { - inline for (0..structInfo.fields.len) |i| { - if (i < fields_seen) { - parseFree(structInfo.fields[i].type, r[i], options); - } - } - } - inline for (0..structInfo.fields.len) |i| { - r[i] = try parse(structInfo.fields[i].type, tokens, child_options); - fields_seen = i + 1; - } - const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson; - switch (tok) { - .ArrayEnd => {}, - else => return error.UnexpectedToken, - } - return r; - } - - switch (token) { - .ObjectBegin => {}, - else => return error.UnexpectedToken, - } - var r: T = undefined; - var fields_seen = [_]bool{false} ** structInfo.fields.len; - errdefer { - inline for (structInfo.fields, 0..) |field, i| { - if (fields_seen[i] and !field.is_comptime) { - parseFree(field.type, @field(r, field.name), options); - } - } - } - - while (true) { - switch ((try tokens.next()) orelse return error.UnexpectedEndOfJson) { - .ObjectEnd => break, - .String => |stringToken| { - const key_source_slice = stringToken.slice(tokens.slice, tokens.i - 1); - var child_options = options; - child_options.allow_trailing_data = true; - var found = false; - inline for (structInfo.fields, 0..) |field, i| { - if (switch (stringToken.escapes) { - .None => mem.eql(u8, field.name, key_source_slice), - .Some => (field.name.len == stringToken.decodedLength() and encodesTo(field.name, key_source_slice)), - }) { - if (fields_seen[i]) { - switch (options.duplicate_field_behavior) { - .UseFirst => { - // unconditionally ignore value. for comptime fields, this skips check against default_value - parseFree(field.type, try parse(field.type, tokens, child_options), child_options); - found = true; - break; - }, - .Error => return error.DuplicateJSONField, - .UseLast => { - if (!field.is_comptime) { - parseFree(field.type, @field(r, field.name), child_options); - } - fields_seen[i] = false; - }, - } - } - if (field.is_comptime) { - if (!try parsesTo(field.type, @ptrCast(*align(1) const field.type, field.default_value.?).*, tokens, child_options)) { - return error.UnexpectedValue; - } - } else { - @field(r, field.name) = try parse(field.type, tokens, child_options); - } - fields_seen[i] = true; - found = true; - break; - } - } - if (!found) { - if (options.ignore_unknown_fields) { - try skipValue(tokens); - continue; - } else { - return error.UnknownField; - } - } - }, - else => return error.UnexpectedToken, - } - } - inline for (structInfo.fields, 0..) 
|field, i| { - if (!fields_seen[i]) { - if (field.default_value) |default_ptr| { - if (!field.is_comptime) { - const default = @ptrCast(*align(1) const field.type, default_ptr).*; - @field(r, field.name) = default; - } - } else { - return error.MissingField; - } - } - } - return r; - }, - .Array => |arrayInfo| { - switch (token) { - .ArrayBegin => { - const len = @typeInfo(T).Array.len; - return parseInternalArray(T, arrayInfo.child, len, tokens, options); - }, - .String => |stringToken| { - if (arrayInfo.child != u8) return error.UnexpectedToken; - var r: T = undefined; - const source_slice = stringToken.slice(tokens.slice, tokens.i - 1); - if (r.len != stringToken.decodedLength()) return error.LengthMismatch; - switch (stringToken.escapes) { - .None => @memcpy(r[0..source_slice.len], source_slice), - .Some => try unescapeValidString(&r, source_slice), - } - return r; - }, - else => return error.UnexpectedToken, - } - }, - .Vector => |vecInfo| { - switch (token) { - .ArrayBegin => { - const len = @typeInfo(T).Vector.len; - return parseInternalArray(T, vecInfo.child, len, tokens, options); - }, - else => return error.UnexpectedToken, - } - }, - .Pointer => |ptrInfo| { - const allocator = options.allocator orelse return error.AllocatorRequired; - switch (ptrInfo.size) { - .One => { - const r: *ptrInfo.child = try allocator.create(ptrInfo.child); - errdefer allocator.destroy(r); - r.* = try parseInternal(ptrInfo.child, token, tokens, options); - return r; - }, - .Slice => { - switch (token) { - .ArrayBegin => { - var arraylist = std.ArrayList(ptrInfo.child).init(allocator); - errdefer { - while (arraylist.popOrNull()) |v| { - parseFree(ptrInfo.child, v, options); - } - arraylist.deinit(); - } - - while (true) { - const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson; - switch (tok) { - .ArrayEnd => break, - else => {}, - } - - try arraylist.ensureUnusedCapacity(1); - const v = try parseInternal(ptrInfo.child, tok, tokens, options); - arraylist.appendAssumeCapacity(v); - } - - if (ptrInfo.sentinel) |some| { - const sentinel_value = @ptrCast(*align(1) const ptrInfo.child, some).*; - return try arraylist.toOwnedSliceSentinel(sentinel_value); - } - - return try arraylist.toOwnedSlice(); - }, - .String => |stringToken| { - if (ptrInfo.child != u8) return error.UnexpectedToken; - const source_slice = stringToken.slice(tokens.slice, tokens.i - 1); - const len = stringToken.decodedLength(); - const output = if (ptrInfo.sentinel) |sentinel_ptr| - try allocator.allocSentinel(u8, len, @ptrCast(*const u8, sentinel_ptr).*) - else - try allocator.alloc(u8, len); - errdefer allocator.free(output); - switch (stringToken.escapes) { - .None => @memcpy(output[0..source_slice.len], source_slice), - .Some => try unescapeValidString(output, source_slice), - } - - return output; - }, - else => return error.UnexpectedToken, - } - }, - else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"), - } - }, - else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"), - } - unreachable; -} - -pub fn ParseError(comptime T: type) type { - return ParseInternalError(T) || error{UnexpectedEndOfJson} || TokenStream.Error; -} - -pub fn parse(comptime T: type, tokens: *TokenStream, options: ParseOptions) ParseError(T)!T { - const token = (try tokens.next()) orelse return error.UnexpectedEndOfJson; - const r = try parseInternal(T, token, tokens, options); - errdefer parseFree(T, r, options); - if (!options.allow_trailing_data) { - if ((try tokens.next()) != null) unreachable; - 
assert(tokens.i >= tokens.slice.len); - } - return r; -} - -/// Releases resources created by `parse`. -/// Should be called with the same type and `ParseOptions` that were passed to `parse` -pub fn parseFree(comptime T: type, value: T, options: ParseOptions) void { - switch (@typeInfo(T)) { - .Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum => {}, - .Optional => { - if (value) |v| { - return parseFree(@TypeOf(v), v, options); - } - }, - .Union => |unionInfo| { - if (unionInfo.tag_type) |UnionTagType| { - inline for (unionInfo.fields) |u_field| { - if (value == @field(UnionTagType, u_field.name)) { - parseFree(u_field.type, @field(value, u_field.name), options); - break; - } - } - } else { - unreachable; - } - }, - .Struct => |structInfo| { - inline for (structInfo.fields) |field| { - if (!field.is_comptime) { - var should_free = true; - if (field.default_value) |default| { - switch (@typeInfo(field.type)) { - // We must not attempt to free pointers to struct default values - .Pointer => |fieldPtrInfo| { - const field_value = @field(value, field.name); - const field_ptr = switch (fieldPtrInfo.size) { - .One => field_value, - .Slice => field_value.ptr, - else => unreachable, // Other pointer types are not parseable - }; - const field_addr = @ptrToInt(field_ptr); - - const casted_default = @ptrCast(*const field.type, @alignCast(@alignOf(field.type), default)).*; - const default_ptr = switch (fieldPtrInfo.size) { - .One => casted_default, - .Slice => casted_default.ptr, - else => unreachable, // Other pointer types are not parseable - }; - const default_addr = @ptrToInt(default_ptr); - - if (field_addr == default_addr) { - should_free = false; - } - }, - else => {}, - } - } - if (should_free) { - parseFree(field.type, @field(value, field.name), options); - } - } - } - }, - .Array => |arrayInfo| { - for (value) |v| { - parseFree(arrayInfo.child, v, options); - } - }, - .Vector => |vecInfo| { - var i: usize = 0; - var v_len: usize = @typeInfo(@TypeOf(value)).Vector.len; - while (i < v_len) : (i += 1) { - parseFree(vecInfo.child, value[i], options); - } - }, - .Pointer => |ptrInfo| { - const allocator = options.allocator orelse unreachable; - switch (ptrInfo.size) { - .One => { - parseFree(ptrInfo.child, value.*, options); - allocator.destroy(value); - }, - .Slice => { - for (value) |v| { - parseFree(ptrInfo.child, v, options); - } - allocator.free(value); - }, - else => unreachable, - } - }, - else => unreachable, - } -} - -/// A non-stream JSON parser which constructs a tree of Value's. -pub const Parser = struct { - allocator: Allocator, - state: State, - copy_strings: bool, - // Stores parent nodes and un-combined Values. 
- stack: Array, - - const State = enum { - ObjectKey, - ObjectValue, - ArrayValue, - Simple, - }; - - pub fn init(allocator: Allocator, copy_strings: bool) Parser { - return Parser{ - .allocator = allocator, - .state = .Simple, - .copy_strings = copy_strings, - .stack = Array.init(allocator), - }; - } - - pub fn deinit(p: *Parser) void { - p.stack.deinit(); - } - - pub fn reset(p: *Parser) void { - p.state = .Simple; - p.stack.shrinkRetainingCapacity(0); - } - - pub fn parse(p: *Parser, input: []const u8) !ValueTree { - var s = TokenStream.init(input); - - var arena = try p.allocator.create(ArenaAllocator); - errdefer p.allocator.destroy(arena); - - arena.* = ArenaAllocator.init(p.allocator); - errdefer arena.deinit(); - - const allocator = arena.allocator(); - - while (try s.next()) |token| { - try p.transition(allocator, input, s.i - 1, token); - } - - debug.assert(p.stack.items.len == 1); - - return ValueTree{ - .arena = arena, - .root = p.stack.items[0], - }; - } - - // Even though p.allocator exists, we take an explicit allocator so that allocation state - // can be cleaned up on error correctly during a `parse` on call. - fn transition(p: *Parser, allocator: Allocator, input: []const u8, i: usize, token: Token) !void { - switch (p.state) { - .ObjectKey => switch (token) { - .ObjectEnd => { - if (p.stack.items.len == 1) { - return; - } - - var value = p.stack.pop(); - try p.pushToParent(&value); - }, - .String => |s| { - try p.stack.append(try p.parseString(allocator, s, input, i)); - p.state = .ObjectValue; - }, - else => { - // The streaming parser would return an error eventually. - // To prevent invalid state we return an error now. - // TODO make the streaming parser return an error as soon as it encounters an invalid object key - return error.InvalidLiteral; - }, - }, - .ObjectValue => { - var object = &p.stack.items[p.stack.items.len - 2].Object; - var key = p.stack.items[p.stack.items.len - 1].String; - - switch (token) { - .ObjectBegin => { - try p.stack.append(Value{ .Object = ObjectMap.init(allocator) }); - p.state = .ObjectKey; - }, - .ArrayBegin => { - try p.stack.append(Value{ .Array = Array.init(allocator) }); - p.state = .ArrayValue; - }, - .String => |s| { - try object.put(key, try p.parseString(allocator, s, input, i)); - _ = p.stack.pop(); - p.state = .ObjectKey; - }, - .Number => |n| { - try object.put(key, try p.parseNumber(n, input, i)); - _ = p.stack.pop(); - p.state = .ObjectKey; - }, - .True => { - try object.put(key, Value{ .Bool = true }); - _ = p.stack.pop(); - p.state = .ObjectKey; - }, - .False => { - try object.put(key, Value{ .Bool = false }); - _ = p.stack.pop(); - p.state = .ObjectKey; - }, - .Null => { - try object.put(key, Value.Null); - _ = p.stack.pop(); - p.state = .ObjectKey; - }, - .ObjectEnd, .ArrayEnd => { - unreachable; - }, - } - }, - .ArrayValue => { - var array = &p.stack.items[p.stack.items.len - 1].Array; - - switch (token) { - .ArrayEnd => { - if (p.stack.items.len == 1) { - return; - } - - var value = p.stack.pop(); - try p.pushToParent(&value); - }, - .ObjectBegin => { - try p.stack.append(Value{ .Object = ObjectMap.init(allocator) }); - p.state = .ObjectKey; - }, - .ArrayBegin => { - try p.stack.append(Value{ .Array = Array.init(allocator) }); - p.state = .ArrayValue; - }, - .String => |s| { - try array.append(try p.parseString(allocator, s, input, i)); - }, - .Number => |n| { - try array.append(try p.parseNumber(n, input, i)); - }, - .True => { - try array.append(Value{ .Bool = true }); - }, - .False => { - try array.append(Value{ 
.Bool = false }); - }, - .Null => { - try array.append(Value.Null); - }, - .ObjectEnd => { - unreachable; - }, - } - }, - .Simple => switch (token) { - .ObjectBegin => { - try p.stack.append(Value{ .Object = ObjectMap.init(allocator) }); - p.state = .ObjectKey; - }, - .ArrayBegin => { - try p.stack.append(Value{ .Array = Array.init(allocator) }); - p.state = .ArrayValue; - }, - .String => |s| { - try p.stack.append(try p.parseString(allocator, s, input, i)); - }, - .Number => |n| { - try p.stack.append(try p.parseNumber(n, input, i)); - }, - .True => { - try p.stack.append(Value{ .Bool = true }); - }, - .False => { - try p.stack.append(Value{ .Bool = false }); - }, - .Null => { - try p.stack.append(Value.Null); - }, - .ObjectEnd, .ArrayEnd => { - unreachable; - }, - }, - } - } - - fn pushToParent(p: *Parser, value: *const Value) !void { - switch (p.stack.items[p.stack.items.len - 1]) { - // Object Parent -> [ ..., object, , value ] - Value.String => |key| { - _ = p.stack.pop(); - - var object = &p.stack.items[p.stack.items.len - 1].Object; - try object.put(key, value.*); - p.state = .ObjectKey; - }, - // Array Parent -> [ ..., , value ] - Value.Array => |*array| { - try array.append(value.*); - p.state = .ArrayValue; - }, - else => { - unreachable; - }, - } - } - - fn parseString(p: *Parser, allocator: Allocator, s: std.meta.TagPayload(Token, Token.String), input: []const u8, i: usize) !Value { - const slice = s.slice(input, i); - switch (s.escapes) { - .None => return Value{ .String = if (p.copy_strings) try allocator.dupe(u8, slice) else slice }, - .Some => { - const output = try allocator.alloc(u8, s.decodedLength()); - errdefer allocator.free(output); - try unescapeValidString(output, slice); - return Value{ .String = output }; - }, - } - } - - fn parseNumber(p: *Parser, n: std.meta.TagPayload(Token, Token.Number), input: []const u8, i: usize) !Value { - _ = p; - return if (n.is_integer) - Value{ - .Integer = std.fmt.parseInt(i64, n.slice(input, i), 10) catch |e| switch (e) { - error.Overflow => return Value{ .NumberString = n.slice(input, i) }, - error.InvalidCharacter => |err| return err, - }, - } - else - Value{ .Float = try std.fmt.parseFloat(f64, n.slice(input, i)) }; - } -}; - -pub const UnescapeValidStringError = error{InvalidUnicodeHexSymbol}; - -/// Unescape a JSON string -/// Only to be used on strings already validated by the parser -/// (note the unreachable statements and lack of bounds checking) -pub fn unescapeValidString(output: []u8, input: []const u8) UnescapeValidStringError!void { - var inIndex: usize = 0; - var outIndex: usize = 0; - - while (inIndex < input.len) { - if (input[inIndex] != '\\') { - // not an escape sequence - output[outIndex] = input[inIndex]; - inIndex += 1; - outIndex += 1; - } else if (input[inIndex + 1] != 'u') { - // a simple escape sequence - output[outIndex] = @as(u8, switch (input[inIndex + 1]) { - '\\' => '\\', - '/' => '/', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - 'f' => 12, - 'b' => 8, - '"' => '"', - else => unreachable, - }); - inIndex += 2; - outIndex += 1; - } else { - // a unicode escape sequence - const firstCodeUnit = std.fmt.parseInt(u16, input[inIndex + 2 .. 
inIndex + 6], 16) catch unreachable; - - // guess optimistically that it's not a surrogate pair - if (std.unicode.utf8Encode(firstCodeUnit, output[outIndex..])) |byteCount| { - outIndex += byteCount; - inIndex += 6; - } else |err| { - // it might be a surrogate pair - if (err != error.Utf8CannotEncodeSurrogateHalf) { - return error.InvalidUnicodeHexSymbol; - } - // check if a second code unit is present - if (inIndex + 7 >= input.len or input[inIndex + 6] != '\\' or input[inIndex + 7] != 'u') { - return error.InvalidUnicodeHexSymbol; - } - - const secondCodeUnit = std.fmt.parseInt(u16, input[inIndex + 8 .. inIndex + 12], 16) catch unreachable; - - const utf16le_seq = [2]u16{ - mem.nativeToLittle(u16, firstCodeUnit), - mem.nativeToLittle(u16, secondCodeUnit), - }; - if (std.unicode.utf16leToUtf8(output[outIndex..], &utf16le_seq)) |byteCount| { - outIndex += byteCount; - inIndex += 12; - } else |_| { - return error.InvalidUnicodeHexSymbol; - } - } - } - } - assert(outIndex == output.len); -} - -pub const StringifyOptions = struct { - pub const Whitespace = struct { - /// How many indentation levels deep are we? - indent_level: usize = 0, - - /// What character(s) should be used for indentation? - indent: union(enum) { - Space: u8, - Tab: void, - None: void, - } = .{ .Space = 4 }, - - /// After a colon, should whitespace be inserted? - separator: bool = true, - - pub fn outputIndent( - whitespace: @This(), - out_stream: anytype, - ) @TypeOf(out_stream).Error!void { - var char: u8 = undefined; - var n_chars: usize = undefined; - switch (whitespace.indent) { - .Space => |n_spaces| { - char = ' '; - n_chars = n_spaces; - }, - .Tab => { - char = '\t'; - n_chars = 1; - }, - .None => return, - } - try out_stream.writeByte('\n'); - n_chars *= whitespace.indent_level; - try out_stream.writeByteNTimes(char, n_chars); - } - }; - - /// Controls the whitespace emitted - whitespace: ?Whitespace = null, - - /// Should optional fields with null value be written? - emit_null_optional_fields: bool = true, - - string: StringOptions = StringOptions{ .String = .{} }, - - /// Should []u8 be serialised as a string? or an array? - pub const StringOptions = union(enum) { - Array, - String: StringOutputOptions, - - /// String output options - const StringOutputOptions = struct { - /// Should '/' be escaped in strings? - escape_solidus: bool = false, - - /// Should unicode characters be escaped in strings? - escape_unicode: bool = false, - }; - }; -}; - -fn outputUnicodeEscape( - codepoint: u21, - out_stream: anytype, -) !void { - if (codepoint <= 0xFFFF) { - // If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF), - // then it may be represented as a six-character sequence: a reverse solidus, followed - // by the lowercase letter u, followed by four hexadecimal digits that encode the character's code point. - try out_stream.writeAll("\\u"); - try std.fmt.formatIntValue(codepoint, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream); - } else { - assert(codepoint <= 0x10FFFF); - // To escape an extended character that is not in the Basic Multilingual Plane, - // the character is represented as a 12-character sequence, encoding the UTF-16 surrogate pair. 
- const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800; - const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00; - try out_stream.writeAll("\\u"); - try std.fmt.formatIntValue(high, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream); - try out_stream.writeAll("\\u"); - try std.fmt.formatIntValue(low, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream); - } -} - -/// Write `string` to `writer` as a JSON encoded string. -pub fn encodeJsonString(string: []const u8, options: StringifyOptions, writer: anytype) !void { - try writer.writeByte('\"'); - try encodeJsonStringChars(string, options, writer); - try writer.writeByte('\"'); -} - -/// Write `chars` to `writer` as JSON encoded string characters. -pub fn encodeJsonStringChars(chars: []const u8, options: StringifyOptions, writer: anytype) !void { - var i: usize = 0; - while (i < chars.len) : (i += 1) { - switch (chars[i]) { - // normal ascii character - 0x20...0x21, 0x23...0x2E, 0x30...0x5B, 0x5D...0x7F => |c| try writer.writeByte(c), - // only 2 characters that *must* be escaped - '\\' => try writer.writeAll("\\\\"), - '\"' => try writer.writeAll("\\\""), - // solidus is optional to escape - '/' => { - if (options.string.String.escape_solidus) { - try writer.writeAll("\\/"); - } else { - try writer.writeByte('/'); - } - }, - // control characters with short escapes - // TODO: option to switch between unicode and 'short' forms? - 0x8 => try writer.writeAll("\\b"), - 0xC => try writer.writeAll("\\f"), - '\n' => try writer.writeAll("\\n"), - '\r' => try writer.writeAll("\\r"), - '\t' => try writer.writeAll("\\t"), - else => { - const ulen = std.unicode.utf8ByteSequenceLength(chars[i]) catch unreachable; - // control characters (only things left with 1 byte length) should always be printed as unicode escapes - if (ulen == 1 or options.string.String.escape_unicode) { - const codepoint = std.unicode.utf8Decode(chars[i..][0..ulen]) catch unreachable; - try outputUnicodeEscape(codepoint, writer); - } else { - try writer.writeAll(chars[i..][0..ulen]); - } - i += ulen - 1; - }, - } - } -} - -pub fn stringify( - value: anytype, - options: StringifyOptions, - out_stream: anytype, -) !void { - const T = @TypeOf(value); - switch (@typeInfo(T)) { - .Float, .ComptimeFloat => { - return std.fmt.formatFloatScientific(value, std.fmt.FormatOptions{}, out_stream); - }, - .Int, .ComptimeInt => { - return std.fmt.formatIntValue(value, "", std.fmt.FormatOptions{}, out_stream); - }, - .Bool => { - return out_stream.writeAll(if (value) "true" else "false"); - }, - .Null => { - return out_stream.writeAll("null"); - }, - .Optional => { - if (value) |payload| { - return try stringify(payload, options, out_stream); - } else { - return try stringify(null, options, out_stream); - } - }, - .Enum => { - if (comptime std.meta.trait.hasFn("jsonStringify")(T)) { - return value.jsonStringify(options, out_stream); - } - - @compileError("Unable to stringify enum '" ++ @typeName(T) ++ "'"); - }, - .Union => { - if (comptime std.meta.trait.hasFn("jsonStringify")(T)) { - return value.jsonStringify(options, out_stream); - } - - const info = @typeInfo(T).Union; - if (info.tag_type) |UnionTagType| { - inline for (info.fields) |u_field| { - if (value == @field(UnionTagType, u_field.name)) { - return try stringify(@field(value, u_field.name), options, out_stream); - } - } - } else { - @compileError("Unable to stringify untagged union '" ++ @typeName(T) ++ "'"); - } - }, - .Struct => |S| { - if (comptime std.meta.trait.hasFn("jsonStringify")(T)) { 
- return value.jsonStringify(options, out_stream); - } - - try out_stream.writeByte(if (S.is_tuple) '[' else '{'); - var field_output = false; - var child_options = options; - if (child_options.whitespace) |*child_whitespace| { - child_whitespace.indent_level += 1; - } - inline for (S.fields) |Field| { - // don't include void fields - if (Field.type == void) continue; - - var emit_field = true; - - // don't include optional fields that are null when emit_null_optional_fields is set to false - if (@typeInfo(Field.type) == .Optional) { - if (options.emit_null_optional_fields == false) { - if (@field(value, Field.name) == null) { - emit_field = false; - } - } - } - - if (emit_field) { - if (!field_output) { - field_output = true; - } else { - try out_stream.writeByte(','); - } - if (child_options.whitespace) |child_whitespace| { - try child_whitespace.outputIndent(out_stream); - } - if (!S.is_tuple) { - try encodeJsonString(Field.name, options, out_stream); - try out_stream.writeByte(':'); - if (child_options.whitespace) |child_whitespace| { - if (child_whitespace.separator) { - try out_stream.writeByte(' '); - } - } - } - try stringify(@field(value, Field.name), child_options, out_stream); - } - } - if (field_output) { - if (options.whitespace) |whitespace| { - try whitespace.outputIndent(out_stream); - } - } - try out_stream.writeByte(if (S.is_tuple) ']' else '}'); - return; - }, - .ErrorSet => return stringify(@as([]const u8, @errorName(value)), options, out_stream), - .Pointer => |ptr_info| switch (ptr_info.size) { - .One => switch (@typeInfo(ptr_info.child)) { - .Array => { - const Slice = []const std.meta.Elem(ptr_info.child); - return stringify(@as(Slice, value), options, out_stream); - }, - else => { - // TODO: avoid loops? - return stringify(value.*, options, out_stream); - }, - }, - .Many, .Slice => { - if (ptr_info.size == .Many and ptr_info.sentinel == null) - @compileError("unable to stringify type '" ++ @typeName(T) ++ "' without sentinel"); - const slice = if (ptr_info.size == .Many) mem.span(value) else value; - - if (ptr_info.child == u8 and options.string == .String and std.unicode.utf8ValidateSlice(slice)) { - try encodeJsonString(slice, options, out_stream); - return; - } - - try out_stream.writeByte('['); - var child_options = options; - if (child_options.whitespace) |*whitespace| { - whitespace.indent_level += 1; - } - for (slice, 0..) |x, i| { - if (i != 0) { - try out_stream.writeByte(','); - } - if (child_options.whitespace) |child_whitespace| { - try child_whitespace.outputIndent(out_stream); - } - try stringify(x, child_options, out_stream); - } - if (slice.len != 0) { - if (options.whitespace) |whitespace| { - try whitespace.outputIndent(out_stream); - } - } - try out_stream.writeByte(']'); - return; - }, - else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"), - }, - .Array => return stringify(&value, options, out_stream), - .Vector => |info| { - const array: [info.len]info.child = value; - return stringify(&array, options, out_stream); - }, - else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"), - } - unreachable; -} - -// Same as `stringify` but accepts an Allocator and stores result in dynamically allocated memory instead of using a Writer. -// Caller owns returned memory. 
-pub fn stringifyAlloc(allocator: std.mem.Allocator, value: anytype, options: StringifyOptions) ![]const u8 { - var list = std.ArrayList(u8).init(allocator); - errdefer list.deinit(); - try stringify(value, options, list.writer()); - return list.toOwnedSlice(); -} +// Deprecations +pub const parse = @compileError("Deprecated; use parseFromSlice() or parseFromTokenSource() instead."); +pub const StreamingParser = @compileError("Deprecated; use json.Scanner or json.Reader instead."); +pub const TokenStream = @compileError("Deprecated; use json.Scanner or json.Reader instead."); test { _ = @import("json/test.zig"); + _ = @import("json/scanner.zig"); _ = @import("json/write_stream.zig"); -} - -test "stringify null optional fields" { - const MyStruct = struct { - optional: ?[]const u8 = null, - required: []const u8 = "something", - another_optional: ?[]const u8 = null, - another_required: []const u8 = "something else", - }; - try teststringify( - \\{"optional":null,"required":"something","another_optional":null,"another_required":"something else"} - , - MyStruct{}, - StringifyOptions{}, - ); - try teststringify( - \\{"required":"something","another_required":"something else"} - , - MyStruct{}, - StringifyOptions{ .emit_null_optional_fields = false }, - ); - - var ts = TokenStream.init( - \\{"required":"something","another_required":"something else"} - ); - try std.testing.expect(try parsesTo(MyStruct, MyStruct{}, &ts, .{ - .allocator = std.testing.allocator, - })); -} - -test "skipValue" { - var ts = TokenStream.init("false"); - try skipValue(&ts); - ts = TokenStream.init("true"); - try skipValue(&ts); - ts = TokenStream.init("null"); - try skipValue(&ts); - ts = TokenStream.init("42"); - try skipValue(&ts); - ts = TokenStream.init("42.0"); - try skipValue(&ts); - ts = TokenStream.init("\"foo\""); - try skipValue(&ts); - ts = TokenStream.init("[101, 111, 121]"); - try skipValue(&ts); - ts = TokenStream.init("{}"); - try skipValue(&ts); - ts = TokenStream.init("{\"foo\": \"bar\"}"); - try skipValue(&ts); - - { // An absurd number of nestings - const nestings = StreamingParser.default_max_nestings + 1; - - ts = TokenStream.init("[" ** nestings ++ "]" ** nestings); - try testing.expectError(error.TooManyNestedItems, skipValue(&ts)); - } - - { // Would a number token cause problems in a deeply-nested array? - const nestings = StreamingParser.default_max_nestings; - const deeply_nested_array = "[" ** nestings ++ "0.118, 999, 881.99, 911.9, 725, 3" ++ "]" ** nestings; - - ts = TokenStream.init(deeply_nested_array); - try skipValue(&ts); - - ts = TokenStream.init("[" ++ deeply_nested_array ++ "]"); - try testing.expectError(error.TooManyNestedItems, skipValue(&ts)); - } - - // Mismatched brace/square bracket - ts = TokenStream.init("[102, 111, 111}"); - try testing.expectError(error.UnexpectedClosingBrace, skipValue(&ts)); - - { // should fail if no value found (e.g. 
immediate close of object) - var empty_object = TokenStream.init("{}"); - assert(.ObjectBegin == (try empty_object.next()).?); - try testing.expectError(error.UnexpectedJsonDepth, skipValue(&empty_object)); - - var empty_array = TokenStream.init("[]"); - assert(.ArrayBegin == (try empty_array.next()).?); - try testing.expectError(error.UnexpectedJsonDepth, skipValue(&empty_array)); - } -} - -test "stringify basic types" { - try teststringify("false", false, StringifyOptions{}); - try teststringify("true", true, StringifyOptions{}); - try teststringify("null", @as(?u8, null), StringifyOptions{}); - try teststringify("null", @as(?*u32, null), StringifyOptions{}); - try teststringify("42", 42, StringifyOptions{}); - try teststringify("4.2e+01", 42.0, StringifyOptions{}); - try teststringify("42", @as(u8, 42), StringifyOptions{}); - try teststringify("42", @as(u128, 42), StringifyOptions{}); - try teststringify("4.2e+01", @as(f32, 42), StringifyOptions{}); - try teststringify("4.2e+01", @as(f64, 42), StringifyOptions{}); - try teststringify("\"ItBroke\"", @as(anyerror, error.ItBroke), StringifyOptions{}); -} - -test "stringify string" { - try teststringify("\"hello\"", "hello", StringifyOptions{}); - try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", StringifyOptions{}); - try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", StringifyOptions{}); - try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{80}\"", "with unicode\u{80}", StringifyOptions{}); - try teststringify("\"with unicode\\u0080\"", "with unicode\u{80}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{FF}\"", "with unicode\u{FF}", StringifyOptions{}); - try teststringify("\"with unicode\\u00ff\"", "with unicode\u{FF}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{100}\"", "with unicode\u{100}", StringifyOptions{}); - try teststringify("\"with unicode\\u0100\"", "with unicode\u{100}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{800}\"", "with unicode\u{800}", StringifyOptions{}); - try teststringify("\"with unicode\\u0800\"", "with unicode\u{800}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{8000}\"", "with unicode\u{8000}", StringifyOptions{}); - try teststringify("\"with unicode\\u8000\"", "with unicode\u{8000}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{D799}\"", "with unicode\u{D799}", StringifyOptions{}); - try teststringify("\"with unicode\\ud799\"", "with unicode\u{D799}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{10000}\"", "with unicode\u{10000}", StringifyOptions{}); - try teststringify("\"with unicode\\ud800\\udc00\"", "with unicode\u{10000}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{10FFFF}\"", "with unicode\u{10FFFF}", StringifyOptions{}); - try teststringify("\"with unicode\\udbff\\udfff\"", "with unicode\u{10FFFF}", StringifyOptions{ .string = .{ .String = 
.{ .escape_unicode = true } } }); - try teststringify("\"/\"", "/", StringifyOptions{}); - try teststringify("\"\\/\"", "/", StringifyOptions{ .string = .{ .String = .{ .escape_solidus = true } } }); -} - -test "stringify many-item sentinel-terminated string" { - try teststringify("\"hello\"", @as([*:0]const u8, "hello"), StringifyOptions{}); - try teststringify("\"with\\nescapes\\r\"", @as([*:0]const u8, "with\nescapes\r"), StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\\u0001\"", @as([*:0]const u8, "with unicode\u{1}"), StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); -} - -test "stringify tagged unions" { - try teststringify("42", union(enum) { - Foo: u32, - Bar: bool, - }{ .Foo = 42 }, StringifyOptions{}); -} - -test "stringify struct" { - try teststringify("{\"foo\":42}", struct { - foo: u32, - }{ .foo = 42 }, StringifyOptions{}); -} - -test "stringify struct with string as array" { - try teststringify("{\"foo\":\"bar\"}", .{ .foo = "bar" }, StringifyOptions{}); - try teststringify("{\"foo\":[98,97,114]}", .{ .foo = "bar" }, StringifyOptions{ .string = .Array }); -} - -test "stringify struct with indentation" { - try teststringify( - \\{ - \\ "foo": 42, - \\ "bar": [ - \\ 1, - \\ 2, - \\ 3 - \\ ] - \\} - , - struct { - foo: u32, - bar: [3]u32, - }{ - .foo = 42, - .bar = .{ 1, 2, 3 }, - }, - StringifyOptions{ - .whitespace = .{}, - }, - ); - try teststringify( - "{\n\t\"foo\":42,\n\t\"bar\":[\n\t\t1,\n\t\t2,\n\t\t3\n\t]\n}", - struct { - foo: u32, - bar: [3]u32, - }{ - .foo = 42, - .bar = .{ 1, 2, 3 }, - }, - StringifyOptions{ - .whitespace = .{ - .indent = .Tab, - .separator = false, - }, - }, - ); - try teststringify( - \\{"foo":42,"bar":[1,2,3]} - , - struct { - foo: u32, - bar: [3]u32, - }{ - .foo = 42, - .bar = .{ 1, 2, 3 }, - }, - StringifyOptions{ - .whitespace = .{ - .indent = .None, - .separator = false, - }, - }, - ); -} - -test "stringify struct with void field" { - try teststringify("{\"foo\":42}", struct { - foo: u32, - bar: void = {}, - }{ .foo = 42 }, StringifyOptions{}); -} - -test "stringify array of structs" { - const MyStruct = struct { - foo: u32, - }; - try teststringify("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{ - MyStruct{ .foo = 42 }, - MyStruct{ .foo = 100 }, - MyStruct{ .foo = 1000 }, - }, StringifyOptions{}); -} - -test "stringify struct with custom stringifier" { - try teststringify("[\"something special\",42]", struct { - foo: u32, - const Self = @This(); - pub fn jsonStringify( - value: Self, - options: StringifyOptions, - out_stream: anytype, - ) !void { - _ = value; - try out_stream.writeAll("[\"something special\","); - try stringify(42, options, out_stream); - try out_stream.writeByte(']'); - } - }{ .foo = 42 }, StringifyOptions{}); -} - -test "stringify vector" { - try teststringify("[1,1]", @splat(2, @as(u32, 1)), StringifyOptions{}); -} - -test "stringify tuple" { - try teststringify("[\"foo\",42]", std.meta.Tuple(&.{ []const u8, usize }){ "foo", 42 }, StringifyOptions{}); -} - -fn teststringify(expected: []const u8, value: anytype, options: StringifyOptions) !void { - const ValidationWriter = struct { - const Self = @This(); - pub const Writer = std.io.Writer(*Self, Error, write); - pub const Error = error{ - TooMuchData, - DifferentData, - }; - - expected_remaining: []const u8, - - fn init(exp: []const u8) Self { - return .{ .expected_remaining = exp }; - } - - pub fn writer(self: *Self) Writer { - return .{ .context = self }; - } - - fn 
write(self: *Self, bytes: []const u8) Error!usize { - if (self.expected_remaining.len < bytes.len) { - std.debug.print( - \\====== expected this output: ========= - \\{s} - \\======== instead found this: ========= - \\{s} - \\====================================== - , .{ - self.expected_remaining, - bytes, - }); - return error.TooMuchData; - } - if (!mem.eql(u8, self.expected_remaining[0..bytes.len], bytes)) { - std.debug.print( - \\====== expected this output: ========= - \\{s} - \\======== instead found this: ========= - \\{s} - \\====================================== - , .{ - self.expected_remaining[0..bytes.len], - bytes, - }); - return error.DifferentData; - } - self.expected_remaining = self.expected_remaining[bytes.len..]; - return bytes.len; - } - }; - - var vos = ValidationWriter.init(expected); - try stringify(value, options, vos.writer()); - if (vos.expected_remaining.len > 0) return error.NotEnoughData; -} - -test "encodesTo" { - // same - try testing.expectEqual(true, encodesTo("false", "false")); - // totally different - try testing.expectEqual(false, encodesTo("false", "true")); - // different lengths - try testing.expectEqual(false, encodesTo("false", "other")); - // with escape - try testing.expectEqual(true, encodesTo("\\", "\\\\")); - try testing.expectEqual(true, encodesTo("with\nescape", "with\\nescape")); - // with unicode - try testing.expectEqual(true, encodesTo("ą", "\\u0105")); - try testing.expectEqual(true, encodesTo("😂", "\\ud83d\\ude02")); - try testing.expectEqual(true, encodesTo("withąunicode😂", "with\\u0105unicode\\ud83d\\ude02")); -} - -test "deserializing string with escape sequence into sentinel slice" { - const json = "\"\\n\""; - var token_stream = std.json.TokenStream.init(json); - const options = ParseOptions{ .allocator = std.testing.allocator }; - - // Pre-fix, this line would panic: - const result = try std.json.parse([:0]const u8, &token_stream, options); - defer std.json.parseFree([:0]const u8, result, options); - - // Double-check that we're getting the right result - try testing.expect(mem.eql(u8, result, "\n")); -} - -test "stringify struct with custom stringify that returns a custom error" { - var ret = std.json.stringify(struct { - field: Field = .{}, - - pub const Field = struct { - field: ?[]*Field = null, - - const Self = @This(); - pub fn jsonStringify(_: Self, _: StringifyOptions, _: anytype) error{CustomError}!void { - return error.CustomError; - } - }; - }{}, StringifyOptions{}, std.io.null_writer); - - try std.testing.expectError(error.CustomError, ret); + _ = @import("json/dynamic.zig"); + _ = @import("json/static.zig"); + _ = @import("json/stringify.zig"); + _ = @import("json/JSONTestSuite_test.zig"); } diff --git a/lib/std/json/JSONTestSuite_test.zig b/lib/std/json/JSONTestSuite_test.zig new file mode 100644 index 0000000000..30c5907f8b --- /dev/null +++ b/lib/std/json/JSONTestSuite_test.zig @@ -0,0 +1,960 @@ +// This file was generated by _generate_JSONTestSuite.zig +// These test cases are sourced from: https://github.com/nst/JSONTestSuite +const ok = @import("./test.zig").ok; +const err = @import("./test.zig").err; +const any = @import("./test.zig").any; + +test "i_number_double_huge_neg_exp.json" { + try any("[123.456e-789]"); +} +test "i_number_huge_exp.json" { + try any("[0.4e00669999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999969999999006]"); +} +test "i_number_neg_int_huge_exp.json" { + try any("[-1e+9999]"); +} +test "i_number_pos_double_huge_exp.json" { 
+ try any("[1.5e+9999]"); +} +test "i_number_real_neg_overflow.json" { + try any("[-123123e100000]"); +} +test "i_number_real_pos_overflow.json" { + try any("[123123e100000]"); +} +test "i_number_real_underflow.json" { + try any("[123e-10000000]"); +} +test "i_number_too_big_neg_int.json" { + try any("[-123123123123123123123123123123]"); +} +test "i_number_too_big_pos_int.json" { + try any("[100000000000000000000]"); +} +test "i_number_very_big_negative_int.json" { + try any("[-237462374673276894279832749832423479823246327846]"); +} +test "i_object_key_lone_2nd_surrogate.json" { + try any("{\"\\uDFAA\":0}"); +} +test "i_string_1st_surrogate_but_2nd_missing.json" { + try any("[\"\\uDADA\"]"); +} +test "i_string_1st_valid_surrogate_2nd_invalid.json" { + try any("[\"\\uD888\\u1234\"]"); +} +test "i_string_UTF-16LE_with_BOM.json" { + try any("\xff\xfe[\x00\"\x00\xe9\x00\"\x00]\x00"); +} +test "i_string_UTF-8_invalid_sequence.json" { + try any("[\"\xe6\x97\xa5\xd1\x88\xfa\"]"); +} +test "i_string_UTF8_surrogate_U+D800.json" { + try any("[\"\xed\xa0\x80\"]"); +} +test "i_string_incomplete_surrogate_and_escape_valid.json" { + try any("[\"\\uD800\\n\"]"); +} +test "i_string_incomplete_surrogate_pair.json" { + try any("[\"\\uDd1ea\"]"); +} +test "i_string_incomplete_surrogates_escape_valid.json" { + try any("[\"\\uD800\\uD800\\n\"]"); +} +test "i_string_invalid_lonely_surrogate.json" { + try any("[\"\\ud800\"]"); +} +test "i_string_invalid_surrogate.json" { + try any("[\"\\ud800abc\"]"); +} +test "i_string_invalid_utf-8.json" { + try any("[\"\xff\"]"); +} +test "i_string_inverted_surrogates_U+1D11E.json" { + try any("[\"\\uDd1e\\uD834\"]"); +} +test "i_string_iso_latin_1.json" { + try any("[\"\xe9\"]"); +} +test "i_string_lone_second_surrogate.json" { + try any("[\"\\uDFAA\"]"); +} +test "i_string_lone_utf8_continuation_byte.json" { + try any("[\"\x81\"]"); +} +test "i_string_not_in_unicode_range.json" { + try any("[\"\xf4\xbf\xbf\xbf\"]"); +} +test "i_string_overlong_sequence_2_bytes.json" { + try any("[\"\xc0\xaf\"]"); +} +test "i_string_overlong_sequence_6_bytes.json" { + try any("[\"\xfc\x83\xbf\xbf\xbf\xbf\"]"); +} +test "i_string_overlong_sequence_6_bytes_null.json" { + try any("[\"\xfc\x80\x80\x80\x80\x80\"]"); +} +test "i_string_truncated-utf-8.json" { + try any("[\"\xe0\xff\"]"); +} +test "i_string_utf16BE_no_BOM.json" { + try any("\x00[\x00\"\x00\xe9\x00\"\x00]"); +} +test "i_string_utf16LE_no_BOM.json" { + try any("[\x00\"\x00\xe9\x00\"\x00]\x00"); +} +test "i_structure_500_nested_arrays.json" { + try any("[" ** 500 ++ "]" ** 500); +} +test "i_structure_UTF-8_BOM_empty_object.json" { + try any("\xef\xbb\xbf{}"); +} +test "n_array_1_true_without_comma.json" { + try err("[1 true]"); +} +test "n_array_a_invalid_utf8.json" { + try err("[a\xe5]"); +} +test "n_array_colon_instead_of_comma.json" { + try err("[\"\": 1]"); +} +test "n_array_comma_after_close.json" { + try err("[\"\"],"); +} +test "n_array_comma_and_number.json" { + try err("[,1]"); +} +test "n_array_double_comma.json" { + try err("[1,,2]"); +} +test "n_array_double_extra_comma.json" { + try err("[\"x\",,]"); +} +test "n_array_extra_close.json" { + try err("[\"x\"]]"); +} +test "n_array_extra_comma.json" { + try err("[\"\",]"); +} +test "n_array_incomplete.json" { + try err("[\"x\""); +} +test "n_array_incomplete_invalid_value.json" { + try err("[x"); +} +test "n_array_inner_array_no_comma.json" { + try err("[3[4]]"); +} +test "n_array_invalid_utf8.json" { + try err("[\xff]"); +} +test "n_array_items_separated_by_semicolon.json" { 
+ try err("[1:2]"); +} +test "n_array_just_comma.json" { + try err("[,]"); +} +test "n_array_just_minus.json" { + try err("[-]"); +} +test "n_array_missing_value.json" { + try err("[ , \"\"]"); +} +test "n_array_newlines_unclosed.json" { + try err("[\"a\",\n4\n,1,"); +} +test "n_array_number_and_comma.json" { + try err("[1,]"); +} +test "n_array_number_and_several_commas.json" { + try err("[1,,]"); +} +test "n_array_spaces_vertical_tab_formfeed.json" { + try err("[\"\x0ba\"\\f]"); +} +test "n_array_star_inside.json" { + try err("[*]"); +} +test "n_array_unclosed.json" { + try err("[\"\""); +} +test "n_array_unclosed_trailing_comma.json" { + try err("[1,"); +} +test "n_array_unclosed_with_new_lines.json" { + try err("[1,\n1\n,1"); +} +test "n_array_unclosed_with_object_inside.json" { + try err("[{}"); +} +test "n_incomplete_false.json" { + try err("[fals]"); +} +test "n_incomplete_null.json" { + try err("[nul]"); +} +test "n_incomplete_true.json" { + try err("[tru]"); +} +test "n_multidigit_number_then_00.json" { + try err("123\x00"); +} +test "n_number_++.json" { + try err("[++1234]"); +} +test "n_number_+1.json" { + try err("[+1]"); +} +test "n_number_+Inf.json" { + try err("[+Inf]"); +} +test "n_number_-01.json" { + try err("[-01]"); +} +test "n_number_-1.0..json" { + try err("[-1.0.]"); +} +test "n_number_-2..json" { + try err("[-2.]"); +} +test "n_number_-NaN.json" { + try err("[-NaN]"); +} +test "n_number_.-1.json" { + try err("[.-1]"); +} +test "n_number_.2e-3.json" { + try err("[.2e-3]"); +} +test "n_number_0.1.2.json" { + try err("[0.1.2]"); +} +test "n_number_0.3e+.json" { + try err("[0.3e+]"); +} +test "n_number_0.3e.json" { + try err("[0.3e]"); +} +test "n_number_0.e1.json" { + try err("[0.e1]"); +} +test "n_number_0_capital_E+.json" { + try err("[0E+]"); +} +test "n_number_0_capital_E.json" { + try err("[0E]"); +} +test "n_number_0e+.json" { + try err("[0e+]"); +} +test "n_number_0e.json" { + try err("[0e]"); +} +test "n_number_1.0e+.json" { + try err("[1.0e+]"); +} +test "n_number_1.0e-.json" { + try err("[1.0e-]"); +} +test "n_number_1.0e.json" { + try err("[1.0e]"); +} +test "n_number_1_000.json" { + try err("[1 000.0]"); +} +test "n_number_1eE2.json" { + try err("[1eE2]"); +} +test "n_number_2.e+3.json" { + try err("[2.e+3]"); +} +test "n_number_2.e-3.json" { + try err("[2.e-3]"); +} +test "n_number_2.e3.json" { + try err("[2.e3]"); +} +test "n_number_9.e+.json" { + try err("[9.e+]"); +} +test "n_number_Inf.json" { + try err("[Inf]"); +} +test "n_number_NaN.json" { + try err("[NaN]"); +} +test "n_number_U+FF11_fullwidth_digit_one.json" { + try err("[\xef\xbc\x91]"); +} +test "n_number_expression.json" { + try err("[1+2]"); +} +test "n_number_hex_1_digit.json" { + try err("[0x1]"); +} +test "n_number_hex_2_digits.json" { + try err("[0x42]"); +} +test "n_number_infinity.json" { + try err("[Infinity]"); +} +test "n_number_invalid+-.json" { + try err("[0e+-1]"); +} +test "n_number_invalid-negative-real.json" { + try err("[-123.123foo]"); +} +test "n_number_invalid-utf-8-in-bigger-int.json" { + try err("[123\xe5]"); +} +test "n_number_invalid-utf-8-in-exponent.json" { + try err("[1e1\xe5]"); +} +test "n_number_invalid-utf-8-in-int.json" { + try err("[0\xe5]\n"); +} +test "n_number_minus_infinity.json" { + try err("[-Infinity]"); +} +test "n_number_minus_sign_with_trailing_garbage.json" { + try err("[-foo]"); +} +test "n_number_minus_space_1.json" { + try err("[- 1]"); +} +test "n_number_neg_int_starting_with_zero.json" { + try err("[-012]"); +} +test 
"n_number_neg_real_without_int_part.json" { + try err("[-.123]"); +} +test "n_number_neg_with_garbage_at_end.json" { + try err("[-1x]"); +} +test "n_number_real_garbage_after_e.json" { + try err("[1ea]"); +} +test "n_number_real_with_invalid_utf8_after_e.json" { + try err("[1e\xe5]"); +} +test "n_number_real_without_fractional_part.json" { + try err("[1.]"); +} +test "n_number_starting_with_dot.json" { + try err("[.123]"); +} +test "n_number_with_alpha.json" { + try err("[1.2a-3]"); +} +test "n_number_with_alpha_char.json" { + try err("[1.8011670033376514H-308]"); +} +test "n_number_with_leading_zero.json" { + try err("[012]"); +} +test "n_object_bad_value.json" { + try err("[\"x\", truth]"); +} +test "n_object_bracket_key.json" { + try err("{[: \"x\"}\n"); +} +test "n_object_comma_instead_of_colon.json" { + try err("{\"x\", null}"); +} +test "n_object_double_colon.json" { + try err("{\"x\"::\"b\"}"); +} +test "n_object_emoji.json" { + try err("{\xf0\x9f\x87\xa8\xf0\x9f\x87\xad}"); +} +test "n_object_garbage_at_end.json" { + try err("{\"a\":\"a\" 123}"); +} +test "n_object_key_with_single_quotes.json" { + try err("{key: 'value'}"); +} +test "n_object_lone_continuation_byte_in_key_and_trailing_comma.json" { + try err("{\"\xb9\":\"0\",}"); +} +test "n_object_missing_colon.json" { + try err("{\"a\" b}"); +} +test "n_object_missing_key.json" { + try err("{:\"b\"}"); +} +test "n_object_missing_semicolon.json" { + try err("{\"a\" \"b\"}"); +} +test "n_object_missing_value.json" { + try err("{\"a\":"); +} +test "n_object_no-colon.json" { + try err("{\"a\""); +} +test "n_object_non_string_key.json" { + try err("{1:1}"); +} +test "n_object_non_string_key_but_huge_number_instead.json" { + try err("{9999E9999:1}"); +} +test "n_object_repeated_null_null.json" { + try err("{null:null,null:null}"); +} +test "n_object_several_trailing_commas.json" { + try err("{\"id\":0,,,,,}"); +} +test "n_object_single_quote.json" { + try err("{'a':0}"); +} +test "n_object_trailing_comma.json" { + try err("{\"id\":0,}"); +} +test "n_object_trailing_comment.json" { + try err("{\"a\":\"b\"}/**/"); +} +test "n_object_trailing_comment_open.json" { + try err("{\"a\":\"b\"}/**//"); +} +test "n_object_trailing_comment_slash_open.json" { + try err("{\"a\":\"b\"}//"); +} +test "n_object_trailing_comment_slash_open_incomplete.json" { + try err("{\"a\":\"b\"}/"); +} +test "n_object_two_commas_in_a_row.json" { + try err("{\"a\":\"b\",,\"c\":\"d\"}"); +} +test "n_object_unquoted_key.json" { + try err("{a: \"b\"}"); +} +test "n_object_unterminated-value.json" { + try err("{\"a\":\"a"); +} +test "n_object_with_single_string.json" { + try err("{ \"foo\" : \"bar\", \"a\" }"); +} +test "n_object_with_trailing_garbage.json" { + try err("{\"a\":\"b\"}#"); +} +test "n_single_space.json" { + try err(" "); +} +test "n_string_1_surrogate_then_escape.json" { + try err("[\"\\uD800\\\"]"); +} +test "n_string_1_surrogate_then_escape_u.json" { + try err("[\"\\uD800\\u\"]"); +} +test "n_string_1_surrogate_then_escape_u1.json" { + try err("[\"\\uD800\\u1\"]"); +} +test "n_string_1_surrogate_then_escape_u1x.json" { + try err("[\"\\uD800\\u1x\"]"); +} +test "n_string_accentuated_char_no_quotes.json" { + try err("[\xc3\xa9]"); +} +test "n_string_backslash_00.json" { + try err("[\"\\\x00\"]"); +} +test "n_string_escape_x.json" { + try err("[\"\\x00\"]"); +} +test "n_string_escaped_backslash_bad.json" { + try err("[\"\\\\\\\"]"); +} +test "n_string_escaped_ctrl_char_tab.json" { + try err("[\"\\\x09\"]"); +} +test "n_string_escaped_emoji.json" { + try 
err("[\"\\\xf0\x9f\x8c\x80\"]"); +} +test "n_string_incomplete_escape.json" { + try err("[\"\\\"]"); +} +test "n_string_incomplete_escaped_character.json" { + try err("[\"\\u00A\"]"); +} +test "n_string_incomplete_surrogate.json" { + try err("[\"\\uD834\\uDd\"]"); +} +test "n_string_incomplete_surrogate_escape_invalid.json" { + try err("[\"\\uD800\\uD800\\x\"]"); +} +test "n_string_invalid-utf-8-in-escape.json" { + try err("[\"\\u\xe5\"]"); +} +test "n_string_invalid_backslash_esc.json" { + try err("[\"\\a\"]"); +} +test "n_string_invalid_unicode_escape.json" { + try err("[\"\\uqqqq\"]"); +} +test "n_string_invalid_utf8_after_escape.json" { + try err("[\"\\\xe5\"]"); +} +test "n_string_leading_uescaped_thinspace.json" { + try err("[\\u0020\"asd\"]"); +} +test "n_string_no_quotes_with_bad_escape.json" { + try err("[\\n]"); +} +test "n_string_single_doublequote.json" { + try err("\""); +} +test "n_string_single_quote.json" { + try err("['single quote']"); +} +test "n_string_single_string_no_double_quotes.json" { + try err("abc"); +} +test "n_string_start_escape_unclosed.json" { + try err("[\"\\"); +} +test "n_string_unescaped_ctrl_char.json" { + try err("[\"a\x00a\"]"); +} +test "n_string_unescaped_newline.json" { + try err("[\"new\nline\"]"); +} +test "n_string_unescaped_tab.json" { + try err("[\"\x09\"]"); +} +test "n_string_unicode_CapitalU.json" { + try err("\"\\UA66D\""); +} +test "n_string_with_trailing_garbage.json" { + try err("\"\"x"); +} +test "n_structure_100000_opening_arrays.json" { + try err("[" ** 100000); +} +test "n_structure_U+2060_word_joined.json" { + try err("[\xe2\x81\xa0]"); +} +test "n_structure_UTF8_BOM_no_data.json" { + try err("\xef\xbb\xbf"); +} +test "n_structure_angle_bracket_..json" { + try err("<.>"); +} +test "n_structure_angle_bracket_null.json" { + try err("[]"); +} +test "n_structure_array_trailing_garbage.json" { + try err("[1]x"); +} +test "n_structure_array_with_extra_array_close.json" { + try err("[1]]"); +} +test "n_structure_array_with_unclosed_string.json" { + try err("[\"asd]"); +} +test "n_structure_ascii-unicode-identifier.json" { + try err("a\xc3\xa5"); +} +test "n_structure_capitalized_True.json" { + try err("[True]"); +} +test "n_structure_close_unopened_array.json" { + try err("1]"); +} +test "n_structure_comma_instead_of_closing_brace.json" { + try err("{\"x\": true,"); +} +test "n_structure_double_array.json" { + try err("[][]"); +} +test "n_structure_end_array.json" { + try err("]"); +} +test "n_structure_incomplete_UTF8_BOM.json" { + try err("\xef\xbb{}"); +} +test "n_structure_lone-invalid-utf-8.json" { + try err("\xe5"); +} +test "n_structure_lone-open-bracket.json" { + try err("["); +} +test "n_structure_no_data.json" { + try err(""); +} +test "n_structure_null-byte-outside-string.json" { + try err("[\x00]"); +} +test "n_structure_number_with_trailing_garbage.json" { + try err("2@"); +} +test "n_structure_object_followed_by_closing_object.json" { + try err("{}}"); +} +test "n_structure_object_unclosed_no_value.json" { + try err("{\"\":"); +} +test "n_structure_object_with_comment.json" { + try err("{\"a\":/*comment*/\"b\"}"); +} +test "n_structure_object_with_trailing_garbage.json" { + try err("{\"a\": true} \"x\""); +} +test "n_structure_open_array_apostrophe.json" { + try err("['"); +} +test "n_structure_open_array_comma.json" { + try err("[,"); +} +test "n_structure_open_array_object.json" { + try err("[{\"\":" ** 50000 ++ "\n"); +} +test "n_structure_open_array_open_object.json" { + try err("[{"); +} +test 
"n_structure_open_array_open_string.json" { + try err("[\"a"); +} +test "n_structure_open_array_string.json" { + try err("[\"a\""); +} +test "n_structure_open_object.json" { + try err("{"); +} +test "n_structure_open_object_close_array.json" { + try err("{]"); +} +test "n_structure_open_object_comma.json" { + try err("{,"); +} +test "n_structure_open_object_open_array.json" { + try err("{["); +} +test "n_structure_open_object_open_string.json" { + try err("{\"a"); +} +test "n_structure_open_object_string_with_apostrophes.json" { + try err("{'a'"); +} +test "n_structure_open_open.json" { + try err("[\"\\{[\"\\{[\"\\{[\"\\{"); +} +test "n_structure_single_eacute.json" { + try err("\xe9"); +} +test "n_structure_single_star.json" { + try err("*"); +} +test "n_structure_trailing_#.json" { + try err("{\"a\":\"b\"}#{}"); +} +test "n_structure_uescaped_LF_before_string.json" { + try err("[\\u000A\"\"]"); +} +test "n_structure_unclosed_array.json" { + try err("[1"); +} +test "n_structure_unclosed_array_partial_null.json" { + try err("[ false, nul"); +} +test "n_structure_unclosed_array_unfinished_false.json" { + try err("[ true, fals"); +} +test "n_structure_unclosed_array_unfinished_true.json" { + try err("[ false, tru"); +} +test "n_structure_unclosed_object.json" { + try err("{\"asd\":\"asd\""); +} +test "n_structure_unicode-identifier.json" { + try err("\xc3\xa5"); +} +test "n_structure_whitespace_U+2060_word_joiner.json" { + try err("[\xe2\x81\xa0]"); +} +test "n_structure_whitespace_formfeed.json" { + try err("[\x0c]"); +} +test "y_array_arraysWithSpaces.json" { + try ok("[[] ]"); +} +test "y_array_empty-string.json" { + try ok("[\"\"]"); +} +test "y_array_empty.json" { + try ok("[]"); +} +test "y_array_ending_with_newline.json" { + try ok("[\"a\"]"); +} +test "y_array_false.json" { + try ok("[false]"); +} +test "y_array_heterogeneous.json" { + try ok("[null, 1, \"1\", {}]"); +} +test "y_array_null.json" { + try ok("[null]"); +} +test "y_array_with_1_and_newline.json" { + try ok("[1\n]"); +} +test "y_array_with_leading_space.json" { + try ok(" [1]"); +} +test "y_array_with_several_null.json" { + try ok("[1,null,null,null,2]"); +} +test "y_array_with_trailing_space.json" { + try ok("[2] "); +} +test "y_number.json" { + try ok("[123e65]"); +} +test "y_number_0e+1.json" { + try ok("[0e+1]"); +} +test "y_number_0e1.json" { + try ok("[0e1]"); +} +test "y_number_after_space.json" { + try ok("[ 4]"); +} +test "y_number_double_close_to_zero.json" { + try ok("[-0.000000000000000000000000000000000000000000000000000000000000000000000000000001]\n"); +} +test "y_number_int_with_exp.json" { + try ok("[20e1]"); +} +test "y_number_minus_zero.json" { + try ok("[-0]"); +} +test "y_number_negative_int.json" { + try ok("[-123]"); +} +test "y_number_negative_one.json" { + try ok("[-1]"); +} +test "y_number_negative_zero.json" { + try ok("[-0]"); +} +test "y_number_real_capital_e.json" { + try ok("[1E22]"); +} +test "y_number_real_capital_e_neg_exp.json" { + try ok("[1E-2]"); +} +test "y_number_real_capital_e_pos_exp.json" { + try ok("[1E+2]"); +} +test "y_number_real_exponent.json" { + try ok("[123e45]"); +} +test "y_number_real_fraction_exponent.json" { + try ok("[123.456e78]"); +} +test "y_number_real_neg_exp.json" { + try ok("[1e-2]"); +} +test "y_number_real_pos_exponent.json" { + try ok("[1e+2]"); +} +test "y_number_simple_int.json" { + try ok("[123]"); +} +test "y_number_simple_real.json" { + try ok("[123.456789]"); +} +test "y_object.json" { + try ok("{\"asd\":\"sdf\", \"dfg\":\"fgh\"}"); +} +test 
"y_object_basic.json" { + try ok("{\"asd\":\"sdf\"}"); +} +test "y_object_duplicated_key.json" { + try ok("{\"a\":\"b\",\"a\":\"c\"}"); +} +test "y_object_duplicated_key_and_value.json" { + try ok("{\"a\":\"b\",\"a\":\"b\"}"); +} +test "y_object_empty.json" { + try ok("{}"); +} +test "y_object_empty_key.json" { + try ok("{\"\":0}"); +} +test "y_object_escaped_null_in_key.json" { + try ok("{\"foo\\u0000bar\": 42}"); +} +test "y_object_extreme_numbers.json" { + try ok("{ \"min\": -1.0e+28, \"max\": 1.0e+28 }"); +} +test "y_object_long_strings.json" { + try ok("{\"x\":[{\"id\": \"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"}], \"id\": \"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"}"); +} +test "y_object_simple.json" { + try ok("{\"a\":[]}"); +} +test "y_object_string_unicode.json" { + try ok("{\"title\":\"\\u041f\\u043e\\u043b\\u0442\\u043e\\u0440\\u0430 \\u0417\\u0435\\u043c\\u043b\\u0435\\u043a\\u043e\\u043f\\u0430\" }"); +} +test "y_object_with_newlines.json" { + try ok("{\n\"a\": \"b\"\n}"); +} +test "y_string_1_2_3_bytes_UTF-8_sequences.json" { + try ok("[\"\\u0060\\u012a\\u12AB\"]"); +} +test "y_string_accepted_surrogate_pair.json" { + try ok("[\"\\uD801\\udc37\"]"); +} +test "y_string_accepted_surrogate_pairs.json" { + try ok("[\"\\ud83d\\ude39\\ud83d\\udc8d\"]"); +} +test "y_string_allowed_escapes.json" { + try ok("[\"\\\"\\\\\\/\\b\\f\\n\\r\\t\"]"); +} +test "y_string_backslash_and_u_escaped_zero.json" { + try ok("[\"\\\\u0000\"]"); +} +test "y_string_backslash_doublequotes.json" { + try ok("[\"\\\"\"]"); +} +test "y_string_comments.json" { + try ok("[\"a/*b*/c/*d//e\"]"); +} +test "y_string_double_escape_a.json" { + try ok("[\"\\\\a\"]"); +} +test "y_string_double_escape_n.json" { + try ok("[\"\\\\n\"]"); +} +test "y_string_escaped_control_character.json" { + try ok("[\"\\u0012\"]"); +} +test "y_string_escaped_noncharacter.json" { + try ok("[\"\\uFFFF\"]"); +} +test "y_string_in_array.json" { + try ok("[\"asd\"]"); +} +test "y_string_in_array_with_leading_space.json" { + try ok("[ \"asd\"]"); +} +test "y_string_last_surrogates_1_and_2.json" { + try ok("[\"\\uDBFF\\uDFFF\"]"); +} +test "y_string_nbsp_uescaped.json" { + try ok("[\"new\\u00A0line\"]"); +} +test "y_string_nonCharacterInUTF-8_U+10FFFF.json" { + try ok("[\"\xf4\x8f\xbf\xbf\"]"); +} +test "y_string_nonCharacterInUTF-8_U+FFFF.json" { + try ok("[\"\xef\xbf\xbf\"]"); +} +test "y_string_null_escape.json" { + try ok("[\"\\u0000\"]"); +} +test "y_string_one-byte-utf-8.json" { + try ok("[\"\\u002c\"]"); +} +test "y_string_pi.json" { + try ok("[\"\xcf\x80\"]"); +} +test "y_string_reservedCharacterInUTF-8_U+1BFFF.json" { + try ok("[\"\xf0\x9b\xbf\xbf\"]"); +} +test "y_string_simple_ascii.json" { + try ok("[\"asd \"]"); +} +test "y_string_space.json" { + try ok("\" \""); +} +test "y_string_surrogates_U+1D11E_MUSICAL_SYMBOL_G_CLEF.json" { + try ok("[\"\\uD834\\uDd1e\"]"); +} +test "y_string_three-byte-utf-8.json" { + try ok("[\"\\u0821\"]"); +} +test "y_string_two-byte-utf-8.json" { + try ok("[\"\\u0123\"]"); +} +test "y_string_u+2028_line_sep.json" { + try ok("[\"\xe2\x80\xa8\"]"); +} +test "y_string_u+2029_par_sep.json" { + try ok("[\"\xe2\x80\xa9\"]"); +} +test "y_string_uEscape.json" { + try ok("[\"\\u0061\\u30af\\u30EA\\u30b9\"]"); +} +test "y_string_uescaped_newline.json" { + try ok("[\"new\\u000Aline\"]"); +} +test "y_string_unescaped_char_delete.json" { + try ok("[\"\x7f\"]"); +} +test "y_string_unicode.json" { + try ok("[\"\\uA66D\"]"); +} +test "y_string_unicodeEscapedBackslash.json" { + try ok("[\"\\u005C\"]"); +} +test 
"y_string_unicode_2.json" { + try ok("[\"\xe2\x8d\x82\xe3\x88\xb4\xe2\x8d\x82\"]"); +} +test "y_string_unicode_U+10FFFE_nonchar.json" { + try ok("[\"\\uDBFF\\uDFFE\"]"); +} +test "y_string_unicode_U+1FFFE_nonchar.json" { + try ok("[\"\\uD83F\\uDFFE\"]"); +} +test "y_string_unicode_U+200B_ZERO_WIDTH_SPACE.json" { + try ok("[\"\\u200B\"]"); +} +test "y_string_unicode_U+2064_invisible_plus.json" { + try ok("[\"\\u2064\"]"); +} +test "y_string_unicode_U+FDD0_nonchar.json" { + try ok("[\"\\uFDD0\"]"); +} +test "y_string_unicode_U+FFFE_nonchar.json" { + try ok("[\"\\uFFFE\"]"); +} +test "y_string_unicode_escaped_double_quote.json" { + try ok("[\"\\u0022\"]"); +} +test "y_string_utf8.json" { + try ok("[\"\xe2\x82\xac\xf0\x9d\x84\x9e\"]"); +} +test "y_string_with_del_character.json" { + try ok("[\"a\x7fa\"]"); +} +test "y_structure_lonely_false.json" { + try ok("false"); +} +test "y_structure_lonely_int.json" { + try ok("42"); +} +test "y_structure_lonely_negative_real.json" { + try ok("-0.1"); +} +test "y_structure_lonely_null.json" { + try ok("null"); +} +test "y_structure_lonely_string.json" { + try ok("\"asd\""); +} +test "y_structure_lonely_true.json" { + try ok("true"); +} +test "y_structure_string_empty.json" { + try ok("\"\""); +} +test "y_structure_trailing_newline.json" { + try ok("[\"a\"]\n"); +} +test "y_structure_true_in_array.json" { + try ok("[true]"); +} +test "y_structure_whitespace_array.json" { + try ok(" [] "); +} diff --git a/lib/std/json/dynamic.zig b/lib/std/json/dynamic.zig new file mode 100644 index 0000000000..057fb93ded --- /dev/null +++ b/lib/std/json/dynamic.zig @@ -0,0 +1,344 @@ +const std = @import("std"); +const debug = std.debug; +const ArenaAllocator = std.heap.ArenaAllocator; +const ArrayList = std.ArrayList; +const StringArrayHashMap = std.StringArrayHashMap; +const Allocator = std.mem.Allocator; + +const StringifyOptions = @import("./stringify.zig").StringifyOptions; +const stringify = @import("./stringify.zig").stringify; + +const JsonScanner = @import("./scanner.zig").Scanner; +const AllocWhen = @import("./scanner.zig").AllocWhen; +const Token = @import("./scanner.zig").Token; +const isNumberFormattedLikeAnInteger = @import("./scanner.zig").isNumberFormattedLikeAnInteger; + +pub const ValueTree = struct { + arena: *ArenaAllocator, + root: Value, + + pub fn deinit(self: *ValueTree) void { + self.arena.deinit(); + self.arena.child_allocator.destroy(self.arena); + } +}; + +pub const ObjectMap = StringArrayHashMap(Value); +pub const Array = ArrayList(Value); + +/// Represents a JSON value +/// Currently only supports numbers that fit into i64 or f64. 
+pub const Value = union(enum) { + null, + bool: bool, + integer: i64, + float: f64, + number_string: []const u8, + string: []const u8, + array: Array, + object: ObjectMap, + + pub fn jsonStringify( + value: @This(), + options: StringifyOptions, + out_stream: anytype, + ) @TypeOf(out_stream).Error!void { + switch (value) { + .null => try stringify(null, options, out_stream), + .bool => |inner| try stringify(inner, options, out_stream), + .integer => |inner| try stringify(inner, options, out_stream), + .float => |inner| try stringify(inner, options, out_stream), + .number_string => |inner| try out_stream.writeAll(inner), + .string => |inner| try stringify(inner, options, out_stream), + .array => |inner| try stringify(inner.items, options, out_stream), + .object => |inner| { + try out_stream.writeByte('{'); + var field_output = false; + var child_options = options; + child_options.whitespace.indent_level += 1; + var it = inner.iterator(); + while (it.next()) |entry| { + if (!field_output) { + field_output = true; + } else { + try out_stream.writeByte(','); + } + try child_options.whitespace.outputIndent(out_stream); + + try stringify(entry.key_ptr.*, options, out_stream); + try out_stream.writeByte(':'); + if (child_options.whitespace.separator) { + try out_stream.writeByte(' '); + } + try stringify(entry.value_ptr.*, child_options, out_stream); + } + if (field_output) { + try options.whitespace.outputIndent(out_stream); + } + try out_stream.writeByte('}'); + }, + } + } + + pub fn dump(self: Value) void { + std.debug.getStderrMutex().lock(); + defer std.debug.getStderrMutex().unlock(); + + const stderr = std.io.getStdErr().writer(); + stringify(self, .{}, stderr) catch return; + } +}; + +/// A non-stream JSON parser which constructs a tree of `Value`s. +pub const Parser = struct { + allocator: Allocator, + state: State, + alloc_when: AllocWhen, + // Stores parent nodes and un-combined Values. + stack: Array, + + const State = enum { + object_key, + object_value, + array_value, + simple, + }; + + pub fn init(allocator: Allocator, alloc_when: AllocWhen) Parser { + return Parser{ + .allocator = allocator, + .state = .simple, + .alloc_when = alloc_when, + .stack = Array.init(allocator), + }; + } + + pub fn deinit(p: *Parser) void { + p.stack.deinit(); + } + + pub fn reset(p: *Parser) void { + p.state = .simple; + p.stack.shrinkRetainingCapacity(0); + } + + pub fn parse(p: *Parser, input: []const u8) !ValueTree { + var scanner = JsonScanner.initCompleteInput(p.allocator, input); + defer scanner.deinit(); + + var arena = try p.allocator.create(ArenaAllocator); + errdefer p.allocator.destroy(arena); + + arena.* = ArenaAllocator.init(p.allocator); + errdefer arena.deinit(); + + const allocator = arena.allocator(); + + while (true) { + const token = try scanner.nextAlloc(allocator, p.alloc_when); + if (token == .end_of_document) break; + try p.transition(allocator, token); + } + + debug.assert(p.stack.items.len == 1); + + return ValueTree{ + .arena = arena, + .root = p.stack.items[0], + }; + } + + // Even though p.allocator exists, we take an explicit allocator so that allocation state + // can be cleaned up on error correctly during a call to `parse`. 
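+ // The parser is a small state machine: .object_key expects a key string or + // object_end; .object_value and .array_value accept any value token and route + // it into the container on top of the stack; .simple handles a top-level + // scalar. The partial_* and end_of_document cases are unreachable here + // because `nextAlloc` is expected to return only complete tokens and `parse` + // stops before forwarding end_of_document.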
+ fn transition(p: *Parser, allocator: Allocator, token: Token) !void { + switch (p.state) { + .object_key => switch (token) { + .object_end => { + if (p.stack.items.len == 1) { + return; + } + + var value = p.stack.pop(); + try p.pushToParent(&value); + }, + .string => |s| { + try p.stack.append(Value{ .string = s }); + p.state = .object_value; + }, + .allocated_string => |s| { + try p.stack.append(Value{ .string = s }); + p.state = .object_value; + }, + else => unreachable, + }, + .object_value => { + var object = &p.stack.items[p.stack.items.len - 2].object; + var key = p.stack.items[p.stack.items.len - 1].string; + + switch (token) { + .object_begin => { + try p.stack.append(Value{ .object = ObjectMap.init(allocator) }); + p.state = .object_key; + }, + .array_begin => { + try p.stack.append(Value{ .array = Array.init(allocator) }); + p.state = .array_value; + }, + .string => |s| { + try object.put(key, Value{ .string = s }); + _ = p.stack.pop(); + p.state = .object_key; + }, + .allocated_string => |s| { + try object.put(key, Value{ .string = s }); + _ = p.stack.pop(); + p.state = .object_key; + }, + .number => |slice| { + try object.put(key, try p.parseNumber(slice)); + _ = p.stack.pop(); + p.state = .object_key; + }, + .allocated_number => |slice| { + try object.put(key, try p.parseNumber(slice)); + _ = p.stack.pop(); + p.state = .object_key; + }, + .true => { + try object.put(key, Value{ .bool = true }); + _ = p.stack.pop(); + p.state = .object_key; + }, + .false => { + try object.put(key, Value{ .bool = false }); + _ = p.stack.pop(); + p.state = .object_key; + }, + .null => { + try object.put(key, .null); + _ = p.stack.pop(); + p.state = .object_key; + }, + .object_end, .array_end, .end_of_document => unreachable, + .partial_number, .partial_string, .partial_string_escaped_1, .partial_string_escaped_2, .partial_string_escaped_3, .partial_string_escaped_4 => unreachable, + } + }, + .array_value => { + var array = &p.stack.items[p.stack.items.len - 1].array; + + switch (token) { + .array_end => { + if (p.stack.items.len == 1) { + return; + } + + var value = p.stack.pop(); + try p.pushToParent(&value); + }, + .object_begin => { + try p.stack.append(Value{ .object = ObjectMap.init(allocator) }); + p.state = .object_key; + }, + .array_begin => { + try p.stack.append(Value{ .array = Array.init(allocator) }); + p.state = .array_value; + }, + .string => |s| { + try array.append(Value{ .string = s }); + }, + .allocated_string => |s| { + try array.append(Value{ .string = s }); + }, + .number => |slice| { + try array.append(try p.parseNumber(slice)); + }, + .allocated_number => |slice| { + try array.append(try p.parseNumber(slice)); + }, + .true => { + try array.append(Value{ .bool = true }); + }, + .false => { + try array.append(Value{ .bool = false }); + }, + .null => { + try array.append(.null); + }, + .object_end, .end_of_document => unreachable, + .partial_number, .partial_string, .partial_string_escaped_1, .partial_string_escaped_2, .partial_string_escaped_3, .partial_string_escaped_4 => unreachable, + } + }, + .simple => switch (token) { + .object_begin => { + try p.stack.append(Value{ .object = ObjectMap.init(allocator) }); + p.state = .object_key; + }, + .array_begin => { + try p.stack.append(Value{ .array = Array.init(allocator) }); + p.state = .array_value; + }, + .string => |s| { + try p.stack.append(Value{ .string = s }); + }, + .allocated_string => |s| { + try p.stack.append(Value{ .string = s }); + }, + .number => |slice| { + try p.stack.append(try p.parseNumber(slice)); + }, + 
.allocated_number => |slice| { + try p.stack.append(try p.parseNumber(slice)); + }, + .true => { + try p.stack.append(Value{ .bool = true }); + }, + .false => { + try p.stack.append(Value{ .bool = false }); + }, + .null => { + try p.stack.append(.null); + }, + .object_end, .array_end, .end_of_document => unreachable, + .partial_number, .partial_string, .partial_string_escaped_1, .partial_string_escaped_2, .partial_string_escaped_3, .partial_string_escaped_4 => unreachable, + }, + } + } + + fn pushToParent(p: *Parser, value: *const Value) !void { + switch (p.stack.items[p.stack.items.len - 1]) { + // Object Parent -> [ ..., object, , value ] + .string => |key| { + _ = p.stack.pop(); + + var object = &p.stack.items[p.stack.items.len - 1].object; + try object.put(key, value.*); + p.state = .object_key; + }, + // Array Parent -> [ ..., , value ] + .array => |*array| { + try array.append(value.*); + p.state = .array_value; + }, + else => { + unreachable; + }, + } + } + + fn parseNumber(p: *Parser, slice: []const u8) !Value { + _ = p; + return if (isNumberFormattedLikeAnInteger(slice)) + Value{ + .integer = std.fmt.parseInt(i64, slice, 10) catch |e| switch (e) { + error.Overflow => return Value{ .number_string = slice }, + error.InvalidCharacter => |err| return err, + }, + } + else + Value{ .float = try std.fmt.parseFloat(f64, slice) }; + } +}; + +test { + _ = @import("dynamic_test.zig"); +} diff --git a/lib/std/json/dynamic_test.zig b/lib/std/json/dynamic_test.zig new file mode 100644 index 0000000000..f20098f2d7 --- /dev/null +++ b/lib/std/json/dynamic_test.zig @@ -0,0 +1,285 @@ +const std = @import("std"); +const mem = std.mem; +const testing = std.testing; + +const ObjectMap = @import("dynamic.zig").ObjectMap; +const Array = @import("dynamic.zig").Array; +const Value = @import("dynamic.zig").Value; +const Parser = @import("dynamic.zig").Parser; + +test "json.parser.dynamic" { + var p = Parser.init(testing.allocator, .alloc_if_needed); + defer p.deinit(); + + const s = + \\{ + \\ "Image": { + \\ "Width": 800, + \\ "Height": 600, + \\ "Title": "View from 15th Floor", + \\ "Thumbnail": { + \\ "Url": "http://www.example.com/image/481989943", + \\ "Height": 125, + \\ "Width": 100 + \\ }, + \\ "Animated" : false, + \\ "IDs": [116, 943, 234, 38793], + \\ "ArrayOfObject": [{"n": "m"}], + \\ "double": 1.3412, + \\ "LargeInt": 18446744073709551615 + \\ } + \\} + ; + + var tree = try p.parse(s); + defer tree.deinit(); + + var root = tree.root; + + var image = root.object.get("Image").?; + + const width = image.object.get("Width").?; + try testing.expect(width.integer == 800); + + const height = image.object.get("Height").?; + try testing.expect(height.integer == 600); + + const title = image.object.get("Title").?; + try testing.expect(mem.eql(u8, title.string, "View from 15th Floor")); + + const animated = image.object.get("Animated").?; + try testing.expect(animated.bool == false); + + const array_of_object = image.object.get("ArrayOfObject").?; + try testing.expect(array_of_object.array.items.len == 1); + + const obj0 = array_of_object.array.items[0].object.get("n").?; + try testing.expect(mem.eql(u8, obj0.string, "m")); + + const double = image.object.get("double").?; + try testing.expect(double.float == 1.3412); + + const large_int = image.object.get("LargeInt").?; + try testing.expect(mem.eql(u8, large_int.number_string, "18446744073709551615")); +} + +const writeStream = @import("./write_stream.zig").writeStream; +test "write json then parse it" { + var out_buffer: [1000]u8 = undefined; + + var 
fixed_buffer_stream = std.io.fixedBufferStream(&out_buffer); + const out_stream = fixed_buffer_stream.writer(); + var jw = writeStream(out_stream, 4); + + try jw.beginObject(); + + try jw.objectField("f"); + try jw.emitBool(false); + + try jw.objectField("t"); + try jw.emitBool(true); + + try jw.objectField("int"); + try jw.emitNumber(1234); + + try jw.objectField("array"); + try jw.beginArray(); + + try jw.arrayElem(); + try jw.emitNull(); + + try jw.arrayElem(); + try jw.emitNumber(12.34); + + try jw.endArray(); + + try jw.objectField("str"); + try jw.emitString("hello"); + + try jw.endObject(); + + var parser = Parser.init(testing.allocator, .alloc_if_needed); + defer parser.deinit(); + var tree = try parser.parse(fixed_buffer_stream.getWritten()); + defer tree.deinit(); + + try testing.expect(tree.root.object.get("f").?.bool == false); + try testing.expect(tree.root.object.get("t").?.bool == true); + try testing.expect(tree.root.object.get("int").?.integer == 1234); + try testing.expect(tree.root.object.get("array").?.array.items[0].null == {}); + try testing.expect(tree.root.object.get("array").?.array.items[1].float == 12.34); + try testing.expect(mem.eql(u8, tree.root.object.get("str").?.string, "hello")); +} + +fn testParse(arena_allocator: std.mem.Allocator, json_str: []const u8) !Value { + var p = Parser.init(arena_allocator, .alloc_if_needed); + return (try p.parse(json_str)).root; +} + +test "parsing empty string gives appropriate error" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + try testing.expectError(error.UnexpectedEndOfInput, testParse(arena_allocator.allocator(), "")); +} + +test "parse tree should not contain dangling pointers" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + + var p = Parser.init(arena_allocator.allocator(), .alloc_if_needed); + defer p.deinit(); + + var tree = try p.parse("[]"); + defer tree.deinit(); + + // Allocation should succeed + var i: usize = 0; + while (i < 100) : (i += 1) { + try tree.root.array.append(Value{ .integer = 100 }); + } + try testing.expectEqual(tree.root.array.items.len, 100); +} + +test "integer after float has proper type" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const parsed = try testParse(arena_allocator.allocator(), + \\{ + \\ "float": 3.14, + \\ "ints": [1, 2, 3] + \\} + ); + try std.testing.expect(parsed.object.get("ints").?.array.items[0] == .integer); +} + +test "escaped characters" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const input = + \\{ + \\ "backslash": "\\", + \\ "forwardslash": "\/", + \\ "newline": "\n", + \\ "carriagereturn": "\r", + \\ "tab": "\t", + \\ "formfeed": "\f", + \\ "backspace": "\b", + \\ "doublequote": "\"", + \\ "unicode": "\u0105", + \\ "surrogatepair": "\ud83d\ude02" + \\} + ; + + const obj = (try testParse(arena_allocator.allocator(), input)).object; + + try testing.expectEqualSlices(u8, obj.get("backslash").?.string, "\\"); + try testing.expectEqualSlices(u8, obj.get("forwardslash").?.string, "/"); + try testing.expectEqualSlices(u8, obj.get("newline").?.string, "\n"); + try testing.expectEqualSlices(u8, obj.get("carriagereturn").?.string, "\r"); + try testing.expectEqualSlices(u8, obj.get("tab").?.string, "\t"); + try testing.expectEqualSlices(u8, obj.get("formfeed").?.string, "\x0C"); + try 
testing.expectEqualSlices(u8, obj.get("backspace").?.string, "\x08"); + try testing.expectEqualSlices(u8, obj.get("doublequote").?.string, "\""); + try testing.expectEqualSlices(u8, obj.get("unicode").?.string, "ą"); + try testing.expectEqualSlices(u8, obj.get("surrogatepair").?.string, "😂"); +} + +test "string copy option" { + const input = + \\{ + \\ "noescape": "aą😂", + \\ "simple": "\\\/\n\r\t\f\b\"", + \\ "unicode": "\u0105", + \\ "surrogatepair": "\ud83d\ude02" + \\} + ; + + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + const allocator = arena_allocator.allocator(); + + var parser = Parser.init(allocator, .alloc_if_needed); + const tree_nocopy = try parser.parse(input); + const obj_nocopy = tree_nocopy.root.object; + + parser = Parser.init(allocator, .alloc_always); + const tree_copy = try parser.parse(input); + const obj_copy = tree_copy.root.object; + + for ([_][]const u8{ "noescape", "simple", "unicode", "surrogatepair" }) |field_name| { + try testing.expectEqualSlices(u8, obj_nocopy.get(field_name).?.string, obj_copy.get(field_name).?.string); + } + + const nocopy_addr = &obj_nocopy.get("noescape").?.string[0]; + const copy_addr = &obj_copy.get("noescape").?.string[0]; + + var found_nocopy = false; + for (input, 0..) |_, index| { + try testing.expect(copy_addr != &input[index]); + if (nocopy_addr == &input[index]) { + found_nocopy = true; + } + } + try testing.expect(found_nocopy); +} + +test "Value.jsonStringify" { + { + var buffer: [10]u8 = undefined; + var fbs = std.io.fixedBufferStream(&buffer); + try @as(Value, .null).jsonStringify(.{}, fbs.writer()); + try testing.expectEqualSlices(u8, fbs.getWritten(), "null"); + } + { + var buffer: [10]u8 = undefined; + var fbs = std.io.fixedBufferStream(&buffer); + try (Value{ .bool = true }).jsonStringify(.{}, fbs.writer()); + try testing.expectEqualSlices(u8, fbs.getWritten(), "true"); + } + { + var buffer: [10]u8 = undefined; + var fbs = std.io.fixedBufferStream(&buffer); + try (Value{ .integer = 42 }).jsonStringify(.{}, fbs.writer()); + try testing.expectEqualSlices(u8, fbs.getWritten(), "42"); + } + { + var buffer: [10]u8 = undefined; + var fbs = std.io.fixedBufferStream(&buffer); + try (Value{ .number_string = "43" }).jsonStringify(.{}, fbs.writer()); + try testing.expectEqualSlices(u8, fbs.getWritten(), "43"); + } + { + var buffer: [10]u8 = undefined; + var fbs = std.io.fixedBufferStream(&buffer); + try (Value{ .float = 42 }).jsonStringify(.{}, fbs.writer()); + try testing.expectEqualSlices(u8, fbs.getWritten(), "4.2e+01"); + } + { + var buffer: [10]u8 = undefined; + var fbs = std.io.fixedBufferStream(&buffer); + try (Value{ .string = "weeee" }).jsonStringify(.{}, fbs.writer()); + try testing.expectEqualSlices(u8, fbs.getWritten(), "\"weeee\""); + } + { + var buffer: [10]u8 = undefined; + var fbs = std.io.fixedBufferStream(&buffer); + var vals = [_]Value{ + .{ .integer = 1 }, + .{ .integer = 2 }, + .{ .number_string = "3" }, + }; + try (Value{ + .array = Array.fromOwnedSlice(undefined, &vals), + }).jsonStringify(.{}, fbs.writer()); + try testing.expectEqualSlices(u8, fbs.getWritten(), "[1,2,3]"); + } + { + var buffer: [10]u8 = undefined; + var fbs = std.io.fixedBufferStream(&buffer); + var obj = ObjectMap.init(testing.allocator); + defer obj.deinit(); + try obj.putNoClobber("a", .{ .string = "b" }); + try (Value{ .object = obj }).jsonStringify(.{}, fbs.writer()); + try testing.expectEqualSlices(u8, fbs.getWritten(), "{\"a\":\"b\"}"); + } +} diff --git 
a/lib/std/json/scanner.zig b/lib/std/json/scanner.zig new file mode 100644 index 0000000000..4fb7c1da01 --- /dev/null +++ b/lib/std/json/scanner.zig @@ -0,0 +1,1764 @@ +// Notes on standards compliance: https://datatracker.ietf.org/doc/html/rfc8259 +// * RFC 8259 requires JSON documents be valid UTF-8, +// but makes an allowance for systems that are "part of a closed ecosystem". +// I have no idea what that's supposed to mean in the context of a standard specification. +// This implementation requires inputs to be valid UTF-8. +// * RFC 8259 contradicts itself regarding whether lowercase is allowed in \u hex digits, +// but this is probably a bug in the spec, and it's clear that lowercase is meant to be allowed. +// (RFC 5234 defines HEXDIG to only allow uppercase.) +// * When RFC 8259 refers to a "character", I assume they really mean a "Unicode scalar value". +// See http://www.unicode.org/glossary/#unicode_scalar_value . +// * RFC 8259 doesn't explicitly disallow unpaired surrogate halves in \u escape sequences, +// but vaguely implies that \u escapes are for encoding Unicode "characters" (i.e. Unicode scalar values?), +// which would mean that unpaired surrogate halves are forbidden. +// By contrast ECMA-404 (a competing(/compatible?) JSON standard, which JavaScript's JSON.parse() conforms to) +// explicitly allows unpaired surrogate halves. +// This implementation forbids unpaired surrogate halves in \u sequences. +// If a high surrogate half appears in a \u sequence, +// then a low surrogate half must immediately follow in \u notation. +// * RFC 8259 allows implementations to "accept non-JSON forms or extensions". +// This implementation does not accept any of that. +// * RFC 8259 allows implementations to put limits on "the size of texts", +// "the maximum depth of nesting", "the range and precision of numbers", +// and "the length and character contents of strings". +// This low-level implementation does not limit these, +// except where noted above, and except that nesting depth requires memory allocation. +// Note that this low-level API does not interpret numbers numerically, +// but simply emits their source form for some higher level code to make sense of. +// * This low-level implementation allows duplicate object keys, +// and key/value pairs are emitted in the order they appear in the input. + +const std = @import("std"); + +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; +const assert = std.debug.assert; + +/// Scan the input and check for malformed JSON. +/// On `SyntaxError` or `UnexpectedEndOfInput`, returns `false`. +/// Returns any errors from the allocator as-is, which is unlikely, +/// but can be caused by extreme nesting depth in the input. +pub fn validate(allocator: Allocator, s: []const u8) Allocator.Error!bool { + var scanner = Scanner.initCompleteInput(allocator, s); + defer scanner.deinit(); + + while (true) { + const token = scanner.next() catch |err| switch (err) { + error.SyntaxError, error.UnexpectedEndOfInput => return false, + error.OutOfMemory => return error.OutOfMemory, + error.BufferUnderrun => unreachable, + }; + if (token == .end_of_document) break; + } + + return true; +} + +/// The parsing errors are divided into two categories: +/// * `SyntaxError` is for clearly malformed JSON documents, +/// such as giving an input document that isn't JSON at all. +/// * `UnexpectedEndOfInput` is for signaling that everything's been +/// valid so far, but the input appears to be truncated for some reason. 
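+/// For example, the truncated input `[1, 2` gives `UnexpectedEndOfInput`,
+/// while `[1, 2,]` gives `SyntaxError` at the stray `]`.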
+/// Note that a completely empty (or whitespace-only) input will give `UnexpectedEndOfInput`.
+pub const Error = error{ SyntaxError, UnexpectedEndOfInput };
+
+/// Calls `std.json.Reader` with `std.json.default_buffer_size`.
+pub fn reader(allocator: Allocator, io_reader: anytype) Reader(default_buffer_size, @TypeOf(io_reader)) {
+    return Reader(default_buffer_size, @TypeOf(io_reader)).init(allocator, io_reader);
+}
+/// Used by `json.reader`.
+pub const default_buffer_size = 0x1000;
+
+/// The tokens emitted by `std.json.Scanner` and `std.json.Reader` `.next*()` functions follow this grammar:
+/// ```
+/// <document> = <value> .end_of_document
+/// <value> =
+///   | <object>
+///   | <array>
+///   | <number>
+///   | <string>
+///   | .true
+///   | .false
+///   | .null
+/// <object> = .object_begin ( <string> <value> )* .object_end
+/// <array> = .array_begin ( <value> )* .array_end
+/// <number> = <number_token>
+/// <string> = <string_token>
+/// ```
+///
+/// What you get for `<number>` and `<string>` values depends on which `next*()` method you call:
+///
+/// ```
+/// next():
+/// <number_token> = ( .partial_number )* .number
+/// <string_token> = ( <partial_string> )* .string
+/// <partial_string> =
+///   | .partial_string
+///   | .partial_string_escaped_1
+///   | .partial_string_escaped_2
+///   | .partial_string_escaped_3
+///   | .partial_string_escaped_4
+///
+/// nextAlloc*(..., .alloc_always):
+/// <number_token> = .allocated_number
+/// <string_token> = .allocated_string
+///
+/// nextAlloc*(..., .alloc_if_needed):
+/// <number_token> =
+///   | .number
+///   | .allocated_number
+/// <string_token> =
+///   | .string
+///   | .allocated_string
+/// ```
+///
+/// For all tokens with a `[]const u8`, `[]u8`, or `[n]u8` payload, the payload represents the content of the value.
+/// For number values, this is the representation of the number exactly as it appears in the input.
+/// For strings, this is the content of the string after resolving escape sequences.
+///
+/// For `.allocated_number` and `.allocated_string`, the `[]u8` payloads are allocations made with the given allocator.
+/// You are responsible for managing that memory. `json.Reader.deinit()` does *not* free those allocations.
+///
+/// The `.partial_*` tokens indicate that a value spans multiple input buffers or that a string contains escape sequences.
+/// To get a complete value in memory, you need to concatenate the values yourself.
+/// Calling `nextAlloc*()` does this for you, and returns an `.allocated_*` token with the result.
+///
+/// For tokens with a `[]const u8` payload, the payload is a slice into the current input buffer.
+/// The memory may become undefined during the next call to `json.Scanner.feedInput()`
+/// or any `json.Reader` method whose return error set includes `json.Error`.
+/// To keep the value persistently, it is recommended to make a copy or to use `.alloc_always`,
+/// which makes a copy for you.
+///
+/// Note that `.number` and `.string` tokens that follow `.partial_*` tokens may have `0` length to indicate that
+/// the previously partial value is completed with no additional bytes.
+/// (This can happen when the break between input buffers happens to land on the exact end of a value. E.g. `"[1234"`, `"]"`.)
+/// `.partial_*` tokens never have `0` length.
+///
+/// The recommended strategy for using the different `next*()` methods is something like this:
+///
+/// When you're expecting an object key, use `.alloc_if_needed`.
+/// You often don't need a copy of the key string to persist; you might just check which field it is.
+/// In the case that the key happens to require an allocation, free it immediately after checking it.
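+/// For example (a sketch; `handleField()` is a hypothetical helper):
+/// ```
+/// switch (try json_reader.nextAlloc(allocator, .alloc_if_needed)) {
+///     .string => |key| try handleField(key),
+///     .allocated_string => |key| {
+///         defer allocator.free(key);
+///         try handleField(key);
+///     },
+///     else => unreachable, // expecting an object key here
+/// }
+/// ```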
+/// +/// When you're expecting a meaningful string value (such as on the right of a `:`), +/// use `.alloc_always` in order to keep the value valid throughout parsing the rest of the document. +/// +/// When you're expecting a number value, use `.alloc_if_needed`. +/// You're probably going to be parsing the string representation of the number into a numeric representation, +/// so you need the complete string representation only temporarily. +/// +/// When you're skipping an unrecognized value, use `skipValue()`. +pub const Token = union(enum) { + object_begin, + object_end, + array_begin, + array_end, + + true, + false, + null, + + number: []const u8, + partial_number: []const u8, + allocated_number: []u8, + + string: []const u8, + partial_string: []const u8, + partial_string_escaped_1: [1]u8, + partial_string_escaped_2: [2]u8, + partial_string_escaped_3: [3]u8, + partial_string_escaped_4: [4]u8, + allocated_string: []u8, + + end_of_document, +}; + +/// This is only used in `peekNextTokenType()` and gives a categorization based on the first byte of the next token that will be emitted from a `next*()` call. +pub const TokenType = enum { + object_begin, + object_end, + array_begin, + array_end, + true, + false, + null, + number, + string, + end_of_document, +}; + +/// To enable diagnostics, declare `var diagnostics = Diagnostics{};` then call `source.enableDiagnostics(&diagnostics);` +/// where `source` is either a `std.json.Reader` or a `std.json.Scanner` that has just been initialized. +/// At any time, notably just after an error, call `getLine()`, `getColumn()`, and/or `getByteOffset()` +/// to get meaningful information from this. +pub const Diagnostics = struct { + line_number: u64 = 1, + line_start_cursor: usize = @bitCast(usize, @as(isize, -1)), // Start just "before" the input buffer to get a 1-based column for line 1. + total_bytes_before_current_input: u64 = 0, + cursor_pointer: *const usize = undefined, + + /// Starts at 1. + pub fn getLine(self: *const @This()) u64 { + return self.line_number; + } + /// Starts at 1. + pub fn getColumn(self: *const @This()) u64 { + return self.cursor_pointer.* -% self.line_start_cursor; + } + /// Starts at 0. Measures the byte offset since the start of the input. + pub fn getByteOffset(self: *const @This()) u64 { + return self.total_bytes_before_current_input + self.cursor_pointer.*; + } +}; + +/// See the documentation for `std.json.Token`. +pub const AllocWhen = enum { alloc_if_needed, alloc_always }; + +/// For security, the maximum size allocated to store a single string or number value is limited to 4MiB by default. +/// This limit can be specified by calling `nextAllocMax()` instead of `nextAlloc()`. +pub const default_max_value_len = 4 * 1024 * 1024; + +/// Connects a `std.io.Reader` to a `std.json.Scanner`. +/// All `next*()` methods here handle `error.BufferUnderrun` from `std.json.Scanner`, and then read from the reader. +pub fn Reader(comptime buffer_size: usize, comptime ReaderType: type) type { + return struct { + scanner: Scanner, + reader: ReaderType, + + buffer: [buffer_size]u8 = undefined, + + /// The allocator is only used to track `[]` and `{}` nesting levels. + pub fn init(allocator: Allocator, io_reader: ReaderType) @This() { + return .{ + .scanner = Scanner.initStreaming(allocator), + .reader = io_reader, + }; + } + pub fn deinit(self: *@This()) void { + self.scanner.deinit(); + self.* = undefined; + } + + /// Calls `std.json.Scanner.enableDiagnostics`. 
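+        /// A typical setup, as a sketch (`json_reader` is an assumed, freshly initialized `std.json.Reader`):
+        /// ```
+        /// var diagnostics = Diagnostics{};
+        /// json_reader.enableDiagnostics(&diagnostics);
+        /// // After an error, diagnostics.getLine(), getColumn(), and
+        /// // getByteOffset() locate the problem in the input.
+        /// ```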
+ pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void { + self.scanner.enableDiagnostics(diagnostics); + } + + pub const NextError = ReaderType.Error || Error || Allocator.Error; + pub const SkipError = NextError; + pub const AllocError = NextError || error{ValueTooLong}; + pub const PeekError = ReaderType.Error || Error; + + /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);` + /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. + pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token { + return self.nextAllocMax(allocator, when, default_max_value_len); + } + /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. + pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token { + const token_type = try self.peekNextTokenType(); + switch (token_type) { + .number, .string => { + var value_list = ArrayList(u8).init(allocator); + errdefer { + value_list.deinit(); + } + if (try self.allocNextIntoArrayListMax(&value_list, when, max_value_len)) |slice| { + return if (token_type == .number) + Token{ .number = slice } + else + Token{ .string = slice }; + } else { + return if (token_type == .number) + Token{ .allocated_number = try value_list.toOwnedSlice() } + else + Token{ .allocated_string = try value_list.toOwnedSlice() }; + } + }, + + // Simple tokens never alloc. + .object_begin, + .object_end, + .array_begin, + .array_end, + .true, + .false, + .null, + .end_of_document, + => return try self.next(), + } + } + + /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);` + pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocError!?[]const u8 { + return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len); + } + /// Calls `std.json.Scanner.allocNextIntoArrayListMax` and handles `error.BufferUnderrun`. + pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocError!?[]const u8 { + while (true) { + return self.scanner.allocNextIntoArrayListMax(value_list, when, max_value_len) catch |err| switch (err) { + error.BufferUnderrun => { + try self.refillBuffer(); + continue; + }, + else => |other_err| return other_err, + }; + } + } + + /// Like `std.json.Scanner.skipValue`, but handles `error.BufferUnderrun`. + pub fn skipValue(self: *@This()) SkipError!void { + switch (try self.peekNextTokenType()) { + .object_begin, .array_begin => { + try self.skipUntilStackHeight(self.stackHeight()); + }, + .number, .string => { + while (true) { + switch (try self.next()) { + .partial_number, + .partial_string, + .partial_string_escaped_1, + .partial_string_escaped_2, + .partial_string_escaped_3, + .partial_string_escaped_4, + => continue, + + .number, .string => break, + + else => unreachable, + } + } + }, + .true, .false, .null => { + _ = try self.next(); + }, + + .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token. + } + } + /// Like `std.json.Scanner.skipUntilStackHeight()` but handles `error.BufferUnderrun`. 
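+        /// For example, after consuming an `.object_begin` token, the matching
+        /// `.object_end` can be reached with (a sketch):
+        /// ```
+        /// try json_reader.skipUntilStackHeight(json_reader.stackHeight() - 1);
+        /// ```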
+ pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: u32) NextError!void { + while (true) { + return self.scanner.skipUntilStackHeight(terminal_stack_height) catch |err| switch (err) { + error.BufferUnderrun => { + try self.refillBuffer(); + continue; + }, + else => |other_err| return other_err, + }; + } + } + + /// Calls `std.json.Scanner.stackHeight`. + pub fn stackHeight(self: *const @This()) u32 { + return self.scanner.stackHeight(); + } + /// Calls `std.json.Scanner.ensureTotalStackCapacity`. + pub fn ensureTotalStackCapacity(self: *@This(), height: u32) Allocator.Error!void { + try self.scanner.ensureTotalStackCapacity(height); + } + + /// See `std.json.Token` for documentation of this function. + pub fn next(self: *@This()) NextError!Token { + while (true) { + return self.scanner.next() catch |err| switch (err) { + error.BufferUnderrun => { + try self.refillBuffer(); + continue; + }, + else => |other_err| return other_err, + }; + } + } + + /// See `std.json.Scanner.peekNextTokenType()`. + pub fn peekNextTokenType(self: *@This()) PeekError!TokenType { + while (true) { + return self.scanner.peekNextTokenType() catch |err| switch (err) { + error.BufferUnderrun => { + try self.refillBuffer(); + continue; + }, + else => |other_err| return other_err, + }; + } + } + + fn refillBuffer(self: *@This()) ReaderType.Error!void { + const input = self.buffer[0..try self.reader.read(self.buffer[0..])]; + if (input.len > 0) { + self.scanner.feedInput(input); + } else { + self.scanner.endInput(); + } + } + }; +} + +/// The lowest level parsing API in this package; +/// supports streaming input with a low memory footprint. +/// The memory requirement is `O(d)` where d is the nesting depth of `[]` or `{}` containers in the input. +/// Specifically `d/8` bytes are required for this purpose, +/// with some extra buffer according to the implementation of `std.ArrayList`. +/// +/// This scanner can emit partial tokens; see `std.json.Token`. +/// The input to this class is a sequence of input buffers that you must supply one at a time. +/// Call `feedInput()` with the first buffer, then call `next()` repeatedly until `error.BufferUnderrun` is returned. +/// Then call `feedInput()` again and so forth. +/// Call `endInput()` when the last input buffer has been given to `feedInput()`, either immediately after calling `feedInput()`, +/// or when `error.BufferUnderrun` requests more data and there is no more. +/// Be sure to call `next()` after calling `endInput()` until `Token.end_of_document` has been returned. +pub const Scanner = struct { + state: State = .value, + string_is_object_key: bool = false, + stack: BitStack, + value_start: usize = undefined, + unicode_code_point: u21 = undefined, + + input: []const u8 = "", + cursor: usize = 0, + is_end_of_input: bool = false, + diagnostics: ?*Diagnostics = null, + + /// The allocator is only used to track `[]` and `{}` nesting levels. + pub fn initStreaming(allocator: Allocator) @This() { + return .{ + .stack = BitStack.init(allocator), + }; + } + /// Use this if your input is a single slice. 
+ /// This is effectively equivalent to: + /// ``` + /// initStreaming(allocator); + /// feedInput(complete_input); + /// endInput(); + /// ``` + pub fn initCompleteInput(allocator: Allocator, complete_input: []const u8) @This() { + return .{ + .stack = BitStack.init(allocator), + .input = complete_input, + .is_end_of_input = true, + }; + } + pub fn deinit(self: *@This()) void { + self.stack.deinit(); + self.* = undefined; + } + + pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void { + diagnostics.cursor_pointer = &self.cursor; + self.diagnostics = diagnostics; + } + + /// Call this whenever you get `error.BufferUnderrun` from `next()`. + /// When there is no more input to provide, call `endInput()`. + pub fn feedInput(self: *@This(), input: []const u8) void { + assert(self.cursor == self.input.len); // Not done with the last input slice. + if (self.diagnostics) |diag| { + diag.total_bytes_before_current_input += self.input.len; + // This usually goes "negative" to measure how far before the beginning + // of the new buffer the current line started. + diag.line_start_cursor -%= self.cursor; + } + self.input = input; + self.cursor = 0; + self.value_start = 0; + } + /// Call this when you will no longer call `feedInput()` anymore. + /// This can be called either immediately after the last `feedInput()`, + /// or at any time afterward, such as when getting `error.BufferUnderrun` from `next()`. + /// Don't forget to call `next*()` after `endInput()` until you get `.end_of_document`. + pub fn endInput(self: *@This()) void { + self.is_end_of_input = true; + } + + pub const NextError = Error || Allocator.Error || error{BufferUnderrun}; + pub const AllocError = Error || Allocator.Error || error{ValueTooLong}; + pub const PeekError = Error || error{BufferUnderrun}; + pub const SkipError = Error || Allocator.Error; + pub const AllocIntoArrayListError = AllocError || error{BufferUnderrun}; + + /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);` + /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called. + /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. + pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token { + return self.nextAllocMax(allocator, when, default_max_value_len); + } + + /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called. + /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior. + pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token { + assert(self.is_end_of_input); // This function is not available in streaming mode. + const token_type = self.peekNextTokenType() catch |e| switch (e) { + error.BufferUnderrun => unreachable, + else => |err| return err, + }; + switch (token_type) { + .number, .string => { + var value_list = ArrayList(u8).init(allocator); + errdefer { + value_list.deinit(); + } + if (self.allocNextIntoArrayListMax(&value_list, when, max_value_len) catch |e| switch (e) { + error.BufferUnderrun => unreachable, + else => |err| return err, + }) |slice| { + return if (token_type == .number) + Token{ .number = slice } + else + Token{ .string = slice }; + } else { + return if (token_type == .number) + Token{ .allocated_number = try value_list.toOwnedSlice() } + else + Token{ .allocated_string = try value_list.toOwnedSlice() }; + } + }, + + // Simple tokens never alloc. 
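+            // (After endInput() or initCompleteInput(), next() can no longer
+            // underrun, so BufferUnderrun is unreachable below.)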
+ .object_begin, + .object_end, + .array_begin, + .array_end, + .true, + .false, + .null, + .end_of_document, + => return self.next() catch |e| switch (e) { + error.BufferUnderrun => unreachable, + else => |err| return err, + }, + } + } + + /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);` + pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocIntoArrayListError!?[]const u8 { + return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len); + } + /// The next token type must be either `.number` or `.string`. See `peekNextTokenType()`. + /// When allocation is not necessary with `.alloc_if_needed`, + /// this method returns the content slice from the input buffer, and `value_list` is not touched. + /// When allocation is necessary or with `.alloc_always`, this method concatenates partial tokens into the given `value_list`, + /// and returns `null` once the final `.number` or `.string` token has been written into it. + /// In case of an `error.BufferUnderrun`, partial values will be left in the given value_list. + /// The given `value_list` is never reset by this method, so an `error.BufferUnderrun` situation + /// can be resumed by passing the same array list in again. + /// This method does not indicate whether the token content being returned is for a `.number` or `.string` token type; + /// the caller of this method is expected to know which type of token is being processed. + pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocIntoArrayListError!?[]const u8 { + while (true) { + const token = try self.next(); + switch (token) { + // Accumulate partial values. + .partial_number, .partial_string => |slice| { + try appendSlice(value_list, slice, max_value_len); + }, + .partial_string_escaped_1 => |buf| { + try appendSlice(value_list, buf[0..], max_value_len); + }, + .partial_string_escaped_2 => |buf| { + try appendSlice(value_list, buf[0..], max_value_len); + }, + .partial_string_escaped_3 => |buf| { + try appendSlice(value_list, buf[0..], max_value_len); + }, + .partial_string_escaped_4 => |buf| { + try appendSlice(value_list, buf[0..], max_value_len); + }, + + // Return complete values. + .number => |slice| { + if (when == .alloc_if_needed and value_list.items.len == 0) { + // No alloc necessary. + return slice; + } + try appendSlice(value_list, slice, max_value_len); + // The token is complete. + return null; + }, + .string => |slice| { + if (when == .alloc_if_needed and value_list.items.len == 0) { + // No alloc necessary. + return slice; + } + try appendSlice(value_list, slice, max_value_len); + // The token is complete. + return null; + }, + + .object_begin, + .object_end, + .array_begin, + .array_end, + .true, + .false, + .null, + .end_of_document, + => unreachable, // Only .number and .string token types are allowed here. Check peekNextTokenType() before calling this. + + .allocated_number, .allocated_string => unreachable, + } + } + } + + /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called. + /// If the next token type is `.object_begin` or `.array_begin`, + /// this function calls `next()` repeatedly until the corresponding `.object_end` or `.array_end` is found. + /// If the next token type is `.number` or `.string`, + /// this function calls `next()` repeatedly until the (non `.partial_*`) `.number` or `.string` token is found. 
+    /// If the next token type is `.true`, `.false`, or `.null`, this function calls `next()` once.
+    /// The next token type must not be `.object_end`, `.array_end`, or `.end_of_document`;
+    /// see `peekNextTokenType()`.
+    pub fn skipValue(self: *@This()) SkipError!void {
+        assert(self.is_end_of_input); // This function is not available in streaming mode.
+        switch (self.peekNextTokenType() catch |e| switch (e) {
+            error.BufferUnderrun => unreachable,
+            else => |err| return err,
+        }) {
+            .object_begin, .array_begin => {
+                self.skipUntilStackHeight(self.stackHeight()) catch |e| switch (e) {
+                    error.BufferUnderrun => unreachable,
+                    else => |err| return err,
+                };
+            },
+            .number, .string => {
+                while (true) {
+                    switch (self.next() catch |e| switch (e) {
+                        error.BufferUnderrun => unreachable,
+                        else => |err| return err,
+                    }) {
+                        .partial_number,
+                        .partial_string,
+                        .partial_string_escaped_1,
+                        .partial_string_escaped_2,
+                        .partial_string_escaped_3,
+                        .partial_string_escaped_4,
+                        => continue,
+
+                        .number, .string => break,
+
+                        else => unreachable,
+                    }
+                }
+            },
+            .true, .false, .null => {
+                _ = self.next() catch |e| switch (e) {
+                    error.BufferUnderrun => unreachable,
+                    else => |err| return err,
+                };
+            },
+
+            .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token.
+        }
+    }
+
+    /// Skip tokens until an `.object_end` or `.array_end` token results in a `stackHeight()` equal to the given stack height.
+    /// Unlike `skipValue()`, this function is available in streaming mode.
+    pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: u32) NextError!void {
+        while (true) {
+            switch (try self.next()) {
+                .object_end, .array_end => {
+                    if (self.stackHeight() == terminal_stack_height) break;
+                },
+                .end_of_document => unreachable,
+                else => continue,
+            }
+        }
+    }
+
+    /// The depth of `{}` or `[]` nesting levels at the current position.
+    pub fn stackHeight(self: *const @This()) u32 {
+        return self.stack.bit_len;
+    }
+
+    /// Preallocate memory to hold the given number of nesting levels.
+    /// `stackHeight()` up to the given number will not cause allocations.
+    pub fn ensureTotalStackCapacity(self: *@This(), height: u32) Allocator.Error!void {
+        try self.stack.ensureTotalCapacity(height);
+    }
+
+    /// See `std.json.Token` for documentation of this function.
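+    /// A minimal streaming loop might look like this (a sketch; `getMoreInput()`
+    /// is a hypothetical function producing successive input slices):
+    /// ```
+    /// while (true) {
+    ///     const token = scanner.next() catch |err| switch (err) {
+    ///         error.BufferUnderrun => {
+    ///             scanner.feedInput(getMoreInput()); // or scanner.endInput() when done
+    ///             continue;
+    ///         },
+    ///         else => |e| return e,
+    ///     };
+    ///     if (token == .end_of_document) break;
+    ///     // ... handle token ...
+    /// }
+    /// ```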
+ pub fn next(self: *@This()) NextError!Token { + state_loop: while (true) { + switch (self.state) { + .value => { + switch (try self.skipWhitespaceExpectByte()) { + // Object, Array + '{' => { + try self.stack.push(OBJECT_MODE); + self.cursor += 1; + self.state = .object_start; + return .object_begin; + }, + '[' => { + try self.stack.push(ARRAY_MODE); + self.cursor += 1; + self.state = .array_start; + return .array_begin; + }, + + // String + '"' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + continue :state_loop; + }, + + // Number + '1'...'9' => { + self.value_start = self.cursor; + self.cursor += 1; + self.state = .number_int; + continue :state_loop; + }, + '0' => { + self.value_start = self.cursor; + self.cursor += 1; + self.state = .number_leading_zero; + continue :state_loop; + }, + '-' => { + self.value_start = self.cursor; + self.cursor += 1; + self.state = .number_minus; + continue :state_loop; + }, + + // literal values + 't' => { + self.cursor += 1; + self.state = .literal_t; + continue :state_loop; + }, + 'f' => { + self.cursor += 1; + self.state = .literal_f; + continue :state_loop; + }, + 'n' => { + self.cursor += 1; + self.state = .literal_n; + continue :state_loop; + }, + + else => return error.SyntaxError, + } + }, + + .post_value => { + if (try self.skipWhitespaceCheckEnd()) return .end_of_document; + + const c = self.input[self.cursor]; + if (self.string_is_object_key) { + self.string_is_object_key = false; + switch (c) { + ':' => { + self.cursor += 1; + self.state = .value; + continue :state_loop; + }, + else => return error.SyntaxError, + } + } + + switch (c) { + '}' => { + if (self.stack.pop() != OBJECT_MODE) return error.SyntaxError; + self.cursor += 1; + // stay in .post_value state. + return .object_end; + }, + ']' => { + if (self.stack.pop() != ARRAY_MODE) return error.SyntaxError; + self.cursor += 1; + // stay in .post_value state. 
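+                            // (An end token completes a value, so another closer,
+                            // a comma, or the end of the document may follow.)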
+ return .array_end; + }, + ',' => { + switch (self.stack.peek()) { + OBJECT_MODE => { + self.state = .object_post_comma; + }, + ARRAY_MODE => { + self.state = .value; + }, + } + self.cursor += 1; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + + .object_start => { + switch (try self.skipWhitespaceExpectByte()) { + '"' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + self.string_is_object_key = true; + continue :state_loop; + }, + '}' => { + self.cursor += 1; + _ = self.stack.pop(); + self.state = .post_value; + return .object_end; + }, + else => return error.SyntaxError, + } + }, + .object_post_comma => { + switch (try self.skipWhitespaceExpectByte()) { + '"' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + self.string_is_object_key = true; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + + .array_start => { + switch (try self.skipWhitespaceExpectByte()) { + ']' => { + self.cursor += 1; + _ = self.stack.pop(); + self.state = .post_value; + return .array_end; + }, + else => { + self.state = .value; + continue :state_loop; + }, + } + }, + + .number_minus => { + if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); + switch (self.input[self.cursor]) { + '0' => { + self.cursor += 1; + self.state = .number_leading_zero; + continue :state_loop; + }, + '1'...'9' => { + self.cursor += 1; + self.state = .number_int; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .number_leading_zero => { + if (self.cursor >= self.input.len) return self.endOfBufferInNumber(true); + switch (self.input[self.cursor]) { + '.' => { + self.cursor += 1; + self.state = .number_post_dot; + continue :state_loop; + }, + 'e', 'E' => { + self.cursor += 1; + self.state = .number_post_e; + continue :state_loop; + }, + else => { + self.state = .post_value; + return Token{ .number = self.takeValueSlice() }; + }, + } + }, + .number_int => { + while (self.cursor < self.input.len) : (self.cursor += 1) { + switch (self.input[self.cursor]) { + '0'...'9' => continue, + '.' 
=> { + self.cursor += 1; + self.state = .number_post_dot; + continue :state_loop; + }, + 'e', 'E' => { + self.cursor += 1; + self.state = .number_post_e; + continue :state_loop; + }, + else => { + self.state = .post_value; + return Token{ .number = self.takeValueSlice() }; + }, + } + } + return self.endOfBufferInNumber(true); + }, + .number_post_dot => { + if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); + switch (try self.expectByte()) { + '0'...'9' => { + self.cursor += 1; + self.state = .number_frac; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .number_frac => { + while (self.cursor < self.input.len) : (self.cursor += 1) { + switch (self.input[self.cursor]) { + '0'...'9' => continue, + 'e', 'E' => { + self.cursor += 1; + self.state = .number_post_e; + continue :state_loop; + }, + else => { + self.state = .post_value; + return Token{ .number = self.takeValueSlice() }; + }, + } + } + return self.endOfBufferInNumber(true); + }, + .number_post_e => { + if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); + switch (self.input[self.cursor]) { + '0'...'9' => { + self.cursor += 1; + self.state = .number_exp; + continue :state_loop; + }, + '+', '-' => { + self.cursor += 1; + self.state = .number_post_e_sign; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .number_post_e_sign => { + if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false); + switch (self.input[self.cursor]) { + '0'...'9' => { + self.cursor += 1; + self.state = .number_exp; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .number_exp => { + while (self.cursor < self.input.len) : (self.cursor += 1) { + switch (self.input[self.cursor]) { + '0'...'9' => continue, + else => { + self.state = .post_value; + return Token{ .number = self.takeValueSlice() }; + }, + } + } + return self.endOfBufferInNumber(true); + }, + + .string => { + while (self.cursor < self.input.len) : (self.cursor += 1) { + switch (self.input[self.cursor]) { + 0...0x1f => return error.SyntaxError, // Bare ASCII control code in string. + + // ASCII plain text. + 0x20...('"' - 1), ('"' + 1)...('\\' - 1), ('\\' + 1)...0x7F => continue, + + // Special characters. + '"' => { + const result = Token{ .string = self.takeValueSlice() }; + self.cursor += 1; + self.state = .post_value; + return result; + }, + '\\' => { + const slice = self.takeValueSlice(); + self.cursor += 1; + self.state = .string_backslash; + if (slice.len > 0) return Token{ .partial_string = slice }; + continue :state_loop; + }, + + // UTF-8 validation. 
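+                        // The lead byte determines the sequence length and which
+                        // continuation-byte ranges are legal; the dedicated states below
+                        // guard against overlong encodings, surrogate halves, and
+                        // code points above U+10FFFF.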
+ // See http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String + 0xC2...0xDF => { + self.cursor += 1; + self.state = .string_utf8_last_byte; + continue :state_loop; + }, + 0xE0 => { + self.cursor += 1; + self.state = .string_utf8_second_to_last_byte_guard_against_overlong; + continue :state_loop; + }, + 0xE1...0xEC, 0xEE...0xEF => { + self.cursor += 1; + self.state = .string_utf8_second_to_last_byte; + continue :state_loop; + }, + 0xED => { + self.cursor += 1; + self.state = .string_utf8_second_to_last_byte_guard_against_surrogate_half; + continue :state_loop; + }, + 0xF0 => { + self.cursor += 1; + self.state = .string_utf8_third_to_last_byte_guard_against_overlong; + continue :state_loop; + }, + 0xF1...0xF3 => { + self.cursor += 1; + self.state = .string_utf8_third_to_last_byte; + continue :state_loop; + }, + 0xF4 => { + self.cursor += 1; + self.state = .string_utf8_third_to_last_byte_guard_against_too_large; + continue :state_loop; + }, + 0x80...0xC1, 0xF5...0xFF => return error.SyntaxError, // Invalid UTF-8. + } + } + if (self.is_end_of_input) return error.UnexpectedEndOfInput; + const slice = self.takeValueSlice(); + if (slice.len > 0) return Token{ .partial_string = slice }; + return error.BufferUnderrun; + }, + .string_backslash => { + switch (try self.expectByte()) { + '"', '\\', '/' => { + // Since these characters now represent themselves literally, + // we can simply begin the next plaintext slice here. + self.value_start = self.cursor; + self.cursor += 1; + self.state = .string; + continue :state_loop; + }, + 'b' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + return Token{ .partial_string_escaped_1 = [_]u8{0x08} }; + }, + 'f' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + return Token{ .partial_string_escaped_1 = [_]u8{0x0c} }; + }, + 'n' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + return Token{ .partial_string_escaped_1 = [_]u8{'\n'} }; + }, + 'r' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + return Token{ .partial_string_escaped_1 = [_]u8{'\r'} }; + }, + 't' => { + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + return Token{ .partial_string_escaped_1 = [_]u8{'\t'} }; + }, + 'u' => { + self.cursor += 1; + self.state = .string_backslash_u; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .string_backslash_u => { + const c = try self.expectByte(); + switch (c) { + '0'...'9' => { + self.unicode_code_point = @as(u21, c - '0') << 12; + }, + 'A'...'F' => { + self.unicode_code_point = @as(u21, c - 'A' + 10) << 12; + }, + 'a'...'f' => { + self.unicode_code_point = @as(u21, c - 'a' + 10) << 12; + }, + else => return error.SyntaxError, + } + self.cursor += 1; + self.state = .string_backslash_u_1; + continue :state_loop; + }, + .string_backslash_u_1 => { + const c = try self.expectByte(); + switch (c) { + '0'...'9' => { + self.unicode_code_point |= @as(u21, c - '0') << 8; + }, + 'A'...'F' => { + self.unicode_code_point |= @as(u21, c - 'A' + 10) << 8; + }, + 'a'...'f' => { + self.unicode_code_point |= @as(u21, c - 'a' + 10) << 8; + }, + else => return error.SyntaxError, + } + self.cursor += 1; + self.state = .string_backslash_u_2; + continue :state_loop; + }, + .string_backslash_u_2 => { + const c = try self.expectByte(); + switch (c) { + '0'...'9' => { + self.unicode_code_point |= @as(u21, c - '0') << 4; + }, + 'A'...'F' => { + 
self.unicode_code_point |= @as(u21, c - 'A' + 10) << 4; + }, + 'a'...'f' => { + self.unicode_code_point |= @as(u21, c - 'a' + 10) << 4; + }, + else => return error.SyntaxError, + } + self.cursor += 1; + self.state = .string_backslash_u_3; + continue :state_loop; + }, + .string_backslash_u_3 => { + const c = try self.expectByte(); + switch (c) { + '0'...'9' => { + self.unicode_code_point |= c - '0'; + }, + 'A'...'F' => { + self.unicode_code_point |= c - 'A' + 10; + }, + 'a'...'f' => { + self.unicode_code_point |= c - 'a' + 10; + }, + else => return error.SyntaxError, + } + self.cursor += 1; + switch (self.unicode_code_point) { + 0xD800...0xDBFF => { + // High surrogate half. + self.unicode_code_point = 0x10000 | (self.unicode_code_point << 10); + self.state = .string_surrogate_half; + continue :state_loop; + }, + 0xDC00...0xDFFF => return error.SyntaxError, // Unexpected low surrogate half. + else => { + // Code point from a single UTF-16 code unit. + self.value_start = self.cursor; + self.state = .string; + return self.partialStringCodepoint(); + }, + } + }, + .string_surrogate_half => { + switch (try self.expectByte()) { + '\\' => { + self.cursor += 1; + self.state = .string_surrogate_half_backslash; + continue :state_loop; + }, + else => return error.SyntaxError, // Expected low surrogate half. + } + }, + .string_surrogate_half_backslash => { + switch (try self.expectByte()) { + 'u' => { + self.cursor += 1; + self.state = .string_surrogate_half_backslash_u; + continue :state_loop; + }, + else => return error.SyntaxError, // Expected low surrogate half. + } + }, + .string_surrogate_half_backslash_u => { + switch (try self.expectByte()) { + 'D', 'd' => { + self.cursor += 1; + self.state = .string_surrogate_half_backslash_u_1; + continue :state_loop; + }, + else => return error.SyntaxError, // Expected low surrogate half. + } + }, + .string_surrogate_half_backslash_u_1 => { + const c = try self.expectByte(); + switch (c) { + 'C'...'F' => { + self.cursor += 1; + self.unicode_code_point |= @as(u21, c - 'C') << 8; + self.state = .string_surrogate_half_backslash_u_2; + continue :state_loop; + }, + 'c'...'f' => { + self.cursor += 1; + self.unicode_code_point |= @as(u21, c - 'c') << 8; + self.state = .string_surrogate_half_backslash_u_2; + continue :state_loop; + }, + else => return error.SyntaxError, // Expected low surrogate half. 
+ } + }, + .string_surrogate_half_backslash_u_2 => { + const c = try self.expectByte(); + switch (c) { + '0'...'9' => { + self.cursor += 1; + self.unicode_code_point |= @as(u21, c - '0') << 4; + self.state = .string_surrogate_half_backslash_u_3; + continue :state_loop; + }, + 'A'...'F' => { + self.cursor += 1; + self.unicode_code_point |= @as(u21, c - 'A' + 10) << 4; + self.state = .string_surrogate_half_backslash_u_3; + continue :state_loop; + }, + 'a'...'f' => { + self.cursor += 1; + self.unicode_code_point |= @as(u21, c - 'a' + 10) << 4; + self.state = .string_surrogate_half_backslash_u_3; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .string_surrogate_half_backslash_u_3 => { + const c = try self.expectByte(); + switch (c) { + '0'...'9' => { + self.unicode_code_point |= c - '0'; + }, + 'A'...'F' => { + self.unicode_code_point |= c - 'A' + 10; + }, + 'a'...'f' => { + self.unicode_code_point |= c - 'a' + 10; + }, + else => return error.SyntaxError, + } + self.cursor += 1; + self.value_start = self.cursor; + self.state = .string; + return self.partialStringCodepoint(); + }, + + .string_utf8_last_byte => { + switch (try self.expectByte()) { + 0x80...0xBF => { + self.cursor += 1; + self.state = .string; + continue :state_loop; + }, + else => return error.SyntaxError, // Invalid UTF-8. + } + }, + .string_utf8_second_to_last_byte => { + switch (try self.expectByte()) { + 0x80...0xBF => { + self.cursor += 1; + self.state = .string_utf8_last_byte; + continue :state_loop; + }, + else => return error.SyntaxError, // Invalid UTF-8. + } + }, + .string_utf8_second_to_last_byte_guard_against_overlong => { + switch (try self.expectByte()) { + 0xA0...0xBF => { + self.cursor += 1; + self.state = .string_utf8_last_byte; + continue :state_loop; + }, + else => return error.SyntaxError, // Invalid UTF-8. + } + }, + .string_utf8_second_to_last_byte_guard_against_surrogate_half => { + switch (try self.expectByte()) { + 0x80...0x9F => { + self.cursor += 1; + self.state = .string_utf8_last_byte; + continue :state_loop; + }, + else => return error.SyntaxError, // Invalid UTF-8. + } + }, + .string_utf8_third_to_last_byte => { + switch (try self.expectByte()) { + 0x80...0xBF => { + self.cursor += 1; + self.state = .string_utf8_second_to_last_byte; + continue :state_loop; + }, + else => return error.SyntaxError, // Invalid UTF-8. + } + }, + .string_utf8_third_to_last_byte_guard_against_overlong => { + switch (try self.expectByte()) { + 0x90...0xBF => { + self.cursor += 1; + self.state = .string_utf8_second_to_last_byte; + continue :state_loop; + }, + else => return error.SyntaxError, // Invalid UTF-8. + } + }, + .string_utf8_third_to_last_byte_guard_against_too_large => { + switch (try self.expectByte()) { + 0x80...0x8F => { + self.cursor += 1; + self.state = .string_utf8_second_to_last_byte; + continue :state_loop; + }, + else => return error.SyntaxError, // Invalid UTF-8. 
+ } + }, + + .literal_t => { + switch (try self.expectByte()) { + 'r' => { + self.cursor += 1; + self.state = .literal_tr; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .literal_tr => { + switch (try self.expectByte()) { + 'u' => { + self.cursor += 1; + self.state = .literal_tru; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .literal_tru => { + switch (try self.expectByte()) { + 'e' => { + self.cursor += 1; + self.state = .post_value; + return .true; + }, + else => return error.SyntaxError, + } + }, + .literal_f => { + switch (try self.expectByte()) { + 'a' => { + self.cursor += 1; + self.state = .literal_fa; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .literal_fa => { + switch (try self.expectByte()) { + 'l' => { + self.cursor += 1; + self.state = .literal_fal; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .literal_fal => { + switch (try self.expectByte()) { + 's' => { + self.cursor += 1; + self.state = .literal_fals; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .literal_fals => { + switch (try self.expectByte()) { + 'e' => { + self.cursor += 1; + self.state = .post_value; + return .false; + }, + else => return error.SyntaxError, + } + }, + .literal_n => { + switch (try self.expectByte()) { + 'u' => { + self.cursor += 1; + self.state = .literal_nu; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .literal_nu => { + switch (try self.expectByte()) { + 'l' => { + self.cursor += 1; + self.state = .literal_nul; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + .literal_nul => { + switch (try self.expectByte()) { + 'l' => { + self.cursor += 1; + self.state = .post_value; + return .null; + }, + else => return error.SyntaxError, + } + }, + } + unreachable; + } + } + + /// Seeks ahead in the input until the first byte of the next token (or the end of the input) + /// determines which type of token will be returned from the next `next*()` call. + /// This function is idempotent, only advancing past commas, colons, and inter-token whitespace. 
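+    /// For example (a sketch, assuming `initCompleteInput()`), skipping any value that is not an object:
+    /// ```
+    /// const token_type = try scanner.peekNextTokenType();
+    /// if (token_type != .object_begin) {
+    ///     try scanner.skipValue();
+    /// }
+    /// ```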
+ pub fn peekNextTokenType(self: *@This()) PeekError!TokenType { + state_loop: while (true) { + switch (self.state) { + .value => { + switch (try self.skipWhitespaceExpectByte()) { + '{' => return .object_begin, + '[' => return .array_begin, + '"' => return .string, + '-', '0'...'9' => return .number, + 't' => return .true, + 'f' => return .false, + 'n' => return .null, + else => return error.SyntaxError, + } + }, + + .post_value => { + if (try self.skipWhitespaceCheckEnd()) return .end_of_document; + + const c = self.input[self.cursor]; + if (self.string_is_object_key) { + self.string_is_object_key = false; + switch (c) { + ':' => { + self.cursor += 1; + self.state = .value; + continue :state_loop; + }, + else => return error.SyntaxError, + } + } + + switch (c) { + '}' => return .object_end, + ']' => return .array_end, + ',' => { + switch (self.stack.peek()) { + OBJECT_MODE => { + self.state = .object_post_comma; + }, + ARRAY_MODE => { + self.state = .value; + }, + } + self.cursor += 1; + continue :state_loop; + }, + else => return error.SyntaxError, + } + }, + + .object_start => { + switch (try self.skipWhitespaceExpectByte()) { + '"' => return .string, + '}' => return .object_end, + else => return error.SyntaxError, + } + }, + .object_post_comma => { + switch (try self.skipWhitespaceExpectByte()) { + '"' => return .string, + else => return error.SyntaxError, + } + }, + + .array_start => { + switch (try self.skipWhitespaceExpectByte()) { + ']' => return .array_end, + else => { + self.state = .value; + continue :state_loop; + }, + } + }, + + .number_minus, + .number_leading_zero, + .number_int, + .number_post_dot, + .number_frac, + .number_post_e, + .number_post_e_sign, + .number_exp, + => return .number, + + .string, + .string_backslash, + .string_backslash_u, + .string_backslash_u_1, + .string_backslash_u_2, + .string_backslash_u_3, + .string_surrogate_half, + .string_surrogate_half_backslash, + .string_surrogate_half_backslash_u, + .string_surrogate_half_backslash_u_1, + .string_surrogate_half_backslash_u_2, + .string_surrogate_half_backslash_u_3, + => return .string, + + .string_utf8_last_byte, + .string_utf8_second_to_last_byte, + .string_utf8_second_to_last_byte_guard_against_overlong, + .string_utf8_second_to_last_byte_guard_against_surrogate_half, + .string_utf8_third_to_last_byte, + .string_utf8_third_to_last_byte_guard_against_overlong, + .string_utf8_third_to_last_byte_guard_against_too_large, + => return .string, + + .literal_t, + .literal_tr, + .literal_tru, + => return .true, + .literal_f, + .literal_fa, + .literal_fal, + .literal_fals, + => return .false, + .literal_n, + .literal_nu, + .literal_nul, + => return .null, + } + unreachable; + } + } + + const State = enum { + value, + post_value, + + object_start, + object_post_comma, + + array_start, + + number_minus, + number_leading_zero, + number_int, + number_post_dot, + number_frac, + number_post_e, + number_post_e_sign, + number_exp, + + string, + string_backslash, + string_backslash_u, + string_backslash_u_1, + string_backslash_u_2, + string_backslash_u_3, + string_surrogate_half, + string_surrogate_half_backslash, + string_surrogate_half_backslash_u, + string_surrogate_half_backslash_u_1, + string_surrogate_half_backslash_u_2, + string_surrogate_half_backslash_u_3, + + // From http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String + string_utf8_last_byte, // State A + string_utf8_second_to_last_byte, // State B + string_utf8_second_to_last_byte_guard_against_overlong, // 
State C
+        string_utf8_second_to_last_byte_guard_against_surrogate_half, // State D
+        string_utf8_third_to_last_byte, // State E
+        string_utf8_third_to_last_byte_guard_against_overlong, // State F
+        string_utf8_third_to_last_byte_guard_against_too_large, // State G
+
+        literal_t,
+        literal_tr,
+        literal_tru,
+        literal_f,
+        literal_fa,
+        literal_fal,
+        literal_fals,
+        literal_n,
+        literal_nu,
+        literal_nul,
+    };
+
+    fn expectByte(self: *const @This()) !u8 {
+        if (self.cursor < self.input.len) {
+            return self.input[self.cursor];
+        }
+        // No byte.
+        if (self.is_end_of_input) return error.UnexpectedEndOfInput;
+        return error.BufferUnderrun;
+    }
+
+    fn skipWhitespace(self: *@This()) void {
+        while (self.cursor < self.input.len) : (self.cursor += 1) {
+            switch (self.input[self.cursor]) {
+                // Whitespace
+                ' ', '\t', '\r' => continue,
+                '\n' => {
+                    if (self.diagnostics) |diag| {
+                        diag.line_number += 1;
+                        // This will count the newline itself,
+                        // which means a straightforward subtraction will give a 1-based column number.
+                        diag.line_start_cursor = self.cursor;
+                    }
+                    continue;
+                },
+                else => return,
+            }
+        }
+    }
+
+    fn skipWhitespaceExpectByte(self: *@This()) !u8 {
+        self.skipWhitespace();
+        return self.expectByte();
+    }
+
+    fn skipWhitespaceCheckEnd(self: *@This()) !bool {
+        self.skipWhitespace();
+        if (self.cursor >= self.input.len) {
+            // End of buffer.
+            if (self.is_end_of_input) {
+                // End of everything.
+                if (self.stackHeight() == 0) {
+                    // We did it!
+                    return true;
+                }
+                return error.UnexpectedEndOfInput;
+            }
+            return error.BufferUnderrun;
+        }
+        if (self.stackHeight() == 0) return error.SyntaxError;
+        return false;
+    }
+
+    fn takeValueSlice(self: *@This()) []const u8 {
+        const slice = self.input[self.value_start..self.cursor];
+        self.value_start = self.cursor;
+        return slice;
+    }
+
+    fn endOfBufferInNumber(self: *@This(), allow_end: bool) !Token {
+        const slice = self.takeValueSlice();
+        if (self.is_end_of_input) {
+            if (!allow_end) return error.UnexpectedEndOfInput;
+            self.state = .post_value;
+            return Token{ .number = slice };
+        }
+        if (slice.len == 0) return error.BufferUnderrun;
+        return Token{ .partial_number = slice };
+    }
+
+    fn partialStringCodepoint(self: *@This()) Token {
+        const code_point = self.unicode_code_point;
+        self.unicode_code_point = undefined;
+        var buf: [4]u8 = undefined;
+        switch (std.unicode.utf8Encode(code_point, &buf) catch unreachable) {
+            1 => return Token{ .partial_string_escaped_1 = buf[0..1].* },
+            2 => return Token{ .partial_string_escaped_2 = buf[0..2].* },
+            3 => return Token{ .partial_string_escaped_3 = buf[0..3].* },
+            4 => return Token{ .partial_string_escaped_4 = buf[0..4].* },
+            else => unreachable,
+        }
+    }
+};
+
+const OBJECT_MODE = 0;
+const ARRAY_MODE = 1;
+
+const BitStack = struct {
+    bytes: std.ArrayList(u8),
+    bit_len: u32 = 0,
+
+    pub fn init(allocator: Allocator) @This() {
+        return .{
+            .bytes = std.ArrayList(u8).init(allocator),
+        };
+    }
+
+    pub fn deinit(self: *@This()) void {
+        self.bytes.deinit();
+        self.* = undefined;
+    }
+
+    pub fn ensureTotalCapacity(self: *@This(), bit_capacity: u32) Allocator.Error!void {
+        const byte_capacity = (bit_capacity + 7) >> 3;
+        try self.bytes.ensureTotalCapacity(byte_capacity);
+    }
+
+    pub fn push(self: *@This(), b: u1) Allocator.Error!void {
+        const byte_index = self.bit_len >> 3;
+        const bit_index = @intCast(u3, self.bit_len & 7);
+
+        if (self.bytes.items.len <= byte_index) {
+            try self.bytes.append(0);
+        }
+
+        self.bytes.items[byte_index] &= ~(@as(u8, 1) << bit_index);
+        self.bytes.items[byte_index] |=
@as(u8, b) << bit_index; + + self.bit_len += 1; + } + + pub fn peek(self: *const @This()) u1 { + const byte_index = (self.bit_len - 1) >> 3; + const bit_index = @intCast(u3, (self.bit_len - 1) & 7); + return @intCast(u1, (self.bytes.items[byte_index] >> bit_index) & 1); + } + + pub fn pop(self: *@This()) u1 { + const b = self.peek(); + self.bit_len -= 1; + return b; + } +}; + +fn appendSlice(list: *std.ArrayList(u8), buf: []const u8, max_value_len: usize) !void { + const new_len = std.math.add(usize, list.items.len, buf.len) catch return error.ValueTooLong; + if (new_len > max_value_len) return error.ValueTooLong; + try list.appendSlice(buf); +} + +/// For the slice you get from a `Token.number` or `Token.allocated_number`, +/// this function returns true if the number doesn't contain any fraction or exponent components. +/// Note, the numeric value encoded by the value may still be an integer, such as `1.0`. +/// This function is meant to give a hint about whether integer parsing or float parsing should be used on the value. +/// This function will not give meaningful results on non-numeric input. +pub fn isNumberFormattedLikeAnInteger(value: []const u8) bool { + return std.mem.indexOfAny(u8, value, ".eE") == null; +} + +test { + _ = @import("./scanner_test.zig"); +} diff --git a/lib/std/json/scanner_test.zig b/lib/std/json/scanner_test.zig new file mode 100644 index 0000000000..3e06d4ca13 --- /dev/null +++ b/lib/std/json/scanner_test.zig @@ -0,0 +1,466 @@ +const std = @import("std"); +const JsonScanner = @import("./scanner.zig").Scanner; +const jsonReader = @import("./scanner.zig").reader; +const JsonReader = @import("./scanner.zig").Reader; +const Token = @import("./scanner.zig").Token; +const TokenType = @import("./scanner.zig").TokenType; +const Diagnostics = @import("./scanner.zig").Diagnostics; +const Error = @import("./scanner.zig").Error; +const validate = @import("./scanner.zig").validate; + +const example_document_str = + \\{ + \\ "Image": { + \\ "Width": 800, + \\ "Height": 600, + \\ "Title": "View from 15th Floor", + \\ "Thumbnail": { + \\ "Url": "http://www.example.com/image/481989943", + \\ "Height": 125, + \\ "Width": 100 + \\ }, + \\ "Animated" : false, + \\ "IDs": [116, 943, 234, 38793] + \\ } + \\} +; + +fn expectNext(scanner_or_reader: anytype, expected_token: Token) !void { + return expectEqualTokens(expected_token, try scanner_or_reader.next()); +} + +fn expectPeekNext(scanner_or_reader: anytype, expected_token_type: TokenType, expected_token: Token) !void { + try std.testing.expectEqual(expected_token_type, try scanner_or_reader.peekNextTokenType()); + try expectEqualTokens(expected_token, try scanner_or_reader.next()); +} + +test "json.token" { + var scanner = JsonScanner.initCompleteInput(std.testing.allocator, example_document_str); + defer scanner.deinit(); + + try expectNext(&scanner, .object_begin); + try expectNext(&scanner, Token{ .string = "Image" }); + try expectNext(&scanner, .object_begin); + try expectNext(&scanner, Token{ .string = "Width" }); + try expectNext(&scanner, Token{ .number = "800" }); + try expectNext(&scanner, Token{ .string = "Height" }); + try expectNext(&scanner, Token{ .number = "600" }); + try expectNext(&scanner, Token{ .string = "Title" }); + try expectNext(&scanner, Token{ .string = "View from 15th Floor" }); + try expectNext(&scanner, Token{ .string = "Thumbnail" }); + try expectNext(&scanner, .object_begin); + try expectNext(&scanner, Token{ .string = "Url" }); + try expectNext(&scanner, Token{ .string = 
"http://www.example.com/image/481989943" }); + try expectNext(&scanner, Token{ .string = "Height" }); + try expectNext(&scanner, Token{ .number = "125" }); + try expectNext(&scanner, Token{ .string = "Width" }); + try expectNext(&scanner, Token{ .number = "100" }); + try expectNext(&scanner, .object_end); + try expectNext(&scanner, Token{ .string = "Animated" }); + try expectNext(&scanner, .false); + try expectNext(&scanner, Token{ .string = "IDs" }); + try expectNext(&scanner, .array_begin); + try expectNext(&scanner, Token{ .number = "116" }); + try expectNext(&scanner, Token{ .number = "943" }); + try expectNext(&scanner, Token{ .number = "234" }); + try expectNext(&scanner, Token{ .number = "38793" }); + try expectNext(&scanner, .array_end); + try expectNext(&scanner, .object_end); + try expectNext(&scanner, .object_end); + try expectNext(&scanner, .end_of_document); +} + +const all_types_test_case = + \\[ + \\ "", "a\nb", + \\ 0, 0.0, -1.1e-1, + \\ true, false, null, + \\ {"a": {}}, + \\ [] + \\] +; + +fn testAllTypes(source: anytype, large_buffer: bool) !void { + try expectPeekNext(source, .array_begin, .array_begin); + try expectPeekNext(source, .string, Token{ .string = "" }); + try expectPeekNext(source, .string, Token{ .partial_string = "a" }); + try expectPeekNext(source, .string, Token{ .partial_string_escaped_1 = "\n".* }); + if (large_buffer) { + try expectPeekNext(source, .string, Token{ .string = "b" }); + } else { + try expectPeekNext(source, .string, Token{ .partial_string = "b" }); + try expectPeekNext(source, .string, Token{ .string = "" }); + } + if (large_buffer) { + try expectPeekNext(source, .number, Token{ .number = "0" }); + } else { + try expectPeekNext(source, .number, Token{ .partial_number = "0" }); + try expectPeekNext(source, .number, Token{ .number = "" }); + } + if (large_buffer) { + try expectPeekNext(source, .number, Token{ .number = "0.0" }); + } else { + try expectPeekNext(source, .number, Token{ .partial_number = "0" }); + try expectPeekNext(source, .number, Token{ .partial_number = "." }); + try expectPeekNext(source, .number, Token{ .partial_number = "0" }); + try expectPeekNext(source, .number, Token{ .number = "" }); + } + if (large_buffer) { + try expectPeekNext(source, .number, Token{ .number = "-1.1e-1" }); + } else { + try expectPeekNext(source, .number, Token{ .partial_number = "-" }); + try expectPeekNext(source, .number, Token{ .partial_number = "1" }); + try expectPeekNext(source, .number, Token{ .partial_number = "." 
}); + try expectPeekNext(source, .number, Token{ .partial_number = "1" }); + try expectPeekNext(source, .number, Token{ .partial_number = "e" }); + try expectPeekNext(source, .number, Token{ .partial_number = "-" }); + try expectPeekNext(source, .number, Token{ .partial_number = "1" }); + try expectPeekNext(source, .number, Token{ .number = "" }); + } + try expectPeekNext(source, .true, .true); + try expectPeekNext(source, .false, .false); + try expectPeekNext(source, .null, .null); + try expectPeekNext(source, .object_begin, .object_begin); + if (large_buffer) { + try expectPeekNext(source, .string, Token{ .string = "a" }); + } else { + try expectPeekNext(source, .string, Token{ .partial_string = "a" }); + try expectPeekNext(source, .string, Token{ .string = "" }); + } + try expectPeekNext(source, .object_begin, .object_begin); + try expectPeekNext(source, .object_end, .object_end); + try expectPeekNext(source, .object_end, .object_end); + try expectPeekNext(source, .array_begin, .array_begin); + try expectPeekNext(source, .array_end, .array_end); + try expectPeekNext(source, .array_end, .array_end); + try expectPeekNext(source, .end_of_document, .end_of_document); +} + +test "peek all types" { + var scanner = JsonScanner.initCompleteInput(std.testing.allocator, all_types_test_case); + defer scanner.deinit(); + try testAllTypes(&scanner, true); + + var stream = std.io.fixedBufferStream(all_types_test_case); + var json_reader = jsonReader(std.testing.allocator, stream.reader()); + defer json_reader.deinit(); + try testAllTypes(&json_reader, true); + + var tiny_stream = std.io.fixedBufferStream(all_types_test_case); + var tiny_json_reader = JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader()); + defer tiny_json_reader.deinit(); + try testAllTypes(&tiny_json_reader, false); +} + +test "json.token mismatched close" { + var scanner = JsonScanner.initCompleteInput(std.testing.allocator, "[102, 111, 111 }"); + defer scanner.deinit(); + try expectNext(&scanner, .array_begin); + try expectNext(&scanner, Token{ .number = "102" }); + try expectNext(&scanner, Token{ .number = "111" }); + try expectNext(&scanner, Token{ .number = "111" }); + try std.testing.expectError(error.SyntaxError, scanner.next()); +} + +test "json.token premature object close" { + var scanner = JsonScanner.initCompleteInput(std.testing.allocator, "{ \"key\": }"); + defer scanner.deinit(); + try expectNext(&scanner, .object_begin); + try expectNext(&scanner, Token{ .string = "key" }); + try std.testing.expectError(error.SyntaxError, scanner.next()); +} + +test "JsonScanner basic" { + var scanner = JsonScanner.initCompleteInput(std.testing.allocator, example_document_str); + defer scanner.deinit(); + + while (true) { + const token = try scanner.next(); + if (token == .end_of_document) break; + } +} + +test "JsonReader basic" { + var stream = std.io.fixedBufferStream(example_document_str); + + var json_reader = jsonReader(std.testing.allocator, stream.reader()); + defer json_reader.deinit(); + + while (true) { + const token = try json_reader.next(); + if (token == .end_of_document) break; + } +} + +const number_test_stems = .{ + .{ "", "-" }, + .{ "0", "1", "10", "9999999999999999999999999" }, + .{ "", ".0", ".999999999999999999999999" }, + .{ "", "e0", "E0", "e+0", "e-0", "e9999999999999999999999999999" }, +}; +const number_test_items = blk: { + comptime var ret: []const []const u8 = &[_][]const u8{}; + for (number_test_stems[0]) |s0| { + for (number_test_stems[1]) |s1| { + for 
(number_test_stems[2]) |s2| {
+                for (number_test_stems[3]) |s3| {
+                    ret = ret ++ &[_][]const u8{s0 ++ s1 ++ s2 ++ s3};
+                }
+            }
+        }
+    }
+    break :blk ret;
+};
+
+test "numbers" {
+    for (number_test_items) |number_str| {
+        var scanner = JsonScanner.initCompleteInput(std.testing.allocator, number_str);
+        defer scanner.deinit();
+
+        const token = try scanner.next();
+        const value = token.number; // assert this is a number
+        try std.testing.expectEqualStrings(number_str, value);
+
+        try std.testing.expectEqual(Token.end_of_document, try scanner.next());
+    }
+}
+
+const string_test_cases = .{
+    // The left is JSON without the "quotes".
+    // The right is the expected unescaped content.
+    .{ "", "" },
+    .{ "\\\\", "\\" },
+    .{ "a\\\\b", "a\\b" },
+    .{ "a\\\"b", "a\"b" },
+    .{ "\\n", "\n" },
+    .{ "\\u000a", "\n" },
+    .{ "𝄞", "\u{1D11E}" },
+    .{ "\\uD834\\uDD1E", "\u{1D11E}" },
+    .{ "\\uff20", "＠" },
+};
+
+test "strings" {
+    inline for (string_test_cases) |tuple| {
+        var stream = std.io.fixedBufferStream("\"" ++ tuple[0] ++ "\"");
+        var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
+        defer arena.deinit();
+        var json_reader = jsonReader(std.testing.allocator, stream.reader());
+        defer json_reader.deinit();
+
+        const token = try json_reader.nextAlloc(arena.allocator(), .alloc_if_needed);
+        const value = switch (token) {
+            .string => |value| value,
+            .allocated_string => |value| value,
+            else => return error.ExpectedString,
+        };
+        try std.testing.expectEqualStrings(tuple[1], value);
+
+        try std.testing.expectEqual(Token.end_of_document, try json_reader.next());
+    }
+}
+
+const nesting_test_cases = .{
+    .{ null, "[]" },
+    .{ null, "{}" },
+    .{ error.SyntaxError, "[}" },
+    .{ error.SyntaxError, "{]" },
+    .{ null, "[" ** 1000 ++ "]" ** 1000 },
+    .{ null, "{\"\":" ** 1000 ++ "0" ++ "}" ** 1000 },
+    .{ error.SyntaxError, "[" ** 1000 ++ "]" ** 999 ++ "}" },
+    .{ error.SyntaxError, "{\"\":" ** 1000 ++ "0" ++ "}" ** 999 ++ "]" },
+    .{ error.SyntaxError, "[" ** 1000 ++ "]" ** 1001 },
+    .{ error.SyntaxError, "{\"\":" ** 1000 ++ "0" ++ "}" ** 1001 },
+    .{ error.UnexpectedEndOfInput, "[" ** 1000 ++ "]" ** 999 },
+    .{ error.UnexpectedEndOfInput, "{\"\":" ** 1000 ++ "0" ++ "}" ** 999 },
+};
+
+test "nesting" {
+    inline for (nesting_test_cases) |tuple| {
+        const maybe_error = tuple[0];
+        const document_str = tuple[1];
+
+        expectMaybeError(document_str, maybe_error) catch |err| {
+            std.debug.print("in json document: {s}\n", .{document_str});
+            return err;
+        };
+    }
+}
+
+fn expectMaybeError(document_str: []const u8, maybe_error: ?Error) !void {
+    var scanner = JsonScanner.initCompleteInput(std.testing.allocator, document_str);
+    defer scanner.deinit();
+
+    while (true) {
+        const token = scanner.next() catch |err| {
+            if (maybe_error) |expected_err| {
+                if (err == expected_err) return;
+            }
+            return err;
+        };
+        if (token == .end_of_document) break;
+    }
+    if (maybe_error != null) return error.ExpectedError;
+}
+
+fn expectEqualTokens(expected_token: Token, actual_token: Token) !void {
+    try std.testing.expectEqual(std.meta.activeTag(expected_token), std.meta.activeTag(actual_token));
+    switch (expected_token) {
+        .number => |expected_value| {
+            try std.testing.expectEqualStrings(expected_value, actual_token.number);
+        },
+        .string => |expected_value| {
+            try std.testing.expectEqualStrings(expected_value, actual_token.string);
+        },
+        else => {},
+    }
+}
+
+fn testTinyBufferSize(document_str: []const u8) !void {
+    var tiny_stream = std.io.fixedBufferStream(document_str);
+    var normal_stream =
std.io.fixedBufferStream(document_str); + + var tiny_json_reader = JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader()); + defer tiny_json_reader.deinit(); + var normal_json_reader = JsonReader(0x1000, @TypeOf(normal_stream.reader())).init(std.testing.allocator, normal_stream.reader()); + defer normal_json_reader.deinit(); + + expectEqualStreamOfTokens(&normal_json_reader, &tiny_json_reader) catch |err| { + std.debug.print("in json document: {s}\n", .{document_str}); + return err; + }; +} +fn expectEqualStreamOfTokens(control_json_reader: anytype, test_json_reader: anytype) !void { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + while (true) { + const control_token = try control_json_reader.nextAlloc(arena.allocator(), .alloc_always); + const test_token = try test_json_reader.nextAlloc(arena.allocator(), .alloc_always); + try expectEqualTokens(control_token, test_token); + if (control_token == .end_of_document) break; + _ = arena.reset(.retain_capacity); + } +} + +test "BufferUnderrun" { + try testTinyBufferSize(example_document_str); + for (number_test_items) |number_str| { + try testTinyBufferSize(number_str); + } + inline for (string_test_cases) |tuple| { + try testTinyBufferSize("\"" ++ tuple[0] ++ "\""); + } +} + +test "json.validate" { + try std.testing.expectEqual(true, try validate(std.testing.allocator, "{}")); + try std.testing.expectEqual(true, try validate(std.testing.allocator, "[]")); + try std.testing.expectEqual(false, try validate(std.testing.allocator, "[{[[[[{}]]]]}]")); + try std.testing.expectEqual(false, try validate(std.testing.allocator, "{]")); + try std.testing.expectEqual(false, try validate(std.testing.allocator, "[}")); + try std.testing.expectEqual(false, try validate(std.testing.allocator, "{{{{[]}}}]")); +} + +fn testSkipValue(s: []const u8) !void { + var scanner = JsonScanner.initCompleteInput(std.testing.allocator, s); + defer scanner.deinit(); + try scanner.skipValue(); + try expectEqualTokens(.end_of_document, try scanner.next()); + + var stream = std.io.fixedBufferStream(s); + var json_reader = jsonReader(std.testing.allocator, stream.reader()); + defer json_reader.deinit(); + try json_reader.skipValue(); + try expectEqualTokens(.end_of_document, try json_reader.next()); +} + +test "skipValue" { + try testSkipValue("false"); + try testSkipValue("true"); + try testSkipValue("null"); + try testSkipValue("42"); + try testSkipValue("42.0"); + try testSkipValue("\"foo\""); + try testSkipValue("[101, 111, 121]"); + try testSkipValue("{}"); + try testSkipValue("{\"foo\": \"bar\\nbaz\"}"); + + // An absurd number of nestings + const nestings = 1000; + try testSkipValue("[" ** nestings ++ "]" ** nestings); + + // Would a number token cause problems in a deeply-nested array? + try testSkipValue("[" ** nestings ++ "0.118, 999, 881.99, 911.9, 725, 3" ++ "]" ** nestings); + + // Mismatched brace/square bracket + try std.testing.expectError(error.SyntaxError, testSkipValue("[102, 111, 111}")); +} + +fn testEnsureStackCapacity(do_ensure: bool) !void { + var fail_alloc = std.testing.FailingAllocator.init(std.testing.allocator, 1); + const failing_allocator = fail_alloc.allocator(); + + const nestings = 999; // intentionally not a power of 2. 
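+    // The scanner's nesting stack is a BitStack, which packs 8 levels per byte,
+    // so 999 levels fit in a 125-byte buffer. Assuming ensureTotalStackCapacity()
+    // reserves that in one up-front allocation (via BitStack.ensureTotalCapacity),
+    // it stays within the single allocation the FailingAllocator above permits,
+    // while growing the stack lazily would not.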
+ var scanner = JsonScanner.initCompleteInput(failing_allocator, "[" ** nestings ++ "]" ** nestings); + defer scanner.deinit(); + + if (do_ensure) { + try scanner.ensureTotalStackCapacity(nestings); + } + + try scanner.skipValue(); + try std.testing.expectEqual(Token.end_of_document, try scanner.next()); +} +test "ensureTotalStackCapacity" { + // Once to demonstrate failure. + try std.testing.expectError(error.OutOfMemory, testEnsureStackCapacity(false)); + // Then to demonstrate it works. + try testEnsureStackCapacity(true); +} + +fn testDiagnosticsFromSource(expected_error: ?anyerror, line: u64, col: u64, byte_offset: u64, source: anytype) !void { + var diagnostics = Diagnostics{}; + source.enableDiagnostics(&diagnostics); + + if (expected_error) |expected_err| { + try std.testing.expectError(expected_err, source.skipValue()); + } else { + try source.skipValue(); + try std.testing.expectEqual(Token.end_of_document, try source.next()); + } + try std.testing.expectEqual(line, diagnostics.getLine()); + try std.testing.expectEqual(col, diagnostics.getColumn()); + try std.testing.expectEqual(byte_offset, diagnostics.getByteOffset()); +} +fn testDiagnostics(expected_error: ?anyerror, line: u64, col: u64, byte_offset: u64, s: []const u8) !void { + var scanner = JsonScanner.initCompleteInput(std.testing.allocator, s); + defer scanner.deinit(); + try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &scanner); + + var tiny_stream = std.io.fixedBufferStream(s); + var tiny_json_reader = JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader()); + defer tiny_json_reader.deinit(); + try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &tiny_json_reader); + + var medium_stream = std.io.fixedBufferStream(s); + var medium_json_reader = JsonReader(5, @TypeOf(medium_stream.reader())).init(std.testing.allocator, medium_stream.reader()); + defer medium_json_reader.deinit(); + try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &medium_json_reader); +} +test "enableDiagnostics" { + try testDiagnostics(error.UnexpectedEndOfInput, 1, 1, 0, ""); + try testDiagnostics(null, 1, 3, 2, "[]"); + try testDiagnostics(null, 2, 2, 3, "[\n]"); + try testDiagnostics(null, 14, 2, example_document_str.len, example_document_str); + + try testDiagnostics(error.SyntaxError, 3, 1, 25, + \\{ + \\ "common": "mistake", + \\} + ); + + inline for ([_]comptime_int{ 5, 6, 7, 99 }) |reps| { + // The error happens 1 byte before the end. + const s = "[" ** reps ++ "}"; + try testDiagnostics(error.SyntaxError, 1, s.len, s.len - 1, s); + } +} diff --git a/lib/std/json/static.zig b/lib/std/json/static.zig new file mode 100644 index 0000000000..1b1692c380 --- /dev/null +++ b/lib/std/json/static.zig @@ -0,0 +1,621 @@ +const std = @import("std"); +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; + +const Scanner = @import("./scanner.zig").Scanner; +const Token = @import("./scanner.zig").Token; +const AllocWhen = @import("./scanner.zig").AllocWhen; +const default_max_value_len = @import("./scanner.zig").default_max_value_len; +const isNumberFormattedLikeAnInteger = @import("./scanner.zig").isNumberFormattedLikeAnInteger; + +pub const ParseOptions = struct { + /// Behaviour when a duplicate field is encountered. + duplicate_field_behavior: enum { + use_first, + @"error", + use_last, + } = .@"error", + + /// If false, finding an unknown field returns an error. 
+ ignore_unknown_fields: bool = false, + + /// Passed to json.Scanner.nextAllocMax() or json.Reader.nextAllocMax(). + /// The default for parseFromSlice() or parseFromTokenSource() with a *json.Scanner input + /// is the length of the input slice, which means error.ValueTooLong will never be returned. + /// The default for parseFromTokenSource() with a *json.Reader is default_max_value_len. + max_value_len: ?usize = null, +}; + +/// Parses the json document from s and returns the result. +/// The provided allocator is used both for temporary allocations during parsing the document, +/// and also to allocate any pointer values in the return type. +/// If T contains any pointers, free the memory with `std.json.parseFree`. +/// Note that `error.BufferUnderrun` is not actually possible to return from this function. +pub fn parseFromSlice(comptime T: type, allocator: Allocator, s: []const u8, options: ParseOptions) ParseError(T, Scanner)!T { + var scanner = Scanner.initCompleteInput(allocator, s); + defer scanner.deinit(); + + return parseFromTokenSource(T, allocator, &scanner, options); +} + +/// `scanner_or_reader` must be either a `*std.json.Scanner` with complete input or a `*std.json.Reader`. +/// allocator is used to allocate the data of T if necessary, +/// such as if T is `*u32` or `[]u32`. +/// If T contains any pointers, free the memory with `std.json.parseFree`. +/// If T contains no pointers, the allocator may sometimes be used for temporary allocations, +/// but no call to `std.json.parseFree` will be necessary; +/// all temporary allocations will be freed before this function returns. +/// Note that `error.BufferUnderrun` is not actually possible to return from this function. +pub fn parseFromTokenSource(comptime T: type, allocator: Allocator, scanner_or_reader: anytype, options: ParseOptions) ParseError(T, @TypeOf(scanner_or_reader.*))!T { + if (@TypeOf(scanner_or_reader.*) == Scanner) { + assert(scanner_or_reader.is_end_of_input); + } + + var resolved_options = options; + if (resolved_options.max_value_len == null) { + if (@TypeOf(scanner_or_reader.*) == Scanner) { + resolved_options.max_value_len = scanner_or_reader.input.len; + } else { + resolved_options.max_value_len = default_max_value_len; + } + } + + const r = try parseInternal(T, allocator, scanner_or_reader, resolved_options); + errdefer parseFree(T, allocator, r); + + assert(.end_of_document == try scanner_or_reader.next()); + + return r; +} + +/// The error set that will be returned from parsing T from *Source. +/// Note that this may contain error.BufferUnderrun, but that error will never actually be returned. +pub fn ParseError(comptime T: type, comptime Source: type) type { + // `inferred_types` is used to avoid infinite recursion for recursive type definitions. + const inferred_types = [_]type{}; + // A few of these will either always be present or present enough of the time that + // omitting them is more confusing than always including them. 
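+    // For example, ParseError(bool, Scanner) works out to just
+    // error{UnexpectedToken} || Scanner.NextError || Scanner.PeekError,
+    // since the .Bool branch of ParseInternalErrorImpl below adds an empty error set.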
+    return error{UnexpectedToken} || Source.NextError || Source.PeekError ||
+        ParseInternalErrorImpl(T, Source, &inferred_types);
+}
+
+fn ParseInternalErrorImpl(comptime T: type, comptime Source: type, comptime inferred_types: []const type) type {
+    for (inferred_types) |ty| {
+        if (T == ty) return error{};
+    }
+
+    switch (@typeInfo(T)) {
+        .Bool => return error{},
+        .Float, .ComptimeFloat => return Source.AllocError || std.fmt.ParseFloatError,
+        .Int, .ComptimeInt => {
+            return Source.AllocError || error{ InvalidNumber, Overflow } ||
+                std.fmt.ParseIntError || std.fmt.ParseFloatError;
+        },
+        .Optional => |optional_info| return ParseInternalErrorImpl(optional_info.child, Source, inferred_types ++ [_]type{T}),
+        .Enum => return Source.AllocError || error{InvalidEnumTag},
+        .Union => |unionInfo| {
+            if (unionInfo.tag_type) |_| {
+                var errors = Source.AllocError || error{UnknownField};
+                for (unionInfo.fields) |u_field| {
+                    errors = errors || ParseInternalErrorImpl(u_field.type, Source, inferred_types ++ [_]type{T});
+                }
+                return errors;
+            } else {
+                @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
+            }
+        },
+        .Struct => |structInfo| {
+            var errors = Source.AllocError || error{
+                DuplicateField,
+                UnknownField,
+                MissingField,
+            };
+            for (structInfo.fields) |field| {
+                errors = errors || ParseInternalErrorImpl(field.type, Source, inferred_types ++ [_]type{T});
+            }
+            return errors;
+        },
+        .Array => |arrayInfo| {
+            return error{LengthMismatch} ||
+                ParseInternalErrorImpl(arrayInfo.child, Source, inferred_types ++ [_]type{T});
+        },
+        .Vector => |vecInfo| {
+            return error{LengthMismatch} ||
+                ParseInternalErrorImpl(vecInfo.child, Source, inferred_types ++ [_]type{T});
+        },
+        .Pointer => |ptrInfo| {
+            switch (ptrInfo.size) {
+                .One, .Slice => {
+                    return ParseInternalErrorImpl(ptrInfo.child, Source, inferred_types ++ [_]type{T});
+                },
+                else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
+            }
+        },
+        else => return error{},
+    }
+    unreachable;
+}
+
+fn parseInternal(
+    comptime T: type,
+    allocator: Allocator,
+    source: anytype,
+    options: ParseOptions,
+) ParseError(T, @TypeOf(source.*))!T {
+    switch (@typeInfo(T)) {
+        .Bool => {
+            return switch (try source.next()) {
+                .true => true,
+                .false => false,
+                else => error.UnexpectedToken,
+            };
+        },
+        .Float, .ComptimeFloat => {
+            const token = try source.nextAllocMax(allocator, .alloc_if_needed, options.max_value_len.?);
+            defer freeAllocated(allocator, token);
+            const slice = switch (token) {
+                .number, .string => |slice| slice,
+                .allocated_number, .allocated_string => |slice| slice,
+                else => return error.UnexpectedToken,
+            };
+            return try std.fmt.parseFloat(T, slice);
+        },
+        .Int, .ComptimeInt => {
+            const token = try source.nextAllocMax(allocator, .alloc_if_needed, options.max_value_len.?);
+            defer freeAllocated(allocator, token);
+            const slice = switch (token) {
+                .number, .string => |slice| slice,
+                .allocated_number, .allocated_string => |slice| slice,
+                else => return error.UnexpectedToken,
+            };
+            if (isNumberFormattedLikeAnInteger(slice))
+                return std.fmt.parseInt(T, slice, 10);
+            // Try to coerce a float to an integer.
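+            // For example, "4.2e2" is not formatted like an integer, but it encodes
+            // exactly 420, so it is accepted below; "0.042e2" encodes 4.2 and fails the
+            // @round() check (see "parse exponential into int" in static_test.zig).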
+ const float = try std.fmt.parseFloat(f128, slice); + if (@round(float) != float) return error.InvalidNumber; + if (float > std.math.maxInt(T) or float < std.math.minInt(T)) return error.Overflow; + return @floatToInt(T, float); + }, + .Optional => |optionalInfo| { + switch (try source.peekNextTokenType()) { + .null => { + _ = try source.next(); + return null; + }, + else => { + return try parseInternal(optionalInfo.child, allocator, source, options); + }, + } + }, + .Enum => |enumInfo| { + const token = try source.nextAllocMax(allocator, .alloc_if_needed, options.max_value_len.?); + defer freeAllocated(allocator, token); + const slice = switch (token) { + .number, .string => |slice| slice, + .allocated_number, .allocated_string => |slice| slice, + else => return error.UnexpectedToken, + }; + // Check for a named value. + if (std.meta.stringToEnum(T, slice)) |value| return value; + // Check for a numeric value. + if (!isNumberFormattedLikeAnInteger(slice)) return error.InvalidEnumTag; + const n = std.fmt.parseInt(enumInfo.tag_type, slice, 10) catch return error.InvalidEnumTag; + return try std.meta.intToEnum(T, n); + }, + .Union => |unionInfo| { + const UnionTagType = unionInfo.tag_type orelse @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'"); + + if (.object_begin != try source.next()) return error.UnexpectedToken; + + var result: ?T = null; + errdefer { + if (result) |r| { + inline for (unionInfo.fields) |u_field| { + if (r == @field(UnionTagType, u_field.name)) { + parseFree(u_field.type, allocator, @field(r, u_field.name)); + } + } + } + } + + var name_token: ?Token = try source.nextAllocMax(allocator, .alloc_if_needed, options.max_value_len.?); + errdefer { + if (name_token) |t| { + freeAllocated(allocator, t); + } + } + const field_name = switch (name_token.?) { + .string => |slice| slice, + .allocated_string => |slice| slice, + else => return error.UnexpectedToken, + }; + + inline for (unionInfo.fields) |u_field| { + if (std.mem.eql(u8, u_field.name, field_name)) { + // Free the name token now in case we're using an allocator that optimizes freeing the last allocated object. + // (Recursing into parseInternal() might trigger more allocations.) + freeAllocated(allocator, name_token.?); + name_token = null; + + if (u_field.type == void) { + // void isn't really a json type, but we can support void payload union tags with {} as a value. + if (.object_begin != try source.next()) return error.UnexpectedToken; + if (.object_end != try source.next()) return error.UnexpectedToken; + result = @unionInit(T, u_field.name, {}); + } else { + // Recurse. + result = @unionInit(T, u_field.name, try parseInternal(u_field.type, allocator, source, options)); + } + break; + } + } else { + // Didn't match anything. 
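+                // The object key matches none of the union's field names, e.g.
+                // {"bogus":1} when no field is named "bogus" (exercised by the
+                // "parse into tagged union errors" test in static_test.zig).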
+ return error.UnknownField; + } + + if (.object_end != try source.next()) return error.UnexpectedToken; + + return result.?; + }, + + .Struct => |structInfo| { + if (structInfo.is_tuple) { + if (.array_begin != try source.next()) return error.UnexpectedToken; + + var r: T = undefined; + var fields_seen: usize = 0; + errdefer { + inline for (0..structInfo.fields.len) |i| { + if (i < fields_seen) { + parseFree(structInfo.fields[i].type, allocator, r[i]); + } + } + } + inline for (0..structInfo.fields.len) |i| { + r[i] = try parseInternal(structInfo.fields[i].type, allocator, source, options); + fields_seen = i + 1; + } + + if (.array_end != try source.next()) return error.UnexpectedToken; + + return r; + } + + if (.object_begin != try source.next()) return error.UnexpectedToken; + + var r: T = undefined; + var fields_seen = [_]bool{false} ** structInfo.fields.len; + errdefer { + inline for (structInfo.fields, 0..) |field, i| { + if (fields_seen[i]) { + parseFree(field.type, allocator, @field(r, field.name)); + } + } + } + + while (true) { + var name_token: ?Token = try source.nextAllocMax(allocator, .alloc_if_needed, options.max_value_len.?); + errdefer { + if (name_token) |t| { + freeAllocated(allocator, t); + } + } + const field_name = switch (name_token.?) { + .object_end => break, // No more fields. + .string => |slice| slice, + .allocated_string => |slice| slice, + else => return error.UnexpectedToken, + }; + + inline for (structInfo.fields, 0..) |field, i| { + if (field.is_comptime) @compileError("comptime fields are not supported: " ++ @typeName(T) ++ "." ++ field.name); + if (std.mem.eql(u8, field.name, field_name)) { + // Free the name token now in case we're using an allocator that optimizes freeing the last allocated object. + // (Recursing into parseInternal() might trigger more allocations.) + freeAllocated(allocator, name_token.?); + name_token = null; + + if (fields_seen[i]) { + switch (options.duplicate_field_behavior) { + .use_first => { + // Parse and then delete the redundant value. + // We don't want to skip the value, because we want type checking. + const ignored_value = try parseInternal(field.type, allocator, source, options); + parseFree(field.type, allocator, ignored_value); + break; + }, + .@"error" => return error.DuplicateField, + .use_last => { + // Delete the stale value. We're about to get a new one. + parseFree(field.type, allocator, @field(r, field.name)); + fields_seen[i] = false; + }, + } + } + @field(r, field.name) = try parseInternal(field.type, allocator, source, options); + fields_seen[i] = true; + break; + } + } else { + // Didn't match anything. + freeAllocated(allocator, name_token.?); + if (options.ignore_unknown_fields) { + try source.skipValue(); + } else { + return error.UnknownField; + } + } + } + inline for (structInfo.fields, 0..) |field, i| { + if (!fields_seen[i]) { + if (field.default_value) |default_ptr| { + const default = @ptrCast(*align(1) const field.type, default_ptr).*; + @field(r, field.name) = default; + } else { + return error.MissingField; + } + } + } + return r; + }, + + .Array => |arrayInfo| { + switch (try source.peekNextTokenType()) { + .array_begin => { + // Typical array. + return parseInternalArray(T, arrayInfo.child, arrayInfo.len, allocator, source, options); + }, + .string => { + if (arrayInfo.child != u8) return error.UnexpectedToken; + // Fixed-length string. 
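+                    // The value may arrive as a series of .partial_string* tokens
+                    // (small read buffers and escape sequences both cause splits; see
+                    // testAllTypes in scanner_test.zig), so the loop below copies each
+                    // piece into place and requires the final .string token to land
+                    // exactly on the array's length.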
+ + var r: T = undefined; + var i: usize = 0; + while (true) { + switch (try source.next()) { + .string => |slice| { + if (i + slice.len != r.len) return error.LengthMismatch; + @memcpy(r[i..][0..slice.len], slice); + break; + }, + .partial_string => |slice| { + if (i + slice.len > r.len) return error.LengthMismatch; + @memcpy(r[i..][0..slice.len], slice); + i += slice.len; + }, + .partial_string_escaped_1 => |arr| { + if (i + arr.len > r.len) return error.LengthMismatch; + @memcpy(r[i..][0..arr.len], arr[0..]); + i += arr.len; + }, + .partial_string_escaped_2 => |arr| { + if (i + arr.len > r.len) return error.LengthMismatch; + @memcpy(r[i..][0..arr.len], arr[0..]); + i += arr.len; + }, + .partial_string_escaped_3 => |arr| { + if (i + arr.len > r.len) return error.LengthMismatch; + @memcpy(r[i..][0..arr.len], arr[0..]); + i += arr.len; + }, + .partial_string_escaped_4 => |arr| { + if (i + arr.len > r.len) return error.LengthMismatch; + @memcpy(r[i..][0..arr.len], arr[0..]); + i += arr.len; + }, + else => unreachable, + } + } + + return r; + }, + + else => return error.UnexpectedToken, + } + }, + + .Vector => |vecInfo| { + switch (try source.peekNextTokenType()) { + .array_begin => { + return parseInternalArray(T, vecInfo.child, vecInfo.len, allocator, source, options); + }, + else => return error.UnexpectedToken, + } + }, + + .Pointer => |ptrInfo| { + switch (ptrInfo.size) { + .One => { + const r: *ptrInfo.child = try allocator.create(ptrInfo.child); + errdefer allocator.destroy(r); + r.* = try parseInternal(ptrInfo.child, allocator, source, options); + return r; + }, + .Slice => { + switch (try source.peekNextTokenType()) { + .array_begin => { + _ = try source.next(); + + // Typical array. + var arraylist = ArrayList(ptrInfo.child).init(allocator); + errdefer { + while (arraylist.popOrNull()) |v| { + parseFree(ptrInfo.child, allocator, v); + } + arraylist.deinit(); + } + + while (true) { + switch (try source.peekNextTokenType()) { + .array_end => { + _ = try source.next(); + break; + }, + else => {}, + } + + try arraylist.ensureUnusedCapacity(1); + arraylist.appendAssumeCapacity(try parseInternal(ptrInfo.child, allocator, source, options)); + } + + if (ptrInfo.sentinel) |some| { + const sentinel_value = @ptrCast(*align(1) const ptrInfo.child, some).*; + return try arraylist.toOwnedSliceSentinel(sentinel_value); + } + + return try arraylist.toOwnedSlice(); + }, + .string => { + if (ptrInfo.child != u8) return error.UnexpectedToken; + + // Dynamic length string. + if (ptrInfo.sentinel) |sentinel_ptr| { + // Use our own array list so we can append the sentinel. 
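+                            // toOwnedSliceSentinel() below then appends the sentinel, e.g.
+                            // the 0 byte of a [:0]const u8 target (covered by the
+                            // "parse into sentinel slice" test in static_test.zig).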
+ var value_list = ArrayList(u8).init(allocator); + errdefer value_list.deinit(); + _ = try source.allocNextIntoArrayList(&value_list, .alloc_always); + return try value_list.toOwnedSliceSentinel(@ptrCast(*const u8, sentinel_ptr).*); + } + switch (try source.nextAllocMax(allocator, .alloc_always, options.max_value_len.?)) { + .allocated_string => |slice| return slice, + else => unreachable, + } + }, + else => return error.UnexpectedToken, + } + }, + else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"), + } + }, + else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"), + } + unreachable; +} + +fn parseInternalArray( + comptime T: type, + comptime Child: type, + comptime len: comptime_int, + allocator: Allocator, + source: anytype, + options: ParseOptions, +) !T { + assert(.array_begin == try source.next()); + + var r: T = undefined; + var i: usize = 0; + errdefer { + // Without the len check `r[i]` is not allowed + if (len > 0) while (true) : (i -= 1) { + parseFree(Child, allocator, r[i]); + if (i == 0) break; + }; + } + while (i < len) : (i += 1) { + r[i] = try parseInternal(Child, allocator, source, options); + } + + if (.array_end != try source.next()) return error.UnexpectedToken; + + return r; +} + +fn freeAllocated(allocator: Allocator, token: Token) void { + switch (token) { + .allocated_number, .allocated_string => |slice| { + allocator.free(slice); + }, + else => {}, + } +} + +/// Releases resources created by parseFromSlice() or parseFromTokenSource(). +pub fn parseFree(comptime T: type, allocator: Allocator, value: T) void { + switch (@typeInfo(T)) { + .Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum => {}, + .Optional => { + if (value) |v| { + return parseFree(@TypeOf(v), allocator, v); + } + }, + .Union => |unionInfo| { + if (unionInfo.tag_type) |UnionTagType| { + inline for (unionInfo.fields) |u_field| { + if (value == @field(UnionTagType, u_field.name)) { + parseFree(u_field.type, allocator, @field(value, u_field.name)); + break; + } + } + } else { + unreachable; + } + }, + .Struct => |structInfo| { + inline for (structInfo.fields) |field| { + var should_free = true; + if (field.default_value) |default| { + switch (@typeInfo(field.type)) { + // We must not attempt to free pointers to struct default values + .Pointer => |fieldPtrInfo| { + const field_value = @field(value, field.name); + const field_ptr = switch (fieldPtrInfo.size) { + .One => field_value, + .Slice => field_value.ptr, + else => unreachable, // Other pointer types are not parseable + }; + const field_addr = @ptrToInt(field_ptr); + + const casted_default = @ptrCast(*const field.type, @alignCast(@alignOf(field.type), default)).*; + const default_ptr = switch (fieldPtrInfo.size) { + .One => casted_default, + .Slice => casted_default.ptr, + else => unreachable, // Other pointer types are not parseable + }; + const default_addr = @ptrToInt(default_ptr); + + if (field_addr == default_addr) { + should_free = false; + } + }, + else => {}, + } + } + if (should_free) { + parseFree(field.type, allocator, @field(value, field.name)); + } + } + }, + .Array => |arrayInfo| { + for (value) |v| { + parseFree(arrayInfo.child, allocator, v); + } + }, + .Vector => |vecInfo| { + var i: usize = 0; + while (i < vecInfo.len) : (i += 1) { + parseFree(vecInfo.child, allocator, value[i]); + } + }, + .Pointer => |ptrInfo| { + switch (ptrInfo.size) { + .One => { + parseFree(ptrInfo.child, allocator, value.*); + allocator.destroy(value); + }, + .Slice => { + for (value) |v| { + 
parseFree(ptrInfo.child, allocator, v); + } + allocator.free(value); + }, + else => unreachable, + } + }, + else => unreachable, + } +} + +test { + _ = @import("./static_test.zig"); +} diff --git a/lib/std/json/static_test.zig b/lib/std/json/static_test.zig new file mode 100644 index 0000000000..b512f8a890 --- /dev/null +++ b/lib/std/json/static_test.zig @@ -0,0 +1,437 @@ +const std = @import("std"); +const testing = std.testing; + +const parseFromSlice = @import("./static.zig").parseFromSlice; +const parseFromTokenSource = @import("./static.zig").parseFromTokenSource; +const parseFree = @import("./static.zig").parseFree; +const ParseOptions = @import("./static.zig").ParseOptions; +const JsonScanner = @import("./scanner.zig").Scanner; +const jsonReader = @import("./scanner.zig").reader; + +test "parse" { + try testing.expectEqual(false, try parseFromSlice(bool, testing.allocator, "false", .{})); + try testing.expectEqual(true, try parseFromSlice(bool, testing.allocator, "true", .{})); + try testing.expectEqual(@as(u1, 1), try parseFromSlice(u1, testing.allocator, "1", .{})); + try testing.expectError(error.Overflow, parseFromSlice(u1, testing.allocator, "50", .{})); + try testing.expectEqual(@as(u64, 42), try parseFromSlice(u64, testing.allocator, "42", .{})); + try testing.expectEqual(@as(f64, 42), try parseFromSlice(f64, testing.allocator, "42.0", .{})); + try testing.expectEqual(@as(?bool, null), try parseFromSlice(?bool, testing.allocator, "null", .{})); + try testing.expectEqual(@as(?bool, true), try parseFromSlice(?bool, testing.allocator, "true", .{})); + + try testing.expectEqual(@as([3]u8, "foo".*), try parseFromSlice([3]u8, testing.allocator, "\"foo\"", .{})); + try testing.expectEqual(@as([3]u8, "foo".*), try parseFromSlice([3]u8, testing.allocator, "[102, 111, 111]", .{})); + try testing.expectEqual(@as([0]u8, undefined), try parseFromSlice([0]u8, testing.allocator, "[]", .{})); + + try testing.expectEqual(@as(u64, 12345678901234567890), try parseFromSlice(u64, testing.allocator, "\"12345678901234567890\"", .{})); + try testing.expectEqual(@as(f64, 123.456), try parseFromSlice(f64, testing.allocator, "\"123.456\"", .{})); +} + +test "parse into enum" { + const T = enum(u32) { + Foo = 42, + Bar, + @"with\\escape", + }; + try testing.expectEqual(@as(T, .Foo), try parseFromSlice(T, testing.allocator, "\"Foo\"", .{})); + try testing.expectEqual(@as(T, .Foo), try parseFromSlice(T, testing.allocator, "42", .{})); + try testing.expectEqual(@as(T, .@"with\\escape"), try parseFromSlice(T, testing.allocator, "\"with\\\\escape\"", .{})); + try testing.expectError(error.InvalidEnumTag, parseFromSlice(T, testing.allocator, "5", .{})); + try testing.expectError(error.InvalidEnumTag, parseFromSlice(T, testing.allocator, "\"Qux\"", .{})); +} + +test "parse into that allocates a slice" { + { + // string as string + const r = try parseFromSlice([]u8, testing.allocator, "\"foo\"", .{}); + defer parseFree([]u8, testing.allocator, r); + try testing.expectEqualSlices(u8, "foo", r); + } + { + // string as array of u8 integers + const r = try parseFromSlice([]u8, testing.allocator, "[102, 111, 111]", .{}); + defer parseFree([]u8, testing.allocator, r); + try testing.expectEqualSlices(u8, "foo", r); + } + { + const r = try parseFromSlice([]u8, testing.allocator, "\"with\\\\escape\"", .{}); + defer parseFree([]u8, testing.allocator, r); + try testing.expectEqualSlices(u8, "with\\escape", r); + } +} + +test "parse into sentinel slice" { + const result = try parseFromSlice([:0]const u8, testing.allocator, 
"\"\\n\"", .{}); + defer parseFree([:0]const u8, testing.allocator, result); + try testing.expect(std.mem.eql(u8, result, "\n")); +} + +test "parse into tagged union" { + const T = union(enum) { + nothing, + int: i32, + float: f64, + string: []const u8, + }; + try testing.expectEqual(T{ .float = 1.5 }, try parseFromSlice(T, testing.allocator, "{\"float\":1.5}", .{})); + try testing.expectEqual(T{ .int = 1 }, try parseFromSlice(T, testing.allocator, "{\"int\":1}", .{})); + try testing.expectEqual(T{ .nothing = {} }, try parseFromSlice(T, testing.allocator, "{\"nothing\":{}}", .{})); +} + +test "parse into tagged union errors" { + const T = union(enum) { + nothing, + int: i32, + float: f64, + string: []const u8, + }; + try testing.expectError(error.UnexpectedToken, parseFromSlice(T, testing.allocator, "42", .{})); + try testing.expectError(error.UnexpectedToken, parseFromSlice(T, testing.allocator, "{}", .{})); + try testing.expectError(error.UnknownField, parseFromSlice(T, testing.allocator, "{\"bogus\":1}", .{})); + try testing.expectError(error.UnexpectedToken, parseFromSlice(T, testing.allocator, "{\"int\":1, \"int\":1", .{})); + try testing.expectError(error.UnexpectedToken, parseFromSlice(T, testing.allocator, "{\"int\":1, \"float\":1.0}", .{})); + try testing.expectError(error.UnexpectedToken, parseFromSlice(T, testing.allocator, "{\"nothing\":null}", .{})); + try testing.expectError(error.UnexpectedToken, parseFromSlice(T, testing.allocator, "{\"nothing\":{\"no\":0}}", .{})); + + // Allocator failure + var fail_alloc = testing.FailingAllocator.init(testing.allocator, 0); + const failing_allocator = fail_alloc.allocator(); + try testing.expectError(error.OutOfMemory, parseFromSlice(T, failing_allocator, "{\"string\"\"foo\"}", .{})); +} + +test "parseFree descends into tagged union" { + const T = union(enum) { + nothing, + int: i32, + float: f64, + string: []const u8, + }; + const r = try parseFromSlice(T, testing.allocator, "{\"string\":\"foo\"}", .{}); + try testing.expectEqualSlices(u8, "foo", r.string); + parseFree(T, testing.allocator, r); +} + +test "parse into struct with no fields" { + const T = struct {}; + try testing.expectEqual(T{}, try parseFromSlice(T, testing.allocator, "{}", .{})); +} + +const test_const_value: usize = 123; + +test "parse into struct with default const pointer field" { + const T = struct { a: *const usize = &test_const_value }; + try testing.expectEqual(T{}, try parseFromSlice(T, testing.allocator, "{}", .{})); +} + +const test_default_usize: usize = 123; +const test_default_usize_ptr: *align(1) const usize = &test_default_usize; +const test_default_str: []const u8 = "test str"; +const test_default_str_slice: [2][]const u8 = [_][]const u8{ + "test1", + "test2", +}; + +test "freeing parsed structs with pointers to default values" { + const T = struct { + int: *const usize = &test_default_usize, + int_ptr: *allowzero align(1) const usize = test_default_usize_ptr, + str: []const u8 = test_default_str, + str_slice: []const []const u8 = &test_default_str_slice, + }; + + const parsed = try parseFromSlice(T, testing.allocator, "{}", .{}); + try testing.expectEqual(T{}, parsed); + // This will panic if it tries to free global constants: + parseFree(T, testing.allocator, parsed); +} + +test "parse into struct where destination and source lengths mismatch" { + const T = struct { a: [2]u8 }; + try testing.expectError(error.LengthMismatch, parseFromSlice(T, testing.allocator, "{\"a\": \"bbb\"}", .{})); +} + +test "parse into struct with misc fields" { + const T = 
struct {
+        int: i64,
+        float: f64,
+        @"with\\escape": bool,
+        @"withąunicode😂": bool,
+        language: []const u8,
+        optional: ?bool,
+        default_field: i32 = 42,
+        static_array: [3]f64,
+        dynamic_array: []f64,
+
+        complex: struct {
+            nested: []const u8,
+        },
+
+        veryComplex: []struct {
+            foo: []const u8,
+        },
+
+        a_union: Union,
+        const Union = union(enum) {
+            x: u8,
+            float: f64,
+            string: []const u8,
+        };
+    };
+    var document_str =
+        \\{
+        \\ "int": 420,
+        \\ "float": 3.14,
+        \\ "with\\escape": true,
+        \\ "with\u0105unicode\ud83d\ude02": false,
+        \\ "language": "zig",
+        \\ "optional": null,
+        \\ "static_array": [66.6, 420.420, 69.69],
+        \\ "dynamic_array": [66.6, 420.420, 69.69],
+        \\ "complex": {
+        \\ "nested": "zig"
+        \\ },
+        \\ "veryComplex": [
+        \\ {
+        \\ "foo": "zig"
+        \\ }, {
+        \\ "foo": "rocks"
+        \\ }
+        \\ ],
+        \\ "a_union": {
+        \\ "float": 100000
+        \\ }
+        \\}
+    ;
+    const r = try parseFromSlice(T, testing.allocator, document_str, .{});
+    defer parseFree(T, testing.allocator, r);
+    try testing.expectEqual(@as(i64, 420), r.int);
+    try testing.expectEqual(@as(f64, 3.14), r.float);
+    try testing.expectEqual(true, r.@"with\\escape");
+    try testing.expectEqual(false, r.@"withąunicode😂");
+    try testing.expectEqualSlices(u8, "zig", r.language);
+    try testing.expectEqual(@as(?bool, null), r.optional);
+    try testing.expectEqual(@as(i32, 42), r.default_field);
+    try testing.expectEqual(@as(f64, 66.6), r.static_array[0]);
+    try testing.expectEqual(@as(f64, 420.420), r.static_array[1]);
+    try testing.expectEqual(@as(f64, 69.69), r.static_array[2]);
+    try testing.expectEqual(@as(usize, 3), r.dynamic_array.len);
+    try testing.expectEqual(@as(f64, 66.6), r.dynamic_array[0]);
+    try testing.expectEqual(@as(f64, 420.420), r.dynamic_array[1]);
+    try testing.expectEqual(@as(f64, 69.69), r.dynamic_array[2]);
+    try testing.expectEqualSlices(u8, r.complex.nested, "zig");
+    try testing.expectEqualSlices(u8, "zig", r.veryComplex[0].foo);
+    try testing.expectEqualSlices(u8, "rocks", r.veryComplex[1].foo);
+    try testing.expectEqual(T.Union{ .float = 100000 }, r.a_union);
+}
+
+test "parse into struct with strings and arrays with sentinels" {
+    const T = struct {
+        language: [:0]const u8,
+        language_without_sentinel: []const u8,
+        data: [:99]const i32,
+        simple_data: []const i32,
+    };
+    var document_str =
+        \\{
+        \\ "language": "zig",
+        \\ "language_without_sentinel": "zig again!",
+        \\ "data": [1, 2, 3],
+        \\ "simple_data": [4, 5, 6]
+        \\}
+    ;
+    const r = try parseFromSlice(T, testing.allocator, document_str, .{});
+    defer parseFree(T, testing.allocator, r);
+
+    try testing.expectEqualSentinel(u8, 0, "zig", r.language);
+
+    const data = [_:99]i32{ 1, 2, 3 };
+    try testing.expectEqualSentinel(i32, 99, data[0..data.len], r.data);
+
+    // Make sure that arrays that aren't supposed to have a sentinel still parse without one.
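+    // (std.meta.sentinel() returns null for a type with no sentinel.)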
+ try testing.expectEqual(@as(?i32, null), std.meta.sentinel(@TypeOf(r.simple_data))); + try testing.expectEqual(@as(?u8, null), std.meta.sentinel(@TypeOf(r.language_without_sentinel))); +} + +test "parse into struct with duplicate field" { + // allow allocator to detect double frees by keeping bucket in use + const ballast = try testing.allocator.alloc(u64, 1); + defer testing.allocator.free(ballast); + + const options_first = ParseOptions{ .duplicate_field_behavior = .use_first }; + const options_last = ParseOptions{ .duplicate_field_behavior = .use_last }; + + const str = "{ \"a\": 1, \"a\": 0.25 }"; + + const T1 = struct { a: *u64 }; + // both .use_first and .use_last should fail because second "a" value isn't a u64 + try testing.expectError(error.InvalidNumber, parseFromSlice(T1, testing.allocator, str, options_first)); + try testing.expectError(error.InvalidNumber, parseFromSlice(T1, testing.allocator, str, options_last)); + + const T2 = struct { a: f64 }; + try testing.expectEqual(T2{ .a = 1.0 }, try parseFromSlice(T2, testing.allocator, str, options_first)); + try testing.expectEqual(T2{ .a = 0.25 }, try parseFromSlice(T2, testing.allocator, str, options_last)); +} + +test "parse into struct ignoring unknown fields" { + const T = struct { + int: i64, + language: []const u8, + }; + + var str = + \\{ + \\ "int": 420, + \\ "float": 3.14, + \\ "with\\escape": true, + \\ "with\u0105unicode\ud83d\ude02": false, + \\ "optional": null, + \\ "static_array": [66.6, 420.420, 69.69], + \\ "dynamic_array": [66.6, 420.420, 69.69], + \\ "complex": { + \\ "nested": "zig" + \\ }, + \\ "veryComplex": [ + \\ { + \\ "foo": "zig" + \\ }, { + \\ "foo": "rocks" + \\ } + \\ ], + \\ "a_union": { + \\ "float": 100000 + \\ }, + \\ "language": "zig" + \\} + ; + const r = try parseFromSlice(T, testing.allocator, str, .{ .ignore_unknown_fields = true }); + defer parseFree(T, testing.allocator, r); + + try testing.expectEqual(@as(i64, 420), r.int); + try testing.expectEqualSlices(u8, "zig", r.language); +} + +test "parse into tuple" { + const Union = union(enum) { + char: u8, + float: f64, + string: []const u8, + }; + const T = std.meta.Tuple(&.{ + i64, + f64, + bool, + []const u8, + ?bool, + struct { + foo: i32, + bar: []const u8, + }, + std.meta.Tuple(&.{ u8, []const u8, u8 }), + Union, + }); + var str = + \\[ + \\ 420, + \\ 3.14, + \\ true, + \\ "zig", + \\ null, + \\ { + \\ "foo": 1, + \\ "bar": "zero" + \\ }, + \\ [4, "två", 42], + \\ {"float": 12.34} + \\] + ; + const r = try parseFromSlice(T, testing.allocator, str, .{}); + defer parseFree(T, testing.allocator, r); + try testing.expectEqual(@as(i64, 420), r[0]); + try testing.expectEqual(@as(f64, 3.14), r[1]); + try testing.expectEqual(true, r[2]); + try testing.expectEqualSlices(u8, "zig", r[3]); + try testing.expectEqual(@as(?bool, null), r[4]); + try testing.expectEqual(@as(i32, 1), r[5].foo); + try testing.expectEqualSlices(u8, "zero", r[5].bar); + try testing.expectEqual(@as(u8, 4), r[6][0]); + try testing.expectEqualSlices(u8, "två", r[6][1]); + try testing.expectEqual(@as(u8, 42), r[6][2]); + try testing.expectEqual(Union{ .float = 12.34 }, r[7]); +} + +const ParseIntoRecursiveUnionDefinitionValue = union(enum) { + integer: i64, + array: []const ParseIntoRecursiveUnionDefinitionValue, +}; + +test "parse into recursive union definition" { + const T = struct { + values: ParseIntoRecursiveUnionDefinitionValue, + }; + + const r = try parseFromSlice(T, testing.allocator, "{\"values\":{\"array\":[{\"integer\":58}]}}", .{}); + defer parseFree(T, 
testing.allocator, r); + + try testing.expectEqual(@as(i64, 58), r.values.array[0].integer); +} + +const ParseIntoDoubleRecursiveUnionValueFirst = union(enum) { + integer: i64, + array: []const ParseIntoDoubleRecursiveUnionValueSecond, +}; + +const ParseIntoDoubleRecursiveUnionValueSecond = union(enum) { + boolean: bool, + array: []const ParseIntoDoubleRecursiveUnionValueFirst, +}; + +test "parse into double recursive union definition" { + const T = struct { + values: ParseIntoDoubleRecursiveUnionValueFirst, + }; + + const r = try parseFromSlice(T, testing.allocator, "{\"values\":{\"array\":[{\"array\":[{\"integer\":58}]}]}}", .{}); + defer parseFree(T, testing.allocator, r); + + try testing.expectEqual(@as(i64, 58), r.values.array[0].array[0].integer); +} + +test "parse exponential into int" { + const T = struct { int: i64 }; + const r = try parseFromSlice(T, testing.allocator, "{ \"int\": 4.2e2 }", .{}); + try testing.expectEqual(@as(i64, 420), r.int); + try testing.expectError(error.InvalidNumber, parseFromSlice(T, testing.allocator, "{ \"int\": 0.042e2 }", .{})); + try testing.expectError(error.Overflow, parseFromSlice(T, testing.allocator, "{ \"int\": 18446744073709551616.0 }", .{})); +} + +test "parseFromTokenSource" { + var scanner = JsonScanner.initCompleteInput(testing.allocator, "123"); + defer scanner.deinit(); + try testing.expectEqual(@as(u32, 123), try parseFromTokenSource(u32, testing.allocator, &scanner, .{})); + + var stream = std.io.fixedBufferStream("123"); + var json_reader = jsonReader(std.testing.allocator, stream.reader()); + defer json_reader.deinit(); + try testing.expectEqual(@as(u32, 123), try parseFromTokenSource(u32, testing.allocator, &json_reader, .{})); +} + +test "max_value_len" { + try testing.expectError(error.ValueTooLong, parseFromSlice([]u8, testing.allocator, "\"0123456789\"", .{ .max_value_len = 5 })); +} + +test "parse into vector" { + const T = struct { + vec_i32: @Vector(4, i32), + vec_f32: @Vector(2, f32), + }; + var s = + \\{ + \\ "vec_f32": [1.5, 2.5], + \\ "vec_i32": [4, 5, 6, 7] + \\} + ; + const r = try parseFromSlice(T, testing.allocator, s, .{}); + defer parseFree(T, testing.allocator, r); + try testing.expectApproxEqAbs(@as(f32, 1.5), r.vec_f32[0], 0.0000001); + try testing.expectApproxEqAbs(@as(f32, 2.5), r.vec_f32[1], 0.0000001); + try testing.expectEqual(@Vector(4, i32){ 4, 5, 6, 7 }, r.vec_i32); +} diff --git a/lib/std/json/stringify.zig b/lib/std/json/stringify.zig new file mode 100644 index 0000000000..6d10e95330 --- /dev/null +++ b/lib/std/json/stringify.zig @@ -0,0 +1,313 @@ +const std = @import("std"); +const mem = std.mem; +const assert = std.debug.assert; + +pub const StringifyOptions = struct { + pub const Whitespace = struct { + /// How many indentation levels deep are we? + indent_level: usize = 0, + + /// What character(s) should be used for indentation? + indent: union(enum) { + space: u8, + tab: void, + none: void, + } = .{ .space = 4 }, + + /// After a colon, should whitespace be inserted? 
+        separator: bool = true,
+
+        pub fn outputIndent(
+            whitespace: @This(),
+            out_stream: anytype,
+        ) @TypeOf(out_stream).Error!void {
+            var char: u8 = undefined;
+            var n_chars: usize = undefined;
+            switch (whitespace.indent) {
+                .space => |n_spaces| {
+                    char = ' ';
+                    n_chars = n_spaces;
+                },
+                .tab => {
+                    char = '\t';
+                    n_chars = 1;
+                },
+                .none => return,
+            }
+            try out_stream.writeByte('\n');
+            n_chars *= whitespace.indent_level;
+            try out_stream.writeByteNTimes(char, n_chars);
+        }
+    };
+
+    /// Controls the whitespace emitted
+    whitespace: Whitespace = .{ .indent = .none, .separator = false },
+
+    /// Should optional fields with null value be written?
+    emit_null_optional_fields: bool = true,
+
+    string: StringOptions = StringOptions{ .String = .{} },
+
+    /// Should `[]u8` be serialized as a string or as an array?
+    pub const StringOptions = union(enum) {
+        Array,
+        String: StringOutputOptions,
+
+        /// String output options
+        const StringOutputOptions = struct {
+            /// Should '/' be escaped in strings?
+            escape_solidus: bool = false,
+
+            /// Should unicode characters be escaped in strings?
+            escape_unicode: bool = false,
+        };
+    };
+};
+
+fn outputUnicodeEscape(
+    codepoint: u21,
+    out_stream: anytype,
+) !void {
+    if (codepoint <= 0xFFFF) {
+        // If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF),
+        // then it may be represented as a six-character sequence: a reverse solidus, followed
+        // by the lowercase letter u, followed by four hexadecimal digits that encode the character's code point.
+        try out_stream.writeAll("\\u");
+        try std.fmt.formatIntValue(codepoint, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
+    } else {
+        assert(codepoint <= 0x10FFFF);
+        // To escape an extended character that is not in the Basic Multilingual Plane,
+        // the character is represented as a 12-character sequence, encoding the UTF-16 surrogate pair.
+        const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800;
+        const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00;
+        try out_stream.writeAll("\\u");
+        try std.fmt.formatIntValue(high, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
+        try out_stream.writeAll("\\u");
+        try std.fmt.formatIntValue(low, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
+    }
+}
+
+/// Write `string` to `writer` as a JSON encoded string.
+pub fn encodeJsonString(string: []const u8, options: StringifyOptions, writer: anytype) !void {
+    try writer.writeByte('\"');
+    try encodeJsonStringChars(string, options, writer);
+    try writer.writeByte('\"');
+}
+
+/// Write `chars` to `writer` as JSON encoded string characters.
+pub fn encodeJsonStringChars(chars: []const u8, options: StringifyOptions, writer: anytype) !void {
+    var i: usize = 0;
+    while (i < chars.len) : (i += 1) {
+        switch (chars[i]) {
+            // normal ascii character
+            0x20...0x21, 0x23...0x2E, 0x30...0x5B, 0x5D...0x7F => |c| try writer.writeByte(c),
+            // only 2 characters that *must* be escaped
+            '\\' => try writer.writeAll("\\\\"),
+            '\"' => try writer.writeAll("\\\""),
+            // escaping the solidus is optional
+            '/' => {
+                if (options.string.String.escape_solidus) {
+                    try writer.writeAll("\\/");
+                } else {
+                    try writer.writeByte('/');
+                }
+            },
+            // control characters with short escapes
+            // TODO: option to switch between unicode and 'short' forms?
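+            // These are the two-character short escapes from RFC 8259; each character
+            // could equivalently be written in \u form (e.g. "\u0008" for backspace),
+            // which is what the TODO above refers to.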
+ 0x8 => try writer.writeAll("\\b"), + 0xC => try writer.writeAll("\\f"), + '\n' => try writer.writeAll("\\n"), + '\r' => try writer.writeAll("\\r"), + '\t' => try writer.writeAll("\\t"), + else => { + const ulen = std.unicode.utf8ByteSequenceLength(chars[i]) catch unreachable; + // control characters (only things left with 1 byte length) should always be printed as unicode escapes + if (ulen == 1 or options.string.String.escape_unicode) { + const codepoint = std.unicode.utf8Decode(chars[i..][0..ulen]) catch unreachable; + try outputUnicodeEscape(codepoint, writer); + } else { + try writer.writeAll(chars[i..][0..ulen]); + } + i += ulen - 1; + }, + } + } +} + +pub fn stringify( + value: anytype, + options: StringifyOptions, + out_stream: anytype, +) !void { + const T = @TypeOf(value); + switch (@typeInfo(T)) { + .Float, .ComptimeFloat => { + return std.fmt.formatFloatScientific(value, std.fmt.FormatOptions{}, out_stream); + }, + .Int, .ComptimeInt => { + return std.fmt.formatIntValue(value, "", std.fmt.FormatOptions{}, out_stream); + }, + .Bool => { + return out_stream.writeAll(if (value) "true" else "false"); + }, + .Null => { + return out_stream.writeAll("null"); + }, + .Optional => { + if (value) |payload| { + return try stringify(payload, options, out_stream); + } else { + return try stringify(null, options, out_stream); + } + }, + .Enum => { + if (comptime std.meta.trait.hasFn("jsonStringify")(T)) { + return value.jsonStringify(options, out_stream); + } + + @compileError("Unable to stringify enum '" ++ @typeName(T) ++ "'"); + }, + .Union => { + if (comptime std.meta.trait.hasFn("jsonStringify")(T)) { + return value.jsonStringify(options, out_stream); + } + + const info = @typeInfo(T).Union; + if (info.tag_type) |UnionTagType| { + try out_stream.writeByte('{'); + var child_options = options; + child_options.whitespace.indent_level += 1; + inline for (info.fields) |u_field| { + if (value == @field(UnionTagType, u_field.name)) { + try child_options.whitespace.outputIndent(out_stream); + try encodeJsonString(u_field.name, options, out_stream); + try out_stream.writeByte(':'); + if (child_options.whitespace.separator) { + try out_stream.writeByte(' '); + } + if (u_field.type == void) { + try out_stream.writeAll("{}"); + } else { + try stringify(@field(value, u_field.name), child_options, out_stream); + } + break; + } + } else { + unreachable; // No active tag? 
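+                    // The `inline for` above always breaks: a valid tagged
+                    // union value compares equal to exactly one of its tags.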
+ } + try options.whitespace.outputIndent(out_stream); + try out_stream.writeByte('}'); + return; + } else { + @compileError("Unable to stringify untagged union '" ++ @typeName(T) ++ "'"); + } + }, + .Struct => |S| { + if (comptime std.meta.trait.hasFn("jsonStringify")(T)) { + return value.jsonStringify(options, out_stream); + } + + try out_stream.writeByte(if (S.is_tuple) '[' else '{'); + var field_output = false; + var child_options = options; + child_options.whitespace.indent_level += 1; + inline for (S.fields) |Field| { + // don't include void fields + if (Field.type == void) continue; + + var emit_field = true; + + // don't include optional fields that are null when emit_null_optional_fields is set to false + if (@typeInfo(Field.type) == .Optional) { + if (options.emit_null_optional_fields == false) { + if (@field(value, Field.name) == null) { + emit_field = false; + } + } + } + + if (emit_field) { + if (!field_output) { + field_output = true; + } else { + try out_stream.writeByte(','); + } + try child_options.whitespace.outputIndent(out_stream); + if (!S.is_tuple) { + try encodeJsonString(Field.name, options, out_stream); + try out_stream.writeByte(':'); + if (child_options.whitespace.separator) { + try out_stream.writeByte(' '); + } + } + try stringify(@field(value, Field.name), child_options, out_stream); + } + } + if (field_output) { + try options.whitespace.outputIndent(out_stream); + } + try out_stream.writeByte(if (S.is_tuple) ']' else '}'); + return; + }, + .ErrorSet => return stringify(@as([]const u8, @errorName(value)), options, out_stream), + .Pointer => |ptr_info| switch (ptr_info.size) { + .One => switch (@typeInfo(ptr_info.child)) { + .Array => { + const Slice = []const std.meta.Elem(ptr_info.child); + return stringify(@as(Slice, value), options, out_stream); + }, + else => { + // TODO: avoid loops? + return stringify(value.*, options, out_stream); + }, + }, + .Many, .Slice => { + if (ptr_info.size == .Many and ptr_info.sentinel == null) + @compileError("unable to stringify type '" ++ @typeName(T) ++ "' without sentinel"); + const slice = if (ptr_info.size == .Many) mem.span(value) else value; + + if (ptr_info.child == u8 and options.string == .String and std.unicode.utf8ValidateSlice(slice)) { + try encodeJsonString(slice, options, out_stream); + return; + } + + try out_stream.writeByte('['); + var child_options = options; + child_options.whitespace.indent_level += 1; + for (slice, 0..) |x, i| { + if (i != 0) { + try out_stream.writeByte(','); + } + try child_options.whitespace.outputIndent(out_stream); + try stringify(x, child_options, out_stream); + } + if (slice.len != 0) { + try options.whitespace.outputIndent(out_stream); + } + try out_stream.writeByte(']'); + return; + }, + else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"), + }, + .Array => return stringify(&value, options, out_stream), + .Vector => |info| { + const array: [info.len]info.child = value; + return stringify(&array, options, out_stream); + }, + else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"), + } + unreachable; +} + +// Same as `stringify` but accepts an Allocator and stores result in dynamically allocated memory instead of using a Writer. +// Caller owns returned memory. 
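+//
+// A usage sketch (illustrative; any `std.mem.Allocator` works as `allocator`):
+//
+//     const json_text = try stringifyAlloc(allocator, .{ .x = 1, .y = 2 }, .{});
+//     defer allocator.free(json_text);
+//
+// With the default options this writes `{"x":1,"y":2}`.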
+pub fn stringifyAlloc(allocator: std.mem.Allocator, value: anytype, options: StringifyOptions) ![]const u8 { + var list = std.ArrayList(u8).init(allocator); + errdefer list.deinit(); + try stringify(value, options, list.writer()); + return list.toOwnedSlice(); +} + +test { + _ = @import("./stringify_test.zig"); +} diff --git a/lib/std/json/stringify_test.zig b/lib/std/json/stringify_test.zig new file mode 100644 index 0000000000..d9fab3c6e7 --- /dev/null +++ b/lib/std/json/stringify_test.zig @@ -0,0 +1,280 @@ +const std = @import("std"); +const mem = std.mem; +const testing = std.testing; + +const StringifyOptions = @import("stringify.zig").StringifyOptions; +const stringify = @import("stringify.zig").stringify; +const stringifyAlloc = @import("stringify.zig").stringifyAlloc; + +test "stringify null optional fields" { + const MyStruct = struct { + optional: ?[]const u8 = null, + required: []const u8 = "something", + another_optional: ?[]const u8 = null, + another_required: []const u8 = "something else", + }; + try teststringify( + \\{"optional":null,"required":"something","another_optional":null,"another_required":"something else"} + , + MyStruct{}, + StringifyOptions{}, + ); + try teststringify( + \\{"required":"something","another_required":"something else"} + , + MyStruct{}, + StringifyOptions{ .emit_null_optional_fields = false }, + ); +} + +test "stringify basic types" { + try teststringify("false", false, StringifyOptions{}); + try teststringify("true", true, StringifyOptions{}); + try teststringify("null", @as(?u8, null), StringifyOptions{}); + try teststringify("null", @as(?*u32, null), StringifyOptions{}); + try teststringify("42", 42, StringifyOptions{}); + try teststringify("4.2e+01", 42.0, StringifyOptions{}); + try teststringify("42", @as(u8, 42), StringifyOptions{}); + try teststringify("42", @as(u128, 42), StringifyOptions{}); + try teststringify("4.2e+01", @as(f32, 42), StringifyOptions{}); + try teststringify("4.2e+01", @as(f64, 42), StringifyOptions{}); + try teststringify("\"ItBroke\"", @as(anyerror, error.ItBroke), StringifyOptions{}); +} + +test "stringify string" { + try teststringify("\"hello\"", "hello", StringifyOptions{}); + try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", StringifyOptions{}); + try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); + try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", StringifyOptions{}); + try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); + try teststringify("\"with unicode\u{80}\"", "with unicode\u{80}", StringifyOptions{}); + try teststringify("\"with unicode\\u0080\"", "with unicode\u{80}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); + try teststringify("\"with unicode\u{FF}\"", "with unicode\u{FF}", StringifyOptions{}); + try teststringify("\"with unicode\\u00ff\"", "with unicode\u{FF}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); + try teststringify("\"with unicode\u{100}\"", "with unicode\u{100}", StringifyOptions{}); + try teststringify("\"with unicode\\u0100\"", "with unicode\u{100}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); + try teststringify("\"with unicode\u{800}\"", "with unicode\u{800}", StringifyOptions{}); + try teststringify("\"with unicode\\u0800\"", "with unicode\u{800}", StringifyOptions{ .string = .{ .String = .{ 
.escape_unicode = true } } }); + try teststringify("\"with unicode\u{8000}\"", "with unicode\u{8000}", StringifyOptions{}); + try teststringify("\"with unicode\\u8000\"", "with unicode\u{8000}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); + try teststringify("\"with unicode\u{D799}\"", "with unicode\u{D799}", StringifyOptions{}); + try teststringify("\"with unicode\\ud799\"", "with unicode\u{D799}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); + try teststringify("\"with unicode\u{10000}\"", "with unicode\u{10000}", StringifyOptions{}); + try teststringify("\"with unicode\\ud800\\udc00\"", "with unicode\u{10000}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); + try teststringify("\"with unicode\u{10FFFF}\"", "with unicode\u{10FFFF}", StringifyOptions{}); + try teststringify("\"with unicode\\udbff\\udfff\"", "with unicode\u{10FFFF}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); + try teststringify("\"/\"", "/", StringifyOptions{}); + try teststringify("\"\\/\"", "/", StringifyOptions{ .string = .{ .String = .{ .escape_solidus = true } } }); +} + +test "stringify many-item sentinel-terminated string" { + try teststringify("\"hello\"", @as([*:0]const u8, "hello"), StringifyOptions{}); + try teststringify("\"with\\nescapes\\r\"", @as([*:0]const u8, "with\nescapes\r"), StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); + try teststringify("\"with unicode\\u0001\"", @as([*:0]const u8, "with unicode\u{1}"), StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); +} + +test "stringify tagged unions" { + const T = union(enum) { + nothing, + foo: u32, + bar: bool, + }; + try teststringify("{\"nothing\":{}}", T{ .nothing = {} }, StringifyOptions{}); + try teststringify("{\"foo\":42}", T{ .foo = 42 }, StringifyOptions{}); + try teststringify("{\"bar\":true}", T{ .bar = true }, StringifyOptions{}); +} + +test "stringify struct" { + try teststringify("{\"foo\":42}", struct { + foo: u32, + }{ .foo = 42 }, StringifyOptions{}); +} + +test "stringify struct with string as array" { + try teststringify("{\"foo\":\"bar\"}", .{ .foo = "bar" }, StringifyOptions{}); + try teststringify("{\"foo\":[98,97,114]}", .{ .foo = "bar" }, StringifyOptions{ .string = .Array }); +} + +test "stringify struct with indentation" { + try teststringify( + \\{ + \\ "foo": 42, + \\ "bar": [ + \\ 1, + \\ 2, + \\ 3 + \\ ] + \\} + , + struct { + foo: u32, + bar: [3]u32, + }{ + .foo = 42, + .bar = .{ 1, 2, 3 }, + }, + StringifyOptions{ + .whitespace = .{}, + }, + ); + try teststringify( + "{\n\t\"foo\":42,\n\t\"bar\":[\n\t\t1,\n\t\t2,\n\t\t3\n\t]\n}", + struct { + foo: u32, + bar: [3]u32, + }{ + .foo = 42, + .bar = .{ 1, 2, 3 }, + }, + StringifyOptions{ + .whitespace = .{ + .indent = .tab, + .separator = false, + }, + }, + ); + try teststringify( + \\{"foo":42,"bar":[1,2,3]} + , + struct { + foo: u32, + bar: [3]u32, + }{ + .foo = 42, + .bar = .{ 1, 2, 3 }, + }, + StringifyOptions{ + .whitespace = .{ + .indent = .none, + .separator = false, + }, + }, + ); +} + +test "stringify struct with void field" { + try teststringify("{\"foo\":42}", struct { + foo: u32, + bar: void = {}, + }{ .foo = 42 }, StringifyOptions{}); +} + +test "stringify array of structs" { + const MyStruct = struct { + foo: u32, + }; + try teststringify("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{ + MyStruct{ .foo = 42 }, + MyStruct{ .foo = 100 }, + MyStruct{ .foo = 1000 }, + }, 
StringifyOptions{}); +} + +test "stringify struct with custom stringifier" { + try teststringify("[\"something special\",42]", struct { + foo: u32, + const Self = @This(); + pub fn jsonStringify( + value: Self, + options: StringifyOptions, + out_stream: anytype, + ) !void { + _ = value; + try out_stream.writeAll("[\"something special\","); + try stringify(42, options, out_stream); + try out_stream.writeByte(']'); + } + }{ .foo = 42 }, StringifyOptions{}); +} + +test "stringify vector" { + try teststringify("[1,1]", @splat(2, @as(u32, 1)), StringifyOptions{}); +} + +test "stringify tuple" { + try teststringify("[\"foo\",42]", std.meta.Tuple(&.{ []const u8, usize }){ "foo", 42 }, StringifyOptions{}); +} + +fn teststringify(expected: []const u8, value: anytype, options: StringifyOptions) !void { + const ValidationWriter = struct { + const Self = @This(); + pub const Writer = std.io.Writer(*Self, Error, write); + pub const Error = error{ + TooMuchData, + DifferentData, + }; + + expected_remaining: []const u8, + + fn init(exp: []const u8) Self { + return .{ .expected_remaining = exp }; + } + + pub fn writer(self: *Self) Writer { + return .{ .context = self }; + } + + fn write(self: *Self, bytes: []const u8) Error!usize { + if (self.expected_remaining.len < bytes.len) { + std.debug.print( + \\====== expected this output: ========= + \\{s} + \\======== instead found this: ========= + \\{s} + \\====================================== + , .{ + self.expected_remaining, + bytes, + }); + return error.TooMuchData; + } + if (!mem.eql(u8, self.expected_remaining[0..bytes.len], bytes)) { + std.debug.print( + \\====== expected this output: ========= + \\{s} + \\======== instead found this: ========= + \\{s} + \\====================================== + , .{ + self.expected_remaining[0..bytes.len], + bytes, + }); + return error.DifferentData; + } + self.expected_remaining = self.expected_remaining[bytes.len..]; + return bytes.len; + } + }; + + var vos = ValidationWriter.init(expected); + try stringify(value, options, vos.writer()); + if (vos.expected_remaining.len > 0) return error.NotEnoughData; +} + +test "stringify struct with custom stringify that returns a custom error" { + var ret = stringify(struct { + field: Field = .{}, + + pub const Field = struct { + field: ?[]*Field = null, + + const Self = @This(); + pub fn jsonStringify(_: Self, _: StringifyOptions, _: anytype) error{CustomError}!void { + return error.CustomError; + } + }; + }{}, StringifyOptions{}, std.io.null_writer); + + try std.testing.expectError(error.CustomError, ret); +} + +test "stringify alloc" { + const allocator = std.testing.allocator; + const expected = + \\{"foo":"bar","answer":42,"my_friend":"sammy"} + ; + const actual = try stringifyAlloc(allocator, .{ .foo = "bar", .answer = 42, .my_friend = "sammy" }, .{}); + defer allocator.free(actual); + + try std.testing.expectEqualStrings(expected, actual); +} diff --git a/lib/std/json/test.zig b/lib/std/json/test.zig index 7b5e7e814d..7135bc1b34 100644 --- a/lib/std/json/test.zig +++ b/lib/std/json/test.zig @@ -1,71 +1,53 @@ -// RFC 8529 conformance tests. -// -// Tests are taken from https://github.com/nst/JSONTestSuite -// Read also http://seriot.ch/parsing_json.php for a good overview. 
-
-const std = @import("../std.zig");
-const json = std.json;
+const std = @import("std");
 const testing = std.testing;
-const TokenStream = std.json.TokenStream;
-const parse = std.json.parse;
-const ParseOptions = std.json.ParseOptions;
-const parseFree = std.json.parseFree;
-const Parser = std.json.Parser;
-const mem = std.mem;
-const writeStream = std.json.writeStream;
-const Value = std.json.Value;
-const StringifyOptions = std.json.StringifyOptions;
-const stringify = std.json.stringify;
-const stringifyAlloc = std.json.stringifyAlloc;
-const StreamingParser = std.json.StreamingParser;
-const Token = std.json.Token;
-const validate = std.json.validate;
-const Array = std.json.Array;
-const ObjectMap = std.json.ObjectMap;
-const assert = std.debug.assert;
+const Parser = @import("./dynamic.zig").Parser;
+const validate = @import("./scanner.zig").validate;
+const JsonScanner = @import("./scanner.zig").Scanner;
 
-fn testNonStreaming(s: []const u8) !void {
-    var p = json.Parser.init(testing.allocator, false);
+// Support for JSONTestSuite.zig
+pub fn ok(s: []const u8) !void {
+    try testLowLevelScanner(s);
+    try testHighLevelDynamicParser(s);
+}
+pub fn err(s: []const u8) !void {
+    try testing.expect(std.meta.isError(testLowLevelScanner(s)));
+    try testing.expect(std.meta.isError(testHighLevelDynamicParser(s)));
+}
+pub fn any(s: []const u8) !void {
+    testLowLevelScanner(s) catch {};
+    testHighLevelDynamicParser(s) catch {};
+}
+fn testLowLevelScanner(s: []const u8) !void {
+    var scanner = JsonScanner.initCompleteInput(testing.allocator, s);
+    defer scanner.deinit();
+    while (true) {
+        const token = try scanner.next();
+        if (token == .end_of_document) break;
+    }
+}
+fn testHighLevelDynamicParser(s: []const u8) !void {
+    var p = Parser.init(testing.allocator, .alloc_if_needed);
     defer p.deinit();
-
     var tree = try p.parse(s);
     defer tree.deinit();
 }
 
-fn ok(s: []const u8) !void {
-    try testing.expect(json.validate(s));
-
-    try testNonStreaming(s);
+// Additional tests not part of JSONTestSuite.
+test "y_trailing_comma_after_empty" {
+    try roundTrip(
+        \\{"1":[],"2":{},"3":"4"}
+    );
 }
-
-fn err(s: []const u8) !void {
-    try testing.expect(!json.validate(s));
-
-    try testing.expect(std.meta.isError(testNonStreaming(s)));
-}
-
-fn utf8Error(s: []const u8) !void {
-    try testing.expect(!json.validate(s));
-
-    try testing.expectError(error.InvalidUtf8Byte, testNonStreaming(s));
-}
-
-fn any(s: []const u8) !void {
-    _ = json.validate(s);
-
-    testNonStreaming(s) catch {};
-}
-
-fn anyStreamingErrNonStreaming(s: []const u8) !void {
-    _ = json.validate(s);
-
-    try testing.expect(std.meta.isError(testNonStreaming(s)));
+test "n_object_closed_missing_value" {
+    try err(
+        \\{"a":}
+    );
 }
 
 fn roundTrip(s: []const u8) !void {
-    try testing.expect(json.validate(s));
+    try testing.expect(try validate(testing.allocator, s));
 
-    var p = json.Parser.init(testing.allocator, false);
+    var p = Parser.init(testing.allocator, .alloc_if_needed);
     defer p.deinit();
 
    var tree = try p.parse(s);
@@ -78,2883 +60,54 @@ fn roundTrip(s: []const u8) !void {
     try testing.expectEqualStrings(s, fbs.getWritten());
 }
 
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-//
-// Additional tests not part of test JSONTestSuite.
- -test "y_trailing_comma_after_empty" { - try roundTrip( - \\{"1":[],"2":{},"3":"4"} - ); -} - -test "n_object_closed_missing_value" { - try err( - \\{"a":} - ); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -test "y_array_arraysWithSpaces" { - try ok( - \\[[] ] - ); -} - -test "y_array_empty" { - try roundTrip( - \\[] - ); -} - -test "y_array_empty-string" { - try roundTrip( - \\[""] - ); -} - -test "y_array_ending_with_newline" { - try roundTrip( - \\["a"] - ); -} - -test "y_array_false" { - try roundTrip( - \\[false] - ); -} - -test "y_array_heterogeneous" { - try ok( - \\[null, 1, "1", {}] - ); -} - -test "y_array_null" { - try roundTrip( - \\[null] - ); -} - -test "y_array_with_1_and_newline" { - try ok( - \\[1 - \\] - ); -} - -test "y_array_with_leading_space" { - try ok( - \\ [1] - ); -} - -test "y_array_with_several_null" { - try roundTrip( - \\[1,null,null,null,2] - ); -} - -test "y_array_with_trailing_space" { - try ok("[2] "); -} - -test "y_number_0e+1" { - try ok( - \\[0e+1] - ); -} - -test "y_number_0e1" { - try ok( - \\[0e1] - ); -} - -test "y_number_after_space" { - try ok( - \\[ 4] - ); -} - -test "y_number_double_close_to_zero" { - try ok( - \\[-0.000000000000000000000000000000000000000000000000000000000000000000000000000001] - ); -} - -test "y_number_int_with_exp" { - try ok( - \\[20e1] - ); -} - -test "y_number" { - try ok( - \\[123e65] - ); -} - -test "y_number_minus_zero" { - try ok( - \\[-0] - ); -} - -test "y_number_negative_int" { - try roundTrip( - \\[-123] - ); -} - -test "y_number_negative_one" { - try roundTrip( - \\[-1] - ); -} - -test "y_number_negative_zero" { - try ok( - \\[-0] - ); -} - -test "y_number_real_capital_e" { - try ok( - \\[1E22] - ); -} - -test "y_number_real_capital_e_neg_exp" { - try ok( - \\[1E-2] - ); -} - -test "y_number_real_capital_e_pos_exp" { - try ok( - \\[1E+2] - ); -} - -test "y_number_real_exponent" { - try ok( - \\[123e45] - ); -} - -test "y_number_real_fraction_exponent" { - try ok( - \\[123.456e78] - ); -} - -test "y_number_real_neg_exp" { - try ok( - \\[1e-2] - ); -} - -test "y_number_real_pos_exponent" { - try ok( - \\[1e+2] - ); -} - -test "y_number_simple_int" { - try roundTrip( - \\[123] - ); -} - -test "y_number_simple_real" { - try ok( - \\[123.456789] - ); -} - -test "y_object_basic" { - try roundTrip( - \\{"asd":"sdf"} - ); -} - -test "y_object_duplicated_key_and_value" { - try ok( - \\{"a":"b","a":"b"} - ); -} - -test "y_object_duplicated_key" { - try ok( - \\{"a":"b","a":"c"} - ); -} - -test "y_object_empty" { - try roundTrip( - \\{} - ); -} - -test "y_object_empty_key" { - try roundTrip( - \\{"":0} - ); -} - -test "y_object_escaped_null_in_key" { - try ok( - \\{"foo\u0000bar": 42} - ); -} - -test "y_object_extreme_numbers" { - try ok( - \\{ "min": -1.0e+28, "max": 1.0e+28 } - ); -} - -test "y_object" { - try ok( - \\{"asd":"sdf", "dfg":"fgh"} - ); -} - -test "y_object_long_strings" { - try ok( - \\{"x":[{"id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}], "id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"} - ); -} - -test "y_object_simple" { - try roundTrip( - \\{"a":[]} - ); -} - -test "y_object_string_unicode" { - try ok( - \\{"title":"\u041f\u043e\u043b\u0442\u043e\u0440\u0430 \u0417\u0435\u043c\u043b\u0435\u043a\u043e\u043f\u0430" } - ); -} - -test "y_object_with_newlines" { - try ok( - \\{ - \\"a": "b" - \\} - ); -} - -test "y_string_1_2_3_bytes_UTF-8_sequences" { - try ok( - \\["\u0060\u012a\u12AB"] - ); -} - -test "y_string_accepted_surrogate_pair" { 
- try ok( - \\["\uD801\udc37"] - ); -} - -test "y_string_accepted_surrogate_pairs" { - try ok( - \\["\ud83d\ude39\ud83d\udc8d"] - ); -} - -test "y_string_allowed_escapes" { - try ok( - \\["\"\\\/\b\f\n\r\t"] - ); -} - -test "y_string_backslash_and_u_escaped_zero" { - try ok( - \\["\\u0000"] - ); -} - -test "y_string_backslash_doublequotes" { - try roundTrip( - \\["\""] - ); -} - -test "y_string_comments" { - try ok( - \\["a/*b*/c/*d//e"] - ); -} - -test "y_string_double_escape_a" { - try ok( - \\["\\a"] - ); -} - -test "y_string_double_escape_n" { - try roundTrip( - \\["\\n"] - ); -} - -test "y_string_escaped_control_character" { - try ok( - \\["\u0012"] - ); -} - -test "y_string_escaped_noncharacter" { - try ok( - \\["\uFFFF"] - ); -} - -test "y_string_in_array" { - try ok( - \\["asd"] - ); -} - -test "y_string_in_array_with_leading_space" { - try ok( - \\[ "asd"] - ); -} - -test "y_string_last_surrogates_1_and_2" { - try ok( - \\["\uDBFF\uDFFF"] - ); -} - -test "y_string_nbsp_uescaped" { - try ok( - \\["new\u00A0line"] - ); -} - -test "y_string_nonCharacterInUTF-8_U+10FFFF" { - try ok( - \\["􏿿"] - ); -} - -test "y_string_nonCharacterInUTF-8_U+FFFF" { - try ok( - \\["￿"] - ); -} - -test "y_string_null_escape" { - try ok( - \\["\u0000"] - ); -} - -test "y_string_one-byte-utf-8" { - try ok( - \\["\u002c"] - ); -} - -test "y_string_pi" { - try ok( - \\["π"] - ); -} - -test "y_string_reservedCharacterInUTF-8_U+1BFFF" { - try ok( - \\["𛿿"] - ); -} - -test "y_string_simple_ascii" { - try ok( - \\["asd "] - ); -} - -test "y_string_space" { - try roundTrip( - \\" " - ); -} - -test "y_string_surrogates_U+1D11E_MUSICAL_SYMBOL_G_CLEF" { - try ok( - \\["\uD834\uDd1e"] - ); -} - -test "y_string_three-byte-utf-8" { - try ok( - \\["\u0821"] - ); -} - -test "y_string_two-byte-utf-8" { - try ok( - \\["\u0123"] - ); -} - -test "y_string_u+2028_line_sep" { - try ok("[\"\xe2\x80\xa8\"]"); -} - -test "y_string_u+2029_par_sep" { - try ok("[\"\xe2\x80\xa9\"]"); -} - -test "y_string_uescaped_newline" { - try ok( - \\["new\u000Aline"] - ); -} - -test "y_string_uEscape" { - try ok( - \\["\u0061\u30af\u30EA\u30b9"] - ); -} - -test "y_string_unescaped_char_delete" { - try ok("[\"\x7f\"]"); -} - -test "y_string_unicode_2" { - try ok( - \\["⍂㈴⍂"] - ); -} - -test "y_string_unicodeEscapedBackslash" { - try ok( - \\["\u005C"] - ); -} - -test "y_string_unicode_escaped_double_quote" { - try ok( - \\["\u0022"] - ); -} - -test "y_string_unicode" { - try ok( - \\["\uA66D"] - ); -} - -test "y_string_unicode_U+10FFFE_nonchar" { - try ok( - \\["\uDBFF\uDFFE"] - ); -} - -test "y_string_unicode_U+1FFFE_nonchar" { - try ok( - \\["\uD83F\uDFFE"] - ); -} - -test "y_string_unicode_U+200B_ZERO_WIDTH_SPACE" { - try ok( - \\["\u200B"] - ); -} - -test "y_string_unicode_U+2064_invisible_plus" { - try ok( - \\["\u2064"] - ); -} - -test "y_string_unicode_U+FDD0_nonchar" { - try ok( - \\["\uFDD0"] - ); -} - -test "y_string_unicode_U+FFFE_nonchar" { - try ok( - \\["\uFFFE"] - ); -} - -test "y_string_utf8" { - try ok( - \\["€𝄞"] - ); -} - -test "y_string_with_del_character" { - try ok("[\"a\x7fa\"]"); -} - -test "y_structure_lonely_false" { - try roundTrip( - \\false - ); -} - -test "y_structure_lonely_int" { - try roundTrip( - \\42 - ); -} - -test "y_structure_lonely_negative_real" { - try ok( - \\-0.1 - ); -} - -test "y_structure_lonely_null" { - try roundTrip( - \\null - ); -} - -test "y_structure_lonely_string" { - try roundTrip( - \\"asd" - ); -} - -test "y_structure_lonely_true" { - try roundTrip( - \\true - ); -} - -test 
"y_structure_string_empty" { - try roundTrip( - \\"" - ); -} - -test "y_structure_trailing_newline" { - try roundTrip( - \\["a"] - ); -} - -test "y_structure_true_in_array" { - try roundTrip( - \\[true] - ); -} - -test "y_structure_whitespace_array" { - try ok(" [] "); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -test "n_array_1_true_without_comma" { - try err( - \\[1 true] - ); -} - -test "n_array_a_invalid_utf8" { - try err( - \\[aå] - ); -} - -test "n_array_colon_instead_of_comma" { - try err( - \\["": 1] - ); -} - -test "n_array_comma_after_close" { - try err( - \\[""], - ); -} - -test "n_array_comma_and_number" { - try err( - \\[,1] - ); -} - -test "n_array_double_comma" { - try err( - \\[1,,2] - ); -} - -test "n_array_double_extra_comma" { - try err( - \\["x",,] - ); -} - -test "n_array_extra_close" { - try err( - \\["x"]] - ); -} - -test "n_array_extra_comma" { - try err( - \\["",] - ); -} - -test "n_array_incomplete_invalid_value" { - try err( - \\[x - ); -} - -test "n_array_incomplete" { - try err( - \\["x" - ); -} - -test "n_array_inner_array_no_comma" { - try err( - \\[3[4]] - ); -} - -test "n_array_invalid_utf8" { - try err( - \\[ÿ] - ); -} - -test "n_array_items_separated_by_semicolon" { - try err( - \\[1:2] - ); -} - -test "n_array_just_comma" { - try err( - \\[,] - ); -} - -test "n_array_just_minus" { - try err( - \\[-] - ); -} - -test "n_array_missing_value" { - try err( - \\[ , ""] - ); -} - -test "n_array_newlines_unclosed" { - try err( - \\["a", - \\4 - \\,1, - ); -} - -test "n_array_number_and_comma" { - try err( - \\[1,] - ); -} - -test "n_array_number_and_several_commas" { - try err( - \\[1,,] - ); -} - -test "n_array_spaces_vertical_tab_formfeed" { - try err("[\"\x0aa\"\\f]"); -} - -test "n_array_star_inside" { - try err( - \\[*] - ); -} - -test "n_array_unclosed" { - try err( - \\["" - ); -} - -test "n_array_unclosed_trailing_comma" { - try err( - \\[1, - ); -} - -test "n_array_unclosed_with_new_lines" { - try err( - \\[1, - \\1 - \\,1 - ); -} - -test "n_array_unclosed_with_object_inside" { - try err( - \\[{} - ); -} - -test "n_incomplete_false" { - try err( - \\[fals] - ); -} - -test "n_incomplete_null" { - try err( - \\[nul] - ); -} - -test "n_incomplete_true" { - try err( - \\[tru] - ); -} - -test "n_multidigit_number_then_00" { - try err("123\x00"); -} - -test "n_number_0.1.2" { - try err( - \\[0.1.2] - ); -} - -test "n_number_-01" { - try err( - \\[-01] - ); -} - -test "n_number_0.3e" { - try err( - \\[0.3e] - ); -} - -test "n_number_0.3e+" { - try err( - \\[0.3e+] - ); -} - -test "n_number_0_capital_E" { - try err( - \\[0E] - ); -} - -test "n_number_0_capital_E+" { - try err( - \\[0E+] - ); -} - -test "n_number_0.e1" { - try err( - \\[0.e1] - ); -} - -test "n_number_0e" { - try err( - \\[0e] - ); -} - -test "n_number_0e+" { - try err( - \\[0e+] - ); -} - -test "n_number_1_000" { - try err( - \\[1 000.0] - ); -} - -test "n_number_1.0e-" { - try err( - \\[1.0e-] - ); -} - -test "n_number_1.0e" { - try err( - \\[1.0e] - ); -} - -test "n_number_1.0e+" { - try err( - \\[1.0e+] - ); -} - -test "n_number_-1.0." { - try err( - \\[-1.0.] 
- ); -} - -test "n_number_1eE2" { - try err( - \\[1eE2] - ); -} - -test "n_number_.-1" { - try err( - \\[.-1] - ); -} - -test "n_number_+1" { - try err( - \\[+1] - ); -} - -test "n_number_.2e-3" { - try err( - \\[.2e-3] - ); -} - -test "n_number_2.e-3" { - try err( - \\[2.e-3] - ); -} - -test "n_number_2.e+3" { - try err( - \\[2.e+3] - ); -} - -test "n_number_2.e3" { - try err( - \\[2.e3] - ); -} - -test "n_number_-2." { - try err( - \\[-2.] - ); -} - -test "n_number_9.e+" { - try err( - \\[9.e+] - ); -} - -test "n_number_expression" { - try err( - \\[1+2] - ); -} - -test "n_number_hex_1_digit" { - try err( - \\[0x1] - ); -} - -test "n_number_hex_2_digits" { - try err( - \\[0x42] - ); -} - -test "n_number_infinity" { - try err( - \\[Infinity] - ); -} - -test "n_number_+Inf" { - try err( - \\[+Inf] - ); -} - -test "n_number_Inf" { - try err( - \\[Inf] - ); -} - -test "n_number_invalid+-" { - try err( - \\[0e+-1] - ); -} - -test "n_number_invalid-negative-real" { - try err( - \\[-123.123foo] - ); -} - -test "n_number_invalid-utf-8-in-bigger-int" { - try err( - \\[123å] - ); -} - -test "n_number_invalid-utf-8-in-exponent" { - try err( - \\[1e1å] - ); -} - -test "n_number_invalid-utf-8-in-int" { - try err( - \\[0å] - ); -} - -test "n_number_++" { - try err( - \\[++1234] - ); -} - -test "n_number_minus_infinity" { - try err( - \\[-Infinity] - ); -} - -test "n_number_minus_sign_with_trailing_garbage" { - try err( - \\[-foo] - ); -} - -test "n_number_minus_space_1" { - try err( - \\[- 1] - ); -} - -test "n_number_-NaN" { - try err( - \\[-NaN] - ); -} - -test "n_number_NaN" { - try err( - \\[NaN] - ); -} - -test "n_number_neg_int_starting_with_zero" { - try err( - \\[-012] - ); -} - -test "n_number_neg_real_without_int_part" { - try err( - \\[-.123] - ); -} - -test "n_number_neg_with_garbage_at_end" { - try err( - \\[-1x] - ); -} - -test "n_number_real_garbage_after_e" { - try err( - \\[1ea] - ); -} - -test "n_number_real_with_invalid_utf8_after_e" { - try err( - \\[1eå] - ); -} - -test "n_number_real_without_fractional_part" { - try err( - \\[1.] 
- ); -} - -test "n_number_starting_with_dot" { - try err( - \\[.123] - ); -} - -test "n_number_U+FF11_fullwidth_digit_one" { - try err( - \\[1] - ); -} - -test "n_number_with_alpha_char" { - try err( - \\[1.8011670033376514H-308] - ); -} - -test "n_number_with_alpha" { - try err( - \\[1.2a-3] - ); -} - -test "n_number_with_leading_zero" { - try err( - \\[012] - ); -} - -test "n_object_bad_value" { - try err( - \\["x", truth] - ); -} - -test "n_object_bracket_key" { - try err( - \\{[: "x"} - ); -} - -test "n_object_comma_instead_of_colon" { - try err( - \\{"x", null} - ); -} - -test "n_object_double_colon" { - try err( - \\{"x"::"b"} - ); -} - -test "n_object_emoji" { - try err( - \\{🇨🇭} - ); -} - -test "n_object_garbage_at_end" { - try err( - \\{"a":"a" 123} - ); -} - -test "n_object_key_with_single_quotes" { - try err( - \\{key: 'value'} - ); -} - -test "n_object_lone_continuation_byte_in_key_and_trailing_comma" { - try err( - \\{"¹":"0",} - ); -} - -test "n_object_missing_colon" { - try err( - \\{"a" b} - ); -} - -test "n_object_missing_key" { - try err( - \\{:"b"} - ); -} - -test "n_object_missing_semicolon" { - try err( - \\{"a" "b"} - ); -} - -test "n_object_missing_value" { - try err( - \\{"a": - ); -} - -test "n_object_no-colon" { - try err( - \\{"a" - ); -} - -test "n_object_non_string_key_but_huge_number_instead" { - try err( - \\{9999E9999:1} - ); -} - -test "n_object_non_string_key" { - try err( - \\{1:1} - ); -} - -test "n_object_repeated_null_null" { - try err( - \\{null:null,null:null} - ); -} - -test "n_object_several_trailing_commas" { - try err( - \\{"id":0,,,,,} - ); -} - -test "n_object_single_quote" { - try err( - \\{'a':0} - ); -} - -test "n_object_trailing_comma" { - try err( - \\{"id":0,} - ); -} - -test "n_object_trailing_comment" { - try err( - \\{"a":"b"}/**/ - ); -} - -test "n_object_trailing_comment_open" { - try err( - \\{"a":"b"}/**// - ); -} - -test "n_object_trailing_comment_slash_open_incomplete" { - try err( - \\{"a":"b"}/ - ); -} - -test "n_object_trailing_comment_slash_open" { - try err( - \\{"a":"b"}// - ); -} - -test "n_object_two_commas_in_a_row" { - try err( - \\{"a":"b",,"c":"d"} - ); -} - -test "n_object_unquoted_key" { - try err( - \\{a: "b"} - ); -} - -test "n_object_unterminated-value" { - try err( - \\{"a":"a - ); -} - -test "n_object_with_single_string" { - try err( - \\{ "foo" : "bar", "a" } - ); -} - -test "n_object_with_trailing_garbage" { - try err( - \\{"a":"b"}# - ); -} - -test "n_single_space" { - try err(" "); -} - -test "n_string_1_surrogate_then_escape" { - try err( - \\["\uD800\"] - ); -} - -test "n_string_1_surrogate_then_escape_u1" { - try err( - \\["\uD800\u1"] - ); -} - -test "n_string_1_surrogate_then_escape_u1x" { - try err( - \\["\uD800\u1x"] - ); -} - -test "n_string_1_surrogate_then_escape_u" { - try err( - \\["\uD800\u"] - ); -} - -test "n_string_accentuated_char_no_quotes" { - try err( - \\[é] - ); -} - -test "n_string_backslash_00" { - try err("[\"\x00\"]"); -} - -test "n_string_escaped_backslash_bad" { - try err( - \\["\\\"] - ); -} - -test "n_string_escaped_ctrl_char_tab" { - try err("\x5b\x22\x5c\x09\x22\x5d"); -} - -test "n_string_escaped_emoji" { - try err("[\"\x5c\xc3\xb0\xc2\x9f\xc2\x8c\xc2\x80\"]"); -} - -test "n_string_escape_x" { - try err( - \\["\x00"] - ); -} - -test "n_string_incomplete_escaped_character" { - try err( - \\["\u00A"] - ); -} - -test "n_string_incomplete_escape" { - try err( - \\["\"] - ); -} - -test "n_string_incomplete_surrogate_escape_invalid" { - try err( - \\["\uD800\uD800\x"] - ); -} - 
-test "n_string_incomplete_surrogate" { - try err( - \\["\uD834\uDd"] - ); -} - -test "n_string_invalid_backslash_esc" { - try err( - \\["\a"] - ); -} - -test "n_string_invalid_unicode_escape" { - try err( - \\["\uqqqq"] - ); -} - -test "n_string_invalid_utf8_after_escape" { - try err("[\"\\\x75\xc3\xa5\"]"); -} - -test "n_string_invalid-utf-8-in-escape" { - try err( - \\["\uå"] - ); -} - -test "n_string_leading_uescaped_thinspace" { - try err( - \\[\u0020"asd"] - ); -} - -test "n_string_no_quotes_with_bad_escape" { - try err( - \\[\n] - ); -} - -test "n_string_single_doublequote" { - try err( - \\" - ); -} - -test "n_string_single_quote" { - try err( - \\['single quote'] - ); -} - -test "n_string_single_string_no_double_quotes" { - try err( - \\abc - ); -} - -test "n_string_start_escape_unclosed" { - try err( - \\["\ - ); -} - -test "n_string_unescaped_crtl_char" { - try err("[\"a\x00a\"]"); -} - -test "n_string_unescaped_newline" { - try err( - \\["new - \\line"] - ); -} - -test "n_string_unescaped_tab" { - try err("[\"\t\"]"); -} - -test "n_string_unicode_CapitalU" { - try err( - \\"\UA66D" - ); -} - -test "n_string_with_trailing_garbage" { - try err( - \\""x - ); -} - -test "n_structure_100000_opening_arrays" { - try err("[" ** 100000); -} - -test "n_structure_angle_bracket_." { - try err( - \\<.> - ); -} - -test "n_structure_angle_bracket_null" { - try err( - \\[] - ); -} - -test "n_structure_array_trailing_garbage" { - try err( - \\[1]x - ); -} - -test "n_structure_array_with_extra_array_close" { - try err( - \\[1]] - ); -} - -test "n_structure_array_with_unclosed_string" { - try err( - \\["asd] - ); -} - -test "n_structure_ascii-unicode-identifier" { - try err( - \\aÃ¥ - ); -} - -test "n_structure_capitalized_True" { - try err( - \\[True] - ); -} - -test "n_structure_close_unopened_array" { - try err( - \\1] - ); -} - -test "n_structure_comma_instead_of_closing_brace" { - try err( - \\{"x": true, - ); -} - -test "n_structure_double_array" { - try err( - \\[][] - ); -} - -test "n_structure_end_array" { - try err( - \\] - ); -} - -test "n_structure_incomplete_UTF8_BOM" { - try err( - \\ï»{} - ); -} - -test "n_structure_lone-invalid-utf-8" { - try err( - \\å - ); -} - -test "n_structure_lone-open-bracket" { - try err( - \\[ - ); -} - -test "n_structure_no_data" { - try err( - \\ - ); -} - -test "n_structure_null-byte-outside-string" { - try err("[\x00]"); -} - -test "n_structure_number_with_trailing_garbage" { - try err( - \\2@ - ); -} - -test "n_structure_object_followed_by_closing_object" { - try err( - \\{}} - ); -} - -test "n_structure_object_unclosed_no_value" { - try err( - \\{"": - ); -} - -test "n_structure_object_with_comment" { - try err( - \\{"a":/*comment*/"b"} - ); -} - -test "n_structure_object_with_trailing_garbage" { - try err( - \\{"a": true} "x" - ); -} - -test "n_structure_open_array_apostrophe" { - try err( - \\[' - ); -} - -test "n_structure_open_array_comma" { - try err( - \\[, - ); -} - -test "n_structure_open_array_object" { - try err("[{\"\":" ** 50000); -} - -test "n_structure_open_array_open_object" { - try err( - \\[{ - ); -} - -test "n_structure_open_array_open_string" { - try err( - \\["a - ); -} - -test "n_structure_open_array_string" { - try err( - \\["a" - ); -} - -test "n_structure_open_object_close_array" { - try err( - \\{] - ); -} - -test "n_structure_open_object_comma" { - try err( - \\{, - ); -} - -test "n_structure_open_object" { - try err( - \\{ - ); -} - -test "n_structure_open_object_open_array" { - try err( - \\{[ - ); -} - -test 
"n_structure_open_object_open_string" { - try err( - \\{"a - ); -} - -test "n_structure_open_object_string_with_apostrophes" { - try err( - \\{'a' - ); -} - -test "n_structure_open_open" { - try err( - \\["\{["\{["\{["\{ - ); -} - -test "n_structure_single_eacute" { - try err( - \\é - ); -} - -test "n_structure_single_star" { - try err( - \\* - ); -} - -test "n_structure_trailing_#" { - try err( - \\{"a":"b"}#{} - ); -} - -test "n_structure_U+2060_word_joined" { - try err( - \\[⁠] - ); -} - -test "n_structure_uescaped_LF_before_string" { - try err( - \\[\u000A""] - ); -} - -test "n_structure_unclosed_array" { - try err( - \\[1 - ); -} - -test "n_structure_unclosed_array_partial_null" { - try err( - \\[ false, nul - ); -} - -test "n_structure_unclosed_array_unfinished_false" { - try err( - \\[ true, fals - ); -} - -test "n_structure_unclosed_array_unfinished_true" { - try err( - \\[ false, tru - ); -} - -test "n_structure_unclosed_object" { - try err( - \\{"asd":"asd" - ); -} - -test "n_structure_unicode-identifier" { - try err( - \\Ã¥ - ); -} - -test "n_structure_UTF8_BOM_no_data" { - try err( - \\ - ); -} - -test "n_structure_whitespace_formfeed" { - try err("[\x0c]"); -} - -test "n_structure_whitespace_U+2060_word_joiner" { - try err( - \\[⁠] - ); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -test "i_number_double_huge_neg_exp" { - try any( - \\[123.456e-789] - ); -} - -test "i_number_huge_exp" { - try any( - \\[0.4e00669999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999969999999006] - ); -} - -test "i_number_neg_int_huge_exp" { - try any( - \\[-1e+9999] - ); -} - -test "i_number_pos_double_huge_exp" { - try any( - \\[1.5e+9999] - ); -} - -test "i_number_real_neg_overflow" { - try any( - \\[-123123e100000] - ); -} - -test "i_number_real_pos_overflow" { - try any( - \\[123123e100000] - ); -} - -test "i_number_real_underflow" { - try any( - \\[123e-10000000] - ); -} - -test "i_number_too_big_neg_int" { - try any( - \\[-123123123123123123123123123123] - ); -} - -test "i_number_too_big_pos_int" { - try any( - \\[100000000000000000000] - ); -} - -test "i_number_very_big_negative_int" { - try any( - \\[-237462374673276894279832749832423479823246327846] - ); -} - -test "i_object_key_lone_2nd_surrogate" { - try anyStreamingErrNonStreaming( - \\{"\uDFAA":0} - ); -} - -test "i_string_1st_surrogate_but_2nd_missing" { - try anyStreamingErrNonStreaming( - \\["\uDADA"] - ); -} - -test "i_string_1st_valid_surrogate_2nd_invalid" { - try anyStreamingErrNonStreaming( - \\["\uD888\u1234"] - ); -} - -test "i_string_incomplete_surrogate_and_escape_valid" { - try anyStreamingErrNonStreaming( - \\["\uD800\n"] - ); -} - -test "i_string_incomplete_surrogate_pair" { - try anyStreamingErrNonStreaming( - \\["\uDd1ea"] - ); -} - -test "i_string_incomplete_surrogates_escape_valid" { - try anyStreamingErrNonStreaming( - \\["\uD800\uD800\n"] - ); -} - -test "i_string_invalid_lonely_surrogate" { - try anyStreamingErrNonStreaming( - \\["\ud800"] - ); -} - -test "i_string_invalid_surrogate" { - try anyStreamingErrNonStreaming( - \\["\ud800abc"] - ); -} - -test "i_string_invalid_utf-8" { - try any( - \\["ÿ"] - ); -} - -test "i_string_inverted_surrogates_U+1D11E" { - try anyStreamingErrNonStreaming( - \\["\uDd1e\uD834"] - ); -} - -test "i_string_iso_latin_1" { - try any( - \\["é"] - ); -} - -test "i_string_lone_second_surrogate" { - try anyStreamingErrNonStreaming( - \\["\uDFAA"] - ); -} - -test 
"i_string_lone_utf8_continuation_byte" { - try any( - \\[""] - ); -} - -test "i_string_not_in_unicode_range" { - try any( - \\["ô¿¿¿"] - ); -} - -test "i_string_overlong_sequence_2_bytes" { - try any( - \\["À¯"] - ); -} - -test "i_string_overlong_sequence_6_bytes" { - try any( - \\["üƒ¿¿¿¿"] - ); -} - -test "i_string_overlong_sequence_6_bytes_null" { - try any( - \\["ü€€€€€"] - ); -} - -test "i_string_truncated-utf-8" { - try any( - \\["àÿ"] - ); -} - -test "i_string_utf16BE_no_BOM" { - try any("\x00\x5b\x00\x22\x00\xc3\xa9\x00\x22\x00\x5d"); -} - -test "i_string_utf16LE_no_BOM" { - try any("\x5b\x00\x22\x00\xc3\xa9\x00\x22\x00\x5d\x00"); -} - -test "i_string_UTF-16LE_with_BOM" { - try any("\xc3\xbf\xc3\xbe\x5b\x00\x22\x00\xc3\xa9\x00\x22\x00\x5d\x00"); -} - -test "i_string_UTF-8_invalid_sequence" { - try any( - \\["日шú"] - ); -} - -test "i_string_UTF8_surrogate_U+D800" { - try any( - \\["í €"] - ); -} - -test "i_structure_500_nested_arrays" { - try any(("[" ** 500) ++ ("]" ** 500)); -} - -test "i_structure_UTF-8_BOM_empty_object" { - try any( - \\{} - ); -} - test "truncated UTF-8 sequence" { - try utf8Error("\"\xc2\""); - try utf8Error("\"\xdf\""); - try utf8Error("\"\xed\xa0\""); - try utf8Error("\"\xf0\x80\""); - try utf8Error("\"\xf0\x80\x80\""); + try err("\"\xc2\""); + try err("\"\xdf\""); + try err("\"\xed\xa0\""); + try err("\"\xf0\x80\""); + try err("\"\xf0\x80\x80\""); } test "invalid continuation byte" { - try utf8Error("\"\xc2\x00\""); - try utf8Error("\"\xc2\x7f\""); - try utf8Error("\"\xc2\xc0\""); - try utf8Error("\"\xc3\xc1\""); - try utf8Error("\"\xc4\xf5\""); - try utf8Error("\"\xc5\xff\""); - try utf8Error("\"\xe4\x80\x00\""); - try utf8Error("\"\xe5\x80\x10\""); - try utf8Error("\"\xe6\x80\xc0\""); - try utf8Error("\"\xe7\x80\xf5\""); - try utf8Error("\"\xe8\x00\x80\""); - try utf8Error("\"\xf2\x00\x80\x80\""); - try utf8Error("\"\xf0\x80\x00\x80\""); - try utf8Error("\"\xf1\x80\xc0\x80\""); - try utf8Error("\"\xf2\x80\x80\x00\""); - try utf8Error("\"\xf3\x80\x80\xc0\""); - try utf8Error("\"\xf4\x80\x80\xf5\""); + try err("\"\xc2\x00\""); + try err("\"\xc2\x7f\""); + try err("\"\xc2\xc0\""); + try err("\"\xc3\xc1\""); + try err("\"\xc4\xf5\""); + try err("\"\xc5\xff\""); + try err("\"\xe4\x80\x00\""); + try err("\"\xe5\x80\x10\""); + try err("\"\xe6\x80\xc0\""); + try err("\"\xe7\x80\xf5\""); + try err("\"\xe8\x00\x80\""); + try err("\"\xf2\x00\x80\x80\""); + try err("\"\xf0\x80\x00\x80\""); + try err("\"\xf1\x80\xc0\x80\""); + try err("\"\xf2\x80\x80\x00\""); + try err("\"\xf3\x80\x80\xc0\""); + try err("\"\xf4\x80\x80\xf5\""); } test "disallowed overlong form" { - try utf8Error("\"\xc0\x80\""); - try utf8Error("\"\xc0\x90\""); - try utf8Error("\"\xc1\x80\""); - try utf8Error("\"\xc1\x90\""); - try utf8Error("\"\xe0\x80\x80\""); - try utf8Error("\"\xf0\x80\x80\x80\""); + try err("\"\xc0\x80\""); + try err("\"\xc0\x90\""); + try err("\"\xc1\x80\""); + try err("\"\xc1\x90\""); + try err("\"\xe0\x80\x80\""); + try err("\"\xf0\x80\x80\x80\""); } test "out of UTF-16 range" { - try utf8Error("\"\xf4\x90\x80\x80\""); - try utf8Error("\"\xf5\x80\x80\x80\""); - try utf8Error("\"\xf6\x80\x80\x80\""); - try utf8Error("\"\xf7\x80\x80\x80\""); - try utf8Error("\"\xf8\x80\x80\x80\""); - try utf8Error("\"\xf9\x80\x80\x80\""); - try utf8Error("\"\xfa\x80\x80\x80\""); - try utf8Error("\"\xfb\x80\x80\x80\""); - try utf8Error("\"\xfc\x80\x80\x80\""); - try utf8Error("\"\xfd\x80\x80\x80\""); - try utf8Error("\"\xfe\x80\x80\x80\""); - try utf8Error("\"\xff\x80\x80\x80\""); -} - -test 
"parse" { - var ts = TokenStream.init("false"); - try testing.expectEqual(false, try parse(bool, &ts, ParseOptions{})); - ts = TokenStream.init("true"); - try testing.expectEqual(true, try parse(bool, &ts, ParseOptions{})); - ts = TokenStream.init("1"); - try testing.expectEqual(@as(u1, 1), try parse(u1, &ts, ParseOptions{})); - ts = TokenStream.init("50"); - try testing.expectError(error.Overflow, parse(u1, &ts, ParseOptions{})); - ts = TokenStream.init("42"); - try testing.expectEqual(@as(u64, 42), try parse(u64, &ts, ParseOptions{})); - ts = TokenStream.init("42.0"); - try testing.expectEqual(@as(f64, 42), try parse(f64, &ts, ParseOptions{})); - ts = TokenStream.init("null"); - try testing.expectEqual(@as(?bool, null), try parse(?bool, &ts, ParseOptions{})); - ts = TokenStream.init("true"); - try testing.expectEqual(@as(?bool, true), try parse(?bool, &ts, ParseOptions{})); - - ts = TokenStream.init("\"foo\""); - try testing.expectEqual(@as([3]u8, "foo".*), try parse([3]u8, &ts, ParseOptions{})); - ts = TokenStream.init("[102, 111, 111]"); - try testing.expectEqual(@as([3]u8, "foo".*), try parse([3]u8, &ts, ParseOptions{})); - ts = TokenStream.init("[]"); - try testing.expectEqual(@as([0]u8, undefined), try parse([0]u8, &ts, ParseOptions{})); - - ts = TokenStream.init("\"12345678901234567890\""); - try testing.expectEqual(@as(u64, 12345678901234567890), try parse(u64, &ts, ParseOptions{})); - ts = TokenStream.init("\"123.456\""); - try testing.expectEqual(@as(f64, 123.456), try parse(f64, &ts, ParseOptions{})); -} - -test "parse into enum" { - const T = enum(u32) { - Foo = 42, - Bar, - @"with\\escape", - }; - var ts = TokenStream.init("\"Foo\""); - try testing.expectEqual(@as(T, .Foo), try parse(T, &ts, ParseOptions{})); - ts = TokenStream.init("42"); - try testing.expectEqual(@as(T, .Foo), try parse(T, &ts, ParseOptions{})); - ts = TokenStream.init("\"with\\\\escape\""); - try testing.expectEqual(@as(T, .@"with\\escape"), try parse(T, &ts, ParseOptions{})); - ts = TokenStream.init("5"); - try testing.expectError(error.InvalidEnumTag, parse(T, &ts, ParseOptions{})); - ts = TokenStream.init("\"Qux\""); - try testing.expectError(error.InvalidEnumTag, parse(T, &ts, ParseOptions{})); -} - -test "parse with trailing data" { - var ts = TokenStream.init("falsed"); - try testing.expectEqual(false, try parse(bool, &ts, ParseOptions{ .allow_trailing_data = true })); - ts = TokenStream.init("falsed"); - try testing.expectError(error.InvalidTopLevelTrailing, parse(bool, &ts, ParseOptions{ .allow_trailing_data = false })); - // trailing whitespace is okay - ts = TokenStream.init("false \n"); - try testing.expectEqual(false, try parse(bool, &ts, ParseOptions{ .allow_trailing_data = false })); -} - -test "parse into that allocates a slice" { - var ts = TokenStream.init("\"foo\""); - try testing.expectError(error.AllocatorRequired, parse([]u8, &ts, ParseOptions{})); - - const options = ParseOptions{ .allocator = testing.allocator }; - { - ts = TokenStream.init("\"foo\""); - const r = try parse([]u8, &ts, options); - defer parseFree([]u8, r, options); - try testing.expectEqualSlices(u8, "foo", r); - } - { - ts = TokenStream.init("[102, 111, 111]"); - const r = try parse([]u8, &ts, options); - defer parseFree([]u8, r, options); - try testing.expectEqualSlices(u8, "foo", r); - } - { - ts = TokenStream.init("\"with\\\\escape\""); - const r = try parse([]u8, &ts, options); - defer parseFree([]u8, r, options); - try testing.expectEqualSlices(u8, "with\\escape", r); - } -} - -test "parse into tagged union" { - 
{ - const T = union(enum) { - int: i32, - float: f64, - string: []const u8, - }; - var ts = TokenStream.init("1.5"); - try testing.expectEqual(T{ .float = 1.5 }, try parse(T, &ts, ParseOptions{})); - } - - { // failing allocations should be bubbled up instantly without trying next member - var fail_alloc = testing.FailingAllocator.init(testing.allocator, 0); - const options = ParseOptions{ .allocator = fail_alloc.allocator() }; - const T = union(enum) { - // both fields here match the input - string: []const u8, - array: [3]u8, - }; - var ts = TokenStream.init("[1,2,3]"); - try testing.expectError(error.OutOfMemory, parse(T, &ts, options)); - } - - { - // if multiple matches possible, takes first option - const T = union(enum) { - x: u8, - y: u8, - }; - var ts = TokenStream.init("42"); - try testing.expectEqual(T{ .x = 42 }, try parse(T, &ts, ParseOptions{})); - } - - { // needs to back out when first union member doesn't match - const T = union(enum) { - A: struct { x: u32 }, - B: struct { y: u32 }, - }; - var ts = TokenStream.init("{\"y\":42}"); - try testing.expectEqual(T{ .B = .{ .y = 42 } }, try parse(T, &ts, ParseOptions{})); - } -} - -test "parse union bubbles up AllocatorRequired" { - { // string member first in union (and not matching) - const T = union(enum) { - string: []const u8, - int: i32, - }; - var ts = TokenStream.init("42"); - try testing.expectError(error.AllocatorRequired, parse(T, &ts, ParseOptions{})); - } - - { // string member not first in union (and matching) - const T = union(enum) { - int: i32, - float: f64, - string: []const u8, - }; - var ts = TokenStream.init("\"foo\""); - try testing.expectError(error.AllocatorRequired, parse(T, &ts, ParseOptions{})); - } -} - -test "parseFree descends into tagged union" { - var fail_alloc = testing.FailingAllocator.init(testing.allocator, 1); - const options = ParseOptions{ .allocator = fail_alloc.allocator() }; - const T = union(enum) { - int: i32, - float: f64, - string: []const u8, - }; - // use a string with unicode escape so we know result can't be a reference to global constant - var ts = TokenStream.init("\"with\\u0105unicode\""); - const r = try parse(T, &ts, options); - try testing.expectEqual(std.meta.Tag(T).string, @as(std.meta.Tag(T), r)); - try testing.expectEqualSlices(u8, "withąunicode", r.string); - try testing.expectEqual(@as(usize, 0), fail_alloc.deallocations); - parseFree(T, r, options); - try testing.expectEqual(@as(usize, 1), fail_alloc.deallocations); -} - -test "parse with comptime field" { - { - const T = struct { - comptime a: i32 = 0, - b: bool, - }; - var ts = TokenStream.init( - \\{ - \\ "a": 0, - \\ "b": true - \\} - ); - try testing.expectEqual(T{ .a = 0, .b = true }, try parse(T, &ts, ParseOptions{})); - } - - { // string comptime values currently require an allocator - const T = union(enum) { - foo: struct { - comptime kind: []const u8 = "boolean", - b: bool, - }, - bar: struct { - comptime kind: []const u8 = "float", - b: f64, - }, - }; - - const options = ParseOptions{ - .allocator = std.testing.allocator, - }; - - var ts = TokenStream.init( - \\{ - \\ "kind": "float", - \\ "b": 1.0 - \\} - ); - const r = try parse(T, &ts, options); - - // check that parseFree doesn't try to free comptime fields - parseFree(T, r, options); - } -} - -test "parse into struct with no fields" { - const T = struct {}; - var ts = TokenStream.init("{}"); - try testing.expectEqual(T{}, try parse(T, &ts, ParseOptions{})); -} - -const test_const_value: usize = 123; - -test "parse into struct with default const 
pointer field" { - const T = struct { a: *const usize = &test_const_value }; - var ts = TokenStream.init("{}"); - try testing.expectEqual(T{}, try parse(T, &ts, .{})); -} - -const test_default_usize: usize = 123; -const test_default_usize_ptr: *align(1) const usize = &test_default_usize; -const test_default_str: []const u8 = "test str"; -const test_default_str_slice: [2][]const u8 = [_][]const u8{ - "test1", - "test2", -}; - -test "freeing parsed structs with pointers to default values" { - const T = struct { - int: *const usize = &test_default_usize, - int_ptr: *allowzero align(1) const usize = test_default_usize_ptr, - str: []const u8 = test_default_str, - str_slice: []const []const u8 = &test_default_str_slice, - }; - - var ts = json.TokenStream.init("{}"); - const options = .{ .allocator = std.heap.page_allocator }; - const parsed = try json.parse(T, &ts, options); - - try testing.expectEqual(T{}, parsed); - - json.parseFree(T, parsed, options); -} - -test "parse into struct where destination and source lengths mismatch" { - const T = struct { a: [2]u8 }; - var ts = TokenStream.init("{\"a\": \"bbb\"}"); - try testing.expectError(error.LengthMismatch, parse(T, &ts, ParseOptions{})); -} - -test "parse into struct with misc fields" { - @setEvalBranchQuota(10000); - const options = ParseOptions{ .allocator = testing.allocator }; - const T = struct { - int: i64, - float: f64, - @"with\\escape": bool, - @"withąunicode😂": bool, - language: []const u8, - optional: ?bool, - default_field: i32 = 42, - static_array: [3]f64, - dynamic_array: []f64, - - complex: struct { - nested: []const u8, - }, - - veryComplex: []struct { - foo: []const u8, - }, - - a_union: Union, - const Union = union(enum) { - x: u8, - float: f64, - string: []const u8, - }; - }; - var ts = TokenStream.init( - \\{ - \\ "int": 420, - \\ "float": 3.14, - \\ "with\\escape": true, - \\ "with\u0105unicode\ud83d\ude02": false, - \\ "language": "zig", - \\ "optional": null, - \\ "static_array": [66.6, 420.420, 69.69], - \\ "dynamic_array": [66.6, 420.420, 69.69], - \\ "complex": { - \\ "nested": "zig" - \\ }, - \\ "veryComplex": [ - \\ { - \\ "foo": "zig" - \\ }, { - \\ "foo": "rocks" - \\ } - \\ ], - \\ "a_union": 100000 - \\} - ); - const r = try parse(T, &ts, options); - defer parseFree(T, r, options); - try testing.expectEqual(@as(i64, 420), r.int); - try testing.expectEqual(@as(f64, 3.14), r.float); - try testing.expectEqual(true, r.@"with\\escape"); - try testing.expectEqual(false, r.@"withąunicode😂"); - try testing.expectEqualSlices(u8, "zig", r.language); - try testing.expectEqual(@as(?bool, null), r.optional); - try testing.expectEqual(@as(i32, 42), r.default_field); - try testing.expectEqual(@as(f64, 66.6), r.static_array[0]); - try testing.expectEqual(@as(f64, 420.420), r.static_array[1]); - try testing.expectEqual(@as(f64, 69.69), r.static_array[2]); - try testing.expectEqual(@as(usize, 3), r.dynamic_array.len); - try testing.expectEqual(@as(f64, 66.6), r.dynamic_array[0]); - try testing.expectEqual(@as(f64, 420.420), r.dynamic_array[1]); - try testing.expectEqual(@as(f64, 69.69), r.dynamic_array[2]); - try testing.expectEqualSlices(u8, r.complex.nested, "zig"); - try testing.expectEqualSlices(u8, "zig", r.veryComplex[0].foo); - try testing.expectEqualSlices(u8, "rocks", r.veryComplex[1].foo); - try testing.expectEqual(T.Union{ .float = 100000 }, r.a_union); -} - -test "parse into struct with strings and arrays with sentinels" { - @setEvalBranchQuota(10000); - const options = ParseOptions{ .allocator = testing.allocator 
}; - const T = struct { - language: [:0]const u8, - language_without_sentinel: []const u8, - data: [:99]const i32, - simple_data: []const i32, - }; - var ts = TokenStream.init( - \\{ - \\ "language": "zig", - \\ "language_without_sentinel": "zig again!", - \\ "data": [1, 2, 3], - \\ "simple_data": [4, 5, 6] - \\} - ); - const r = try parse(T, &ts, options); - defer parseFree(T, r, options); - - try testing.expectEqualSentinel(u8, 0, "zig", r.language); - - const data = [_:99]i32{ 1, 2, 3 }; - try testing.expectEqualSentinel(i32, 99, data[0..data.len], r.data); - - // Make sure that arrays who aren't supposed to have a sentinel still parse without one. - try testing.expectEqual(@as(?i32, null), std.meta.sentinel(@TypeOf(r.simple_data))); - try testing.expectEqual(@as(?u8, null), std.meta.sentinel(@TypeOf(r.language_without_sentinel))); -} - -test "parse into struct with duplicate field" { - // allow allocator to detect double frees by keeping bucket in use - const ballast = try testing.allocator.alloc(u64, 1); - defer testing.allocator.free(ballast); - - const options_first = ParseOptions{ .allocator = testing.allocator, .duplicate_field_behavior = .UseFirst }; - - const options_last = ParseOptions{ - .allocator = testing.allocator, - .duplicate_field_behavior = .UseLast, - }; - - const str = "{ \"a\": 1, \"a\": 0.25 }"; - - const T1 = struct { a: *u64 }; - // both .UseFirst and .UseLast should fail because second "a" value isn't a u64 - var ts = TokenStream.init(str); - try testing.expectError(error.InvalidNumber, parse(T1, &ts, options_first)); - ts = TokenStream.init(str); - try testing.expectError(error.InvalidNumber, parse(T1, &ts, options_last)); - - const T2 = struct { a: f64 }; - ts = TokenStream.init(str); - try testing.expectEqual(T2{ .a = 1.0 }, try parse(T2, &ts, options_first)); - ts = TokenStream.init(str); - try testing.expectEqual(T2{ .a = 0.25 }, try parse(T2, &ts, options_last)); - - const T3 = struct { comptime a: f64 = 1.0 }; - // .UseFirst should succeed because second "a" value is unconditionally ignored (even though != 1.0) - const t3 = T3{ .a = 1.0 }; - ts = TokenStream.init(str); - try testing.expectEqual(t3, try parse(T3, &ts, options_first)); - // .UseLast should fail because second "a" value is 0.25 which is not equal to default value of 1.0 - ts = TokenStream.init(str); - try testing.expectError(error.UnexpectedValue, parse(T3, &ts, options_last)); -} - -test "parse into struct ignoring unknown fields" { - const T = struct { - int: i64, - language: []const u8, - }; - - const ops = ParseOptions{ - .allocator = testing.allocator, - .ignore_unknown_fields = true, - }; - - var ts = TokenStream.init( - \\{ - \\ "int": 420, - \\ "float": 3.14, - \\ "with\\escape": true, - \\ "with\u0105unicode\ud83d\ude02": false, - \\ "optional": null, - \\ "static_array": [66.6, 420.420, 69.69], - \\ "dynamic_array": [66.6, 420.420, 69.69], - \\ "complex": { - \\ "nested": "zig" - \\ }, - \\ "veryComplex": [ - \\ { - \\ "foo": "zig" - \\ }, { - \\ "foo": "rocks" - \\ } - \\ ], - \\ "a_union": 100000, - \\ "language": "zig" - \\} - ); - const r = try parse(T, &ts, ops); - defer parseFree(T, r, ops); - - try testing.expectEqual(@as(i64, 420), r.int); - try testing.expectEqualSlices(u8, "zig", r.language); -} - -test "parse into tuple" { - const options = ParseOptions{ .allocator = testing.allocator }; - const Union = union(enum) { - char: u8, - float: f64, - string: []const u8, - }; - const T = std.meta.Tuple(&.{ - i64, - f64, - bool, - []const u8, - ?bool, - struct { - foo: i32, - 
bar: []const u8, - }, - std.meta.Tuple(&.{ u8, []const u8, u8 }), - Union, - }); - var ts = TokenStream.init( - \\[ - \\ 420, - \\ 3.14, - \\ true, - \\ "zig", - \\ null, - \\ { - \\ "foo": 1, - \\ "bar": "zero" - \\ }, - \\ [4, "två", 42], - \\ 12.34 - \\] - ); - const r = try parse(T, &ts, options); - defer parseFree(T, r, options); - try testing.expectEqual(@as(i64, 420), r[0]); - try testing.expectEqual(@as(f64, 3.14), r[1]); - try testing.expectEqual(true, r[2]); - try testing.expectEqualSlices(u8, "zig", r[3]); - try testing.expectEqual(@as(?bool, null), r[4]); - try testing.expectEqual(@as(i32, 1), r[5].foo); - try testing.expectEqualSlices(u8, "zero", r[5].bar); - try testing.expectEqual(@as(u8, 4), r[6][0]); - try testing.expectEqualSlices(u8, "två", r[6][1]); - try testing.expectEqual(@as(u8, 42), r[6][2]); - try testing.expectEqual(Union{ .float = 12.34 }, r[7]); -} - -const ParseIntoRecursiveUnionDefinitionValue = union(enum) { - integer: i64, - array: []const ParseIntoRecursiveUnionDefinitionValue, -}; - -test "parse into recursive union definition" { - const T = struct { - values: ParseIntoRecursiveUnionDefinitionValue, - }; - const ops = ParseOptions{ .allocator = testing.allocator }; - - var ts = TokenStream.init("{\"values\":[58]}"); - const r = try parse(T, &ts, ops); - defer parseFree(T, r, ops); - - try testing.expectEqual(@as(i64, 58), r.values.array[0].integer); -} - -const ParseIntoDoubleRecursiveUnionValueFirst = union(enum) { - integer: i64, - array: []const ParseIntoDoubleRecursiveUnionValueSecond, -}; - -const ParseIntoDoubleRecursiveUnionValueSecond = union(enum) { - boolean: bool, - array: []const ParseIntoDoubleRecursiveUnionValueFirst, -}; - -test "parse into double recursive union definition" { - const T = struct { - values: ParseIntoDoubleRecursiveUnionValueFirst, - }; - const ops = ParseOptions{ .allocator = testing.allocator }; - - var ts = TokenStream.init("{\"values\":[[58]]}"); - const r = try parse(T, &ts, ops); - defer parseFree(T, r, ops); - - try testing.expectEqual(@as(i64, 58), r.values.array[0].array[0].integer); -} - -test "parse into vector" { - const options = ParseOptions{ .allocator = testing.allocator }; - const T = struct { - vec_i32: @Vector(4, i32), - vec_f32: @Vector(2, f32), - }; - var ts = TokenStream.init( - \\{ - \\ "vec_f32": [1.5, 2.5], - \\ "vec_i32": [4, 5, 6, 7] - \\} - ); - const r = try parse(T, &ts, options); - defer parseFree(T, r, options); - try testing.expectApproxEqAbs(@as(f32, 1.5), r.vec_f32[0], 0.0000001); - try testing.expectApproxEqAbs(@as(f32, 2.5), r.vec_f32[1], 0.0000001); - try testing.expectEqual(@Vector(4, i32){ 4, 5, 6, 7 }, r.vec_i32); -} - -test "json.parser.dynamic" { - var p = Parser.init(testing.allocator, false); - defer p.deinit(); - - const s = - \\{ - \\ "Image": { - \\ "Width": 800, - \\ "Height": 600, - \\ "Title": "View from 15th Floor", - \\ "Thumbnail": { - \\ "Url": "http://www.example.com/image/481989943", - \\ "Height": 125, - \\ "Width": 100 - \\ }, - \\ "Animated" : false, - \\ "IDs": [116, 943, 234, 38793], - \\ "ArrayOfObject": [{"n": "m"}], - \\ "double": 1.3412, - \\ "LargeInt": 18446744073709551615 - \\ } - \\} - ; - - var tree = try p.parse(s); - defer tree.deinit(); - - var root = tree.root; - - var image = root.Object.get("Image").?; - - const width = image.Object.get("Width").?; - try testing.expect(width.Integer == 800); - - const height = image.Object.get("Height").?; - try testing.expect(height.Integer == 600); - - const title = image.Object.get("Title").?; - try 
testing.expect(mem.eql(u8, title.String, "View from 15th Floor")); - - const animated = image.Object.get("Animated").?; - try testing.expect(animated.Bool == false); - - const array_of_object = image.Object.get("ArrayOfObject").?; - try testing.expect(array_of_object.Array.items.len == 1); - - const obj0 = array_of_object.Array.items[0].Object.get("n").?; - try testing.expect(mem.eql(u8, obj0.String, "m")); - - const double = image.Object.get("double").?; - try testing.expect(double.Float == 1.3412); - - const large_int = image.Object.get("LargeInt").?; - try testing.expect(mem.eql(u8, large_int.NumberString, "18446744073709551615")); -} - -test "write json then parse it" { - var out_buffer: [1000]u8 = undefined; - - var fixed_buffer_stream = std.io.fixedBufferStream(&out_buffer); - const out_stream = fixed_buffer_stream.writer(); - var jw = writeStream(out_stream, 4); - - try jw.beginObject(); - - try jw.objectField("f"); - try jw.emitBool(false); - - try jw.objectField("t"); - try jw.emitBool(true); - - try jw.objectField("int"); - try jw.emitNumber(1234); - - try jw.objectField("array"); - try jw.beginArray(); - - try jw.arrayElem(); - try jw.emitNull(); - - try jw.arrayElem(); - try jw.emitNumber(12.34); - - try jw.endArray(); - - try jw.objectField("str"); - try jw.emitString("hello"); - - try jw.endObject(); - - var parser = Parser.init(testing.allocator, false); - defer parser.deinit(); - var tree = try parser.parse(fixed_buffer_stream.getWritten()); - defer tree.deinit(); - - try testing.expect(tree.root.Object.get("f").?.Bool == false); - try testing.expect(tree.root.Object.get("t").?.Bool == true); - try testing.expect(tree.root.Object.get("int").?.Integer == 1234); - try testing.expect(tree.root.Object.get("array").?.Array.items[0].Null == {}); - try testing.expect(tree.root.Object.get("array").?.Array.items[1].Float == 12.34); - try testing.expect(mem.eql(u8, tree.root.Object.get("str").?.String, "hello")); -} - -fn testParse(arena_allocator: std.mem.Allocator, json_str: []const u8) !Value { - var p = Parser.init(arena_allocator, false); - return (try p.parse(json_str)).root; -} - -test "parsing empty string gives appropriate error" { - var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena_allocator.deinit(); - try testing.expectError(error.UnexpectedEndOfJson, testParse(arena_allocator.allocator(), "")); -} - -test "parse tree should not contain dangling pointers" { - var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena_allocator.deinit(); - - var p = json.Parser.init(arena_allocator.allocator(), false); - defer p.deinit(); - - var tree = try p.parse("[]"); - defer tree.deinit(); - - // Allocation should succeed - var i: usize = 0; - while (i < 100) : (i += 1) { - try tree.root.Array.append(std.json.Value{ .Integer = 100 }); - } - try testing.expectEqual(tree.root.Array.items.len, 100); -} - -test "integer after float has proper type" { - var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena_allocator.deinit(); - const parsed = try testParse(arena_allocator.allocator(), - \\{ - \\ "float": 3.14, - \\ "ints": [1, 2, 3] - \\} - ); - try std.testing.expect(parsed.Object.get("ints").?.Array.items[0] == .Integer); -} - -test "parse exponential into int" { - const T = struct { int: i64 }; - var ts = TokenStream.init("{ \"int\": 4.2e2 }"); - const r = try parse(T, &ts, ParseOptions{}); - try testing.expectEqual(@as(i64, 420), r.int); - ts = TokenStream.init("{ \"int\": 0.042e2 
}"); - try testing.expectError(error.InvalidNumber, parse(T, &ts, ParseOptions{})); - ts = TokenStream.init("{ \"int\": 18446744073709551616.0 }"); - try testing.expectError(error.Overflow, parse(T, &ts, ParseOptions{})); -} - -test "escaped characters" { - var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena_allocator.deinit(); - const input = - \\{ - \\ "backslash": "\\", - \\ "forwardslash": "\/", - \\ "newline": "\n", - \\ "carriagereturn": "\r", - \\ "tab": "\t", - \\ "formfeed": "\f", - \\ "backspace": "\b", - \\ "doublequote": "\"", - \\ "unicode": "\u0105", - \\ "surrogatepair": "\ud83d\ude02" - \\} - ; - - const obj = (try testParse(arena_allocator.allocator(), input)).Object; - - try testing.expectEqualSlices(u8, obj.get("backslash").?.String, "\\"); - try testing.expectEqualSlices(u8, obj.get("forwardslash").?.String, "/"); - try testing.expectEqualSlices(u8, obj.get("newline").?.String, "\n"); - try testing.expectEqualSlices(u8, obj.get("carriagereturn").?.String, "\r"); - try testing.expectEqualSlices(u8, obj.get("tab").?.String, "\t"); - try testing.expectEqualSlices(u8, obj.get("formfeed").?.String, "\x0C"); - try testing.expectEqualSlices(u8, obj.get("backspace").?.String, "\x08"); - try testing.expectEqualSlices(u8, obj.get("doublequote").?.String, "\""); - try testing.expectEqualSlices(u8, obj.get("unicode").?.String, "ą"); - try testing.expectEqualSlices(u8, obj.get("surrogatepair").?.String, "😂"); -} - -test "string copy option" { - const input = - \\{ - \\ "noescape": "aą😂", - \\ "simple": "\\\/\n\r\t\f\b\"", - \\ "unicode": "\u0105", - \\ "surrogatepair": "\ud83d\ude02" - \\} - ; - - var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena_allocator.deinit(); - const allocator = arena_allocator.allocator(); - - var parser = Parser.init(allocator, false); - const tree_nocopy = try parser.parse(input); - const obj_nocopy = tree_nocopy.root.Object; - - parser = Parser.init(allocator, true); - const tree_copy = try parser.parse(input); - const obj_copy = tree_copy.root.Object; - - for ([_][]const u8{ "noescape", "simple", "unicode", "surrogatepair" }) |field_name| { - try testing.expectEqualSlices(u8, obj_nocopy.get(field_name).?.String, obj_copy.get(field_name).?.String); - } - - const nocopy_addr = &obj_nocopy.get("noescape").?.String[0]; - const copy_addr = &obj_copy.get("noescape").?.String[0]; - - var found_nocopy = false; - for (input, 0..) |_, index| { - try testing.expect(copy_addr != &input[index]); - if (nocopy_addr == &input[index]) { - found_nocopy = true; - } - } - try testing.expect(found_nocopy); -} - -test "stringify alloc" { - const allocator = std.testing.allocator; - const expected = - \\{"foo":"bar","answer":42,"my_friend":"sammy"} - ; - const actual = try stringifyAlloc(allocator, .{ .foo = "bar", .answer = 42, .my_friend = "sammy" }, .{}); - defer allocator.free(actual); - - try std.testing.expectEqualStrings(expected, actual); -} - -test "json.serialize issue #5959" { - var parser: StreamingParser = undefined; - // StreamingParser has multiple internal fields set to undefined. This causes issues when using - // expectEqual so these are zeroed. We are testing for equality here only because this is a - // known small test reproduction which hits the relevant LLVM issue. 
-    @memset(@ptrCast([*]u8, &parser)[0..@sizeOf(StreamingParser)], 0);
-    try std.testing.expectEqual(parser, parser);
-}
-
-fn checkNext(p: *TokenStream, id: std.meta.Tag(Token)) !void {
-    const token = (p.next() catch unreachable).?;
-    try testing.expect(std.meta.activeTag(token) == id);
-}
-
-test "json.token" {
-    const s =
-        \\{
-        \\  "Image": {
-        \\    "Width": 800,
-        \\    "Height": 600,
-        \\    "Title": "View from 15th Floor",
-        \\    "Thumbnail": {
-        \\      "Url": "http://www.example.com/image/481989943",
-        \\      "Height": 125,
-        \\      "Width": 100
-        \\    },
-        \\    "Animated" : false,
-        \\    "IDs": [116, 943, 234, 38793]
-        \\  }
-        \\}
-    ;
-
-    var p = TokenStream.init(s);
-
-    try checkNext(&p, .ObjectBegin);
-    try checkNext(&p, .String); // Image
-    try checkNext(&p, .ObjectBegin);
-    try checkNext(&p, .String); // Width
-    try checkNext(&p, .Number);
-    try checkNext(&p, .String); // Height
-    try checkNext(&p, .Number);
-    try checkNext(&p, .String); // Title
-    try checkNext(&p, .String);
-    try checkNext(&p, .String); // Thumbnail
-    try checkNext(&p, .ObjectBegin);
-    try checkNext(&p, .String); // Url
-    try checkNext(&p, .String);
-    try checkNext(&p, .String); // Height
-    try checkNext(&p, .Number);
-    try checkNext(&p, .String); // Width
-    try checkNext(&p, .Number);
-    try checkNext(&p, .ObjectEnd);
-    try checkNext(&p, .String); // Animated
-    try checkNext(&p, .False);
-    try checkNext(&p, .String); // IDs
-    try checkNext(&p, .ArrayBegin);
-    try checkNext(&p, .Number);
-    try checkNext(&p, .Number);
-    try checkNext(&p, .Number);
-    try checkNext(&p, .Number);
-    try checkNext(&p, .ArrayEnd);
-    try checkNext(&p, .ObjectEnd);
-    try checkNext(&p, .ObjectEnd);
-
-    try testing.expect((try p.next()) == null);
-}
-
-test "json.token mismatched close" {
-    var p = TokenStream.init("[102, 111, 111 }");
-    try checkNext(&p, .ArrayBegin);
-    try checkNext(&p, .Number);
-    try checkNext(&p, .Number);
-    try checkNext(&p, .Number);
-    try testing.expectError(error.UnexpectedClosingBrace, p.next());
-}
-
-test "json.token premature object close" {
-    var p = TokenStream.init("{ \"key\": }");
-    try checkNext(&p, .ObjectBegin);
-    try checkNext(&p, .String);
-    try testing.expectError(error.InvalidValueBegin, p.next());
-}
-
-test "json.validate" {
-    try testing.expectEqual(true, validate("{}"));
-    try testing.expectEqual(true, validate("[]"));
-    try testing.expectEqual(true, validate("[{[[[[{}]]]]}]"));
-    try testing.expectEqual(false, validate("{]"));
-    try testing.expectEqual(false, validate("[}"));
-    try testing.expectEqual(false, validate("{{{{[]}}}]"));
-}
-
-test "Value.jsonStringify" {
-    {
-        var buffer: [10]u8 = undefined;
-        var fbs = std.io.fixedBufferStream(&buffer);
-        try @as(Value, .Null).jsonStringify(.{}, fbs.writer());
-        try testing.expectEqualSlices(u8, fbs.getWritten(), "null");
-    }
-    {
-        var buffer: [10]u8 = undefined;
-        var fbs = std.io.fixedBufferStream(&buffer);
-        try (Value{ .Bool = true }).jsonStringify(.{}, fbs.writer());
-        try testing.expectEqualSlices(u8, fbs.getWritten(), "true");
-    }
-    {
-        var buffer: [10]u8 = undefined;
-        var fbs = std.io.fixedBufferStream(&buffer);
-        try (Value{ .Integer = 42 }).jsonStringify(.{}, fbs.writer());
-        try testing.expectEqualSlices(u8, fbs.getWritten(), "42");
-    }
-    {
-        var buffer: [10]u8 = undefined;
-        var fbs = std.io.fixedBufferStream(&buffer);
-        try (Value{ .NumberString = "43" }).jsonStringify(.{}, fbs.writer());
-        try testing.expectEqualSlices(u8, fbs.getWritten(), "43");
-    }
-    {
-        var buffer: [10]u8 = undefined;
-        var fbs = std.io.fixedBufferStream(&buffer);
-        try (Value{ .Float = 42 }).jsonStringify(.{}, fbs.writer());
-        try testing.expectEqualSlices(u8, fbs.getWritten(), "4.2e+01");
-    }
-    {
-        var buffer: [10]u8 = undefined;
-        var fbs = std.io.fixedBufferStream(&buffer);
-        try (Value{ .String = "weeee" }).jsonStringify(.{}, fbs.writer());
-        try testing.expectEqualSlices(u8, fbs.getWritten(), "\"weeee\"");
-    }
-    {
-        var buffer: [10]u8 = undefined;
-        var fbs = std.io.fixedBufferStream(&buffer);
-        var vals = [_]Value{
-            .{ .Integer = 1 },
-            .{ .Integer = 2 },
-            .{ .NumberString = "3" },
-        };
-        try (Value{
-            .Array = Array.fromOwnedSlice(undefined, &vals),
-        }).jsonStringify(.{}, fbs.writer());
-        try testing.expectEqualSlices(u8, fbs.getWritten(), "[1,2,3]");
-    }
-    {
-        var buffer: [10]u8 = undefined;
-        var fbs = std.io.fixedBufferStream(&buffer);
-        var obj = ObjectMap.init(testing.allocator);
-        defer obj.deinit();
-        try obj.putNoClobber("a", .{ .String = "b" });
-        try (Value{ .Object = obj }).jsonStringify(.{}, fbs.writer());
-        try testing.expectEqualSlices(u8, fbs.getWritten(), "{\"a\":\"b\"}");
-    }
+    try err("\"\xf4\x90\x80\x80\"");
+    try err("\"\xf5\x80\x80\x80\"");
+    try err("\"\xf6\x80\x80\x80\"");
+    try err("\"\xf7\x80\x80\x80\"");
+    try err("\"\xf8\x80\x80\x80\"");
+    try err("\"\xf9\x80\x80\x80\"");
+    try err("\"\xfa\x80\x80\x80\"");
+    try err("\"\xfb\x80\x80\x80\"");
+    try err("\"\xfc\x80\x80\x80\"");
+    try err("\"\xfd\x80\x80\x80\"");
+    try err("\"\xfe\x80\x80\x80\"");
+    try err("\"\xff\x80\x80\x80\"");
 }
diff --git a/lib/std/json/write_stream.zig b/lib/std/json/write_stream.zig
index 298f640856..760bad13fd 100644
--- a/lib/std/json/write_stream.zig
+++ b/lib/std/json/write_stream.zig
@@ -1,14 +1,19 @@
-const std = @import("../std.zig");
+const std = @import("std");
 const assert = std.debug.assert;
 const maxInt = std.math.maxInt;
 
+const StringifyOptions = @import("./stringify.zig").StringifyOptions;
+const jsonStringify = @import("./stringify.zig").stringify;
+
+const Value = @import("./dynamic.zig").Value;
+
 const State = enum {
-    Complete,
-    Value,
-    ArrayStart,
-    Array,
-    ObjectStart,
-    Object,
+    complete,
+    value,
+    array_start,
+    array,
+    object_start,
+    object,
 };
 
 /// Writes JSON ([RFC8259](https://tools.ietf.org/html/rfc8259)) formatted data
@@ -21,9 +26,9 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
 
         pub const Stream = OutStream;
 
-        whitespace: std.json.StringifyOptions.Whitespace = std.json.StringifyOptions.Whitespace{
+        whitespace: StringifyOptions.Whitespace = StringifyOptions.Whitespace{
             .indent_level = 0,
-            .indent = .{ .Space = 1 },
+            .indent = .{ .space = 1 },
         },
 
         stream: OutStream,
@@ -36,38 +41,38 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
                 .state_index = 1,
                 .state = undefined,
             };
-            self.state[0] = .Complete;
-            self.state[1] = .Value;
+            self.state[0] = .complete;
+            self.state[1] = .value;
             return self;
        }
 
         pub fn beginArray(self: *Self) !void {
-            assert(self.state[self.state_index] == State.Value); // need to call arrayElem or objectField
+            assert(self.state[self.state_index] == State.value); // need to call arrayElem or objectField
             try self.stream.writeByte('[');
-            self.state[self.state_index] = State.ArrayStart;
+            self.state[self.state_index] = State.array_start;
             self.whitespace.indent_level += 1;
         }
 
         pub fn beginObject(self: *Self) !void {
-            assert(self.state[self.state_index] == State.Value); // need to call arrayElem or objectField
+            assert(self.state[self.state_index] == State.value); // need to call arrayElem or objectField
             try self.stream.writeByte('{');
-            self.state[self.state_index] = State.ObjectStart;
+            self.state[self.state_index] = State.object_start;
             self.whitespace.indent_level += 1;
         }
 
         pub fn arrayElem(self: *Self) !void {
             const state = self.state[self.state_index];
             switch (state) {
-                .Complete => unreachable,
-                .Value => unreachable,
-                .ObjectStart => unreachable,
-                .Object => unreachable,
-                .Array, .ArrayStart => {
-                    if (state == .Array) {
+                .complete => unreachable,
+                .value => unreachable,
+                .object_start => unreachable,
+                .object => unreachable,
+                .array, .array_start => {
+                    if (state == .array) {
                         try self.stream.writeByte(',');
                     }
-                    self.state[self.state_index] = .Array;
-                    self.pushState(.Value);
+                    self.state[self.state_index] = .array;
+                    self.pushState(.value);
                     try self.indent();
                 },
             }
@@ -76,16 +81,16 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
         pub fn objectField(self: *Self, name: []const u8) !void {
             const state = self.state[self.state_index];
             switch (state) {
-                .Complete => unreachable,
-                .Value => unreachable,
-                .ArrayStart => unreachable,
-                .Array => unreachable,
-                .Object, .ObjectStart => {
-                    if (state == .Object) {
+                .complete => unreachable,
+                .value => unreachable,
+                .array_start => unreachable,
+                .array => unreachable,
+                .object, .object_start => {
+                    if (state == .object) {
                         try self.stream.writeByte(',');
                     }
-                    self.state[self.state_index] = .Object;
-                    self.pushState(.Value);
+                    self.state[self.state_index] = .object;
+                    self.pushState(.value);
                     try self.indent();
                     try self.writeEscapedString(name);
                     try self.stream.writeByte(':');
@@ -98,16 +103,16 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
 
         pub fn endArray(self: *Self) !void {
             switch (self.state[self.state_index]) {
-                .Complete => unreachable,
-                .Value => unreachable,
-                .ObjectStart => unreachable,
-                .Object => unreachable,
-                .ArrayStart => {
+                .complete => unreachable,
+                .value => unreachable,
+                .object_start => unreachable,
+                .object => unreachable,
+                .array_start => {
                     self.whitespace.indent_level -= 1;
                     try self.stream.writeByte(']');
                     self.popState();
                 },
-                .Array => {
+                .array => {
                     self.whitespace.indent_level -= 1;
                     try self.indent();
                     self.popState();
@@ -118,16 +123,16 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
 
         pub fn endObject(self: *Self) !void {
             switch (self.state[self.state_index]) {
-                .Complete => unreachable,
-                .Value => unreachable,
-                .ArrayStart => unreachable,
-                .Array => unreachable,
-                .ObjectStart => {
+                .complete => unreachable,
+                .value => unreachable,
+                .array_start => unreachable,
+                .array => unreachable,
+                .object_start => {
                     self.whitespace.indent_level -= 1;
                     try self.stream.writeByte('}');
                     self.popState();
                 },
-                .Object => {
+                .object => {
                     self.whitespace.indent_level -= 1;
                     try self.indent();
                     self.popState();
@@ -137,13 +142,13 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
         }
 
         pub fn emitNull(self: *Self) !void {
-            assert(self.state[self.state_index] == State.Value);
+            assert(self.state[self.state_index] == State.value);
             try self.stringify(null);
             self.popState();
        }
 
         pub fn emitBool(self: *Self, value: bool) !void {
-            assert(self.state[self.state_index] == State.Value);
+            assert(self.state[self.state_index] == State.value);
             try self.stringify(value);
             self.popState();
         }
@@ -154,7 +159,7 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
            /// in a IEEE 754 double float, otherwise emitted as a string to the full precision.
             value: anytype,
         ) !void {
-            assert(self.state[self.state_index] == State.Value);
+            assert(self.state[self.state_index] == State.value);
             switch (@typeInfo(@TypeOf(value))) {
                 .Int => |info| {
                     if (info.bits < 53) {
@@ -183,7 +188,7 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
         }
 
         pub fn emitString(self: *Self, string: []const u8) !void {
-            assert(self.state[self.state_index] == State.Value);
+            assert(self.state[self.state_index] == State.value);
             try self.writeEscapedString(string);
             self.popState();
         }
@@ -194,9 +199,9 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
         }
 
         /// Writes the complete json into the output stream
-        pub fn emitJson(self: *Self, json: std.json.Value) Stream.Error!void {
-            assert(self.state[self.state_index] == State.Value);
-            try self.stringify(json);
+        pub fn emitJson(self: *Self, value: Value) Stream.Error!void {
+            assert(self.state[self.state_index] == State.value);
+            try self.stringify(value);
             self.popState();
         }
 
@@ -215,7 +220,7 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
         }
 
         fn stringify(self: *Self, value: anytype) !void {
-            try std.json.stringify(value, std.json.StringifyOptions{
+            try jsonStringify(value, StringifyOptions{
                 .whitespace = self.whitespace,
             }, self.stream);
         }
@@ -229,6 +234,8 @@ pub fn writeStream(
     return WriteStream(@TypeOf(out_stream), max_depth).init(out_stream);
 }
 
+const ObjectMap = @import("./dynamic.zig").ObjectMap;
+
 test "json write stream" {
     var out_buf: [1024]u8 = undefined;
     var slice_stream = std.io.fixedBufferStream(&out_buf);
@@ -237,7 +244,7 @@ test "json write stream" {
     var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
     defer arena_allocator.deinit();
 
-    var w = std.json.writeStream(out, 10);
+    var w = writeStream(out, 10);
 
     try w.beginObject();
 
@@ -285,9 +292,9 @@ test "json write stream" {
     try std.testing.expect(std.mem.eql(u8, expected, result));
 }
 
-fn getJsonObject(allocator: std.mem.Allocator) !std.json.Value {
-    var value = std.json.Value{ .Object = std.json.ObjectMap.init(allocator) };
-    try value.Object.put("one", std.json.Value{ .Integer = @intCast(i64, 1) });
-    try value.Object.put("two", std.json.Value{ .Float = 2.0 });
+fn getJsonObject(allocator: std.mem.Allocator) !Value {
+    var value = Value{ .object = ObjectMap.init(allocator) };
+    try value.object.put("one", Value{ .integer = @intCast(i64, 1) });
+    try value.object.put("two", Value{ .float = 2.0 });
     return value;
 }
diff --git a/src/Autodoc.zig b/src/Autodoc.zig
index 4bfba5fc2f..5e1c4c7822 100644
--- a/src/Autodoc.zig
+++ b/src/Autodoc.zig
@@ -295,7 +295,7 @@ pub fn generateZirData(self: *Autodoc) !void {
     try std.json.stringify(
         data,
         .{
-            .whitespace = .{ .indent = .None, .separator = false },
+            .whitespace = .{ .indent = .none, .separator = false },
             .emit_null_optional_fields = true,
         },
         out,
@@ -444,7 +444,7 @@ const DocData = struct {
         w: anytype,
     ) !void {
         var jsw = std.json.writeStream(w, 15);
-        if (opts.whitespace) |ws| jsw.whitespace = ws;
+        jsw.whitespace = opts.whitespace;
         try jsw.beginObject();
         inline for (comptime std.meta.tags(std.meta.FieldEnum(DocData))) |f| {
             const f_name = @tagName(f);
@@ -495,7 +495,7 @@ const DocData = struct {
         w: anytype,
     ) !void {
         var jsw = std.json.writeStream(w, 15);
-        if (opts.whitespace) |ws| jsw.whitespace = ws;
+        jsw.whitespace = opts.whitespace;
         try jsw.beginObject();
 
         inline for (comptime std.meta.tags(std.meta.FieldEnum(DocModule))) |f| {
@@ -529,7 +529,7 @@ const DocData = struct {
         w: anytype,
    ) !void {
         var jsw = std.json.writeStream(w, 15);
-        if (opts.whitespace) |ws| jsw.whitespace = ws;
+        jsw.whitespace = opts.whitespace;
         try jsw.beginArray();
         inline for (comptime std.meta.fields(Decl)) |f| {
             try jsw.arrayElem();
@@ -556,7 +556,7 @@ const DocData = struct {
         w: anytype,
     ) !void {
         var jsw = std.json.writeStream(w, 15);
-        if (opts.whitespace) |ws| jsw.whitespace = ws;
+        jsw.whitespace = opts.whitespace;
         try jsw.beginArray();
         inline for (comptime std.meta.fields(AstNode)) |f| {
             try jsw.arrayElem();
@@ -689,7 +689,7 @@ const DocData = struct {
     ) !void {
         const active_tag = std.meta.activeTag(self);
         var jsw = std.json.writeStream(w, 15);
-        if (opts.whitespace) |ws| jsw.whitespace = ws;
+        jsw.whitespace = opts.whitespace;
         try jsw.beginArray();
         try jsw.arrayElem();
         try jsw.emitNumber(@enumToInt(active_tag));
@@ -831,7 +831,7 @@ const DocData = struct {
     ) @TypeOf(w).Error!void {
         const active_tag = std.meta.activeTag(self);
         var jsw = std.json.writeStream(w, 15);
-        if (opts.whitespace) |ws| jsw.whitespace = ws;
+        jsw.whitespace = opts.whitespace;
         try jsw.beginObject();
         if (active_tag == .declIndex) {
             try jsw.objectField("declRef");
diff --git a/src/print_env.zig b/src/print_env.zig
index 91b655dcef..58da854989 100644
--- a/src/print_env.zig
+++ b/src/print_env.zig
@@ -28,7 +28,7 @@ pub fn cmdEnv(gpa: Allocator, args: []const []const u8, stdout: std.fs.File.Writ
     var bw = std.io.bufferedWriter(stdout);
     const w = bw.writer();
 
-    var jws = std.json.WriteStream(@TypeOf(w), 6).init(w);
+    var jws = std.json.writeStream(w, 6);
 
     try jws.beginObject();
     try jws.objectField("zig_exe");
diff --git a/src/print_targets.zig b/src/print_targets.zig
index a461ca6b85..ea4e30ae58 100644
--- a/src/print_targets.zig
+++ b/src/print_targets.zig
@@ -40,7 +40,7 @@ pub fn cmdTargets(
     var bw = io.bufferedWriter(stdout);
     const w = bw.writer();
 
-    var jws = std.json.WriteStream(@TypeOf(w), 6).init(w);
+    var jws = std.json.writeStream(w, 6);
 
     try jws.beginObject();
 
diff --git a/tools/gen_spirv_spec.zig b/tools/gen_spirv_spec.zig
index 5ed76448e4..28a5ed7ded 100644
--- a/tools/gen_spirv_spec.zig
+++ b/tools/gen_spirv_spec.zig
@@ -20,8 +20,7 @@ pub fn main() !void {
     // Required for json parsing.
     @setEvalBranchQuota(10000);
 
-    var tokens = std.json.TokenStream.init(spec);
-    var registry = try std.json.parse(g.Registry, &tokens, .{ .allocator = allocator });
+    var registry = try std.json.parseFromSlice(g.Registry, allocator, spec, .{});
 
     const core_reg = switch (registry) {
         .core => |core_reg| core_reg,
diff --git a/tools/generate_JSONTestSuite.zig b/tools/generate_JSONTestSuite.zig
new file mode 100644
index 0000000000..b8550959c7
--- /dev/null
+++ b/tools/generate_JSONTestSuite.zig
@@ -0,0 +1,79 @@
+// zig run this file inside the test_parsing/ directory of this repo: https://github.com/nst/JSONTestSuite
+
+const std = @import("std");
+
+pub fn main() !void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+    var allocator = gpa.allocator();
+
+    var output = std.io.getStdOut().writer();
+    try output.writeAll(
+        \\// This file was generated by _generate_JSONTestSuite.zig
+        \\// These test cases are sourced from: https://github.com/nst/JSONTestSuite
+        \\const ok = @import("./test.zig").ok;
+        \\const err = @import("./test.zig").err;
+        \\const any = @import("./test.zig").any;
+        \\
+        \\
+    );
+
+    var names = std.ArrayList([]const u8).init(allocator);
+    var cwd = try std.fs.cwd().openIterableDir(".", .{});
+    var it = cwd.iterate();
+    while (try it.next()) |entry| {
+        try names.append(try allocator.dupe(u8, entry.name));
+    }
+    std.sort.sort([]const u8, names.items, {}, (struct {
+        fn lessThan(_: void, a: []const u8, b: []const u8) bool {
+            return std.mem.lessThan(u8, a, b);
+        }
+    }).lessThan);
+
+    for (names.items) |name| {
+        const contents = try std.fs.cwd().readFileAlloc(allocator, name, 250001);
+        try output.writeAll("test ");
+        try writeString(output, name);
+        try output.writeAll(" {\n    try ");
+        switch (name[0]) {
+            'y' => try output.writeAll("ok"),
+            'n' => try output.writeAll("err"),
+            'i' => try output.writeAll("any"),
+            else => unreachable,
+        }
+        try output.writeByte('(');
+        try writeString(output, contents);
+        try output.writeAll(");\n}\n");
+    }
+}
+
+const i_structure_500_nested_arrays = "[" ** 500 ++ "]" ** 500;
+const n_structure_100000_opening_arrays = "[" ** 100000;
+const n_structure_open_array_object = "[{\"\":" ** 50000 ++ "\n";
+
+fn writeString(writer: anytype, s: []const u8) !void {
+    if (s.len > 200) {
+        // There are a few of these we can compress with Zig expressions.
+        if (std.mem.eql(u8, s, i_structure_500_nested_arrays)) {
+            return writer.writeAll("\"[\" ** 500 ++ \"]\" ** 500");
+        } else if (std.mem.eql(u8, s, n_structure_100000_opening_arrays)) {
+            return writer.writeAll("\"[\" ** 100000");
+        } else if (std.mem.eql(u8, s, n_structure_open_array_object)) {
+            return writer.writeAll("\"[{\\\"\\\":\" ** 50000 ++ \"\\n\"");
+        }
+        unreachable;
+    }
+    try writer.writeByte('"');
+    for (s) |b| {
+        switch (b) {
+            0...('\n' - 1),
+            ('\n' + 1)...0x1f,
+            0x7f...0xff,
+            => try writer.print("\\x{x:0>2}", .{b}),
+            '\n' => try writer.writeAll("\\n"),
+            '"' => try writer.writeAll("\\\""),
+            '\\' => try writer.writeAll("\\\\"),
+            else => try writer.writeByte(b),
+        }
+    }
+    try writer.writeByte('"');
+}
diff --git a/tools/update_clang_options.zig b/tools/update_clang_options.zig
index 7639d08ce3..682ec7e152 100644
--- a/tools/update_clang_options.zig
+++ b/tools/update_clang_options.zig
@@ -624,9 +624,9 @@ pub fn main() anyerror!void {
         },
     };
 
-    var parser = json.Parser.init(allocator, false);
+    var parser = json.Parser.init(allocator, .alloc_if_needed);
     const tree = try parser.parse(json_text);
-    const root_map = &tree.root.Object;
+    const root_map = &tree.root.object;
 
     var all_objects = std.ArrayList(*json.ObjectMap).init(allocator);
     {
@@ -634,14 +634,14 @@ pub fn main() anyerror!void {
         it_map: while (it.next()) |kv| {
             if (kv.key_ptr.len == 0) continue;
             if (kv.key_ptr.*[0] == '!') continue;
-            if (kv.value_ptr.* != .Object) continue;
-            if (!kv.value_ptr.Object.contains("NumArgs")) continue;
-            if (!kv.value_ptr.Object.contains("Name")) continue;
+            if (kv.value_ptr.* != .object) continue;
+            if (!kv.value_ptr.object.contains("NumArgs")) continue;
+            if (!kv.value_ptr.object.contains("Name")) continue;
             for (blacklisted_options) |blacklisted_key| {
                 if (std.mem.eql(u8, blacklisted_key, kv.key_ptr.*)) continue :it_map;
             }
-            if (kv.value_ptr.Object.get("Name").?.String.len == 0) continue;
-            try all_objects.append(&kv.value_ptr.Object);
+            if (kv.value_ptr.object.get("Name").?.string.len == 0) continue;
+            try all_objects.append(&kv.value_ptr.object);
         }
     }
     // Some options have multiple matches. As an example, "-Wl,foo" matches both
@@ -666,12 +666,12 @@ pub fn main() anyerror!void {
     );
 
     for (all_objects.items) |obj| {
-        const name = obj.get("Name").?.String;
+        const name = obj.get("Name").?.string;
         var pd1 = false;
         var pd2 = false;
         var pslash = false;
-        for (obj.get("Prefixes").?.Array.items) |prefix_json| {
-            const prefix = prefix_json.String;
+        for (obj.get("Prefixes").?.array.items) |prefix_json| {
+            const prefix = prefix_json.string;
             if (std.mem.eql(u8, prefix, "-")) {
                 pd1 = true;
             } else if (std.mem.eql(u8, prefix, "--")) {
@@ -790,9 +790,9 @@ const Syntax = union(enum) {
 };
 
 fn objSyntax(obj: *json.ObjectMap) ?Syntax {
-    const num_args = @intCast(u8, obj.get("NumArgs").?.Integer);
-    for (obj.get("!superclasses").?.Array.items) |superclass_json| {
-        const superclass = superclass_json.String;
+    const num_args = @intCast(u8, obj.get("NumArgs").?.integer);
+    for (obj.get("!superclasses").?.array.items) |superclass_json| {
+        const superclass = superclass_json.string;
         if (std.mem.eql(u8, superclass, "Joined")) {
             return .joined;
         } else if (std.mem.eql(u8, superclass, "CLJoined")) {
@@ -831,20 +831,20 @@ fn objSyntax(obj: *json.ObjectMap) ?Syntax {
             return .{ .multi_arg = num_args };
         }
     }
-    const name = obj.get("Name").?.String;
+    const name = obj.get("Name").?.string;
     if (std.mem.eql(u8, name, "<input>")) {
         return .flag;
     } else if (std.mem.eql(u8, name, "<unknown>")) {
         return .flag;
     }
-    const kind_def = obj.get("Kind").?.Object.get("def").?.String;
+    const kind_def = obj.get("Kind").?.object.get("def").?.string;
     if (std.mem.eql(u8, kind_def, "KIND_FLAG")) {
         return .flag;
     }
-    const key = obj.get("!name").?.String;
+    const key = obj.get("!name").?.string;
     std.debug.print("{s} (key {s}) has unrecognized superclasses:\n", .{ name, key });
-    for (obj.get("!superclasses").?.Array.items) |superclass_json| {
-        std.debug.print(" {s}\n", .{superclass_json.String});
+    for (obj.get("!superclasses").?.array.items) |superclass_json| {
+        std.debug.print(" {s}\n", .{superclass_json.string});
     }
     //std.process.exit(1);
     return null;
@@ -883,15 +883,15 @@ fn objectLessThan(context: void, a: *json.ObjectMap, b: *json.ObjectMap) bool {
     }
 
     if (!a_match_with_eql and !b_match_with_eql) {
-        const a_name = a.get("Name").?.String;
-        const b_name = b.get("Name").?.String;
+        const a_name = a.get("Name").?.string;
+        const b_name = b.get("Name").?.string;
         if (a_name.len != b_name.len) {
             return a_name.len > b_name.len;
         }
     }
 
-    const a_key = a.get("!name").?.String;
-    const b_key = b.get("!name").?.String;
+    const a_key = a.get("!name").?.string;
+    const b_key = b.get("!name").?.string;
     return std.mem.lessThan(u8, a_key, b_key);
 }
diff --git a/tools/update_cpu_features.zig b/tools/update_cpu_features.zig
index dd1b96fa7c..53bb365f41 100644
--- a/tools/update_cpu_features.zig
+++ b/tools/update_cpu_features.zig
@@ -1054,14 +1054,14 @@ fn processOneTarget(job: Job) anyerror!void {
     var json_parse_progress = progress_node.start("parse JSON", 0);
     json_parse_progress.activate();
 
-    var parser = json.Parser.init(arena, false);
+    var parser = json.Parser.init(arena, .alloc_if_needed);
     const tree = try parser.parse(json_text);
     json_parse_progress.end();
 
     var render_progress = progress_node.start("render zig code", 0);
     render_progress.activate();
 
-    const root_map = &tree.root.Object;
+    const root_map = &tree.root.object;
     var features_table = std.StringHashMap(Feature).init(arena);
     var all_features = std.ArrayList(Feature).init(arena);
     var all_cpus = std.ArrayList(Cpu).init(arena);
@@ -1070,21 +1070,21 @@ fn processOneTarget(job: Job) anyerror!void {
         root_it: while (it.next()) |kv| {
             if (kv.key_ptr.len == 0) continue;
             if (kv.key_ptr.*[0] == '!') continue;
-            if (kv.value_ptr.* != .Object) continue;
-            if (hasSuperclass(&kv.value_ptr.Object, "SubtargetFeature")) {
-                const llvm_name = kv.value_ptr.Object.get("Name").?.String;
+            if (kv.value_ptr.* != .object) continue;
+            if (hasSuperclass(&kv.value_ptr.object, "SubtargetFeature")) {
+                const llvm_name = kv.value_ptr.object.get("Name").?.string;
                 if (llvm_name.len == 0) continue;
 
                 var zig_name = try llvmNameToZigName(arena, llvm_name);
-                var desc = kv.value_ptr.Object.get("Desc").?.String;
+                var desc = kv.value_ptr.object.get("Desc").?.string;
                 var deps = std.ArrayList([]const u8).init(arena);
                 var omit = false;
                 var flatten = false;
-                const implies = kv.value_ptr.Object.get("Implies").?.Array;
+                const implies = kv.value_ptr.object.get("Implies").?.array;
                 for (implies.items) |imply| {
-                    const other_key = imply.Object.get("def").?.String;
-                    const other_obj = &root_map.getPtr(other_key).?.Object;
-                    const other_llvm_name = other_obj.get("Name").?.String;
+                    const other_key = imply.object.get("def").?.string;
+                    const other_obj = &root_map.getPtr(other_key).?.object;
+                    const other_llvm_name = other_obj.get("Name").?.string;
                     const other_zig_name = (try llvmNameToZigNameOmit(
                         arena,
                         llvm_target,
@@ -1126,17 +1126,17 @@ fn processOneTarget(job: Job) anyerror!void {
                     try all_features.append(feature);
                 }
             }
-            if (hasSuperclass(&kv.value_ptr.Object, "Processor")) {
-                const llvm_name = kv.value_ptr.Object.get("Name").?.String;
+            if (hasSuperclass(&kv.value_ptr.object, "Processor")) {
+                const llvm_name = kv.value_ptr.object.get("Name").?.string;
                 if (llvm_name.len == 0) continue;
 
                 var zig_name = try llvmNameToZigName(arena, llvm_name);
                 var deps = std.ArrayList([]const u8).init(arena);
-                const features = kv.value_ptr.Object.get("Features").?.Array;
+                const features = kv.value_ptr.object.get("Features").?.array;
                 for (features.items) |feature| {
-                    const feature_key = feature.Object.get("def").?.String;
-                    const feature_obj = &root_map.getPtr(feature_key).?.Object;
-                    const feature_llvm_name = feature_obj.get("Name").?.String;
+                    const feature_key = feature.object.get("def").?.string;
+                    const feature_obj = &root_map.getPtr(feature_key).?.object;
+                    const feature_llvm_name = feature_obj.get("Name").?.string;
                     if (feature_llvm_name.len == 0) continue;
                     const feature_zig_name = (try llvmNameToZigNameOmit(
                         arena,
@@ -1145,11 +1145,11 @@ fn processOneTarget(job: Job) anyerror!void {
                     )) orelse continue;
                     try deps.append(feature_zig_name);
                 }
-                const tune_features = kv.value_ptr.Object.get("TuneFeatures").?.Array;
+                const tune_features = kv.value_ptr.object.get("TuneFeatures").?.array;
                 for (tune_features.items) |feature| {
-                    const feature_key = feature.Object.get("def").?.String;
-                    const feature_obj = &root_map.getPtr(feature_key).?.Object;
-                    const feature_llvm_name = feature_obj.get("Name").?.String;
+                    const feature_key = feature.object.get("def").?.string;
+                    const feature_obj = &root_map.getPtr(feature_key).?.object;
+                    const feature_llvm_name = feature_obj.get("Name").?.string;
                     if (feature_llvm_name.len == 0) continue;
                     const feature_zig_name = (try llvmNameToZigNameOmit(
                         arena,
@@ -1431,8 +1431,8 @@ fn llvmNameToZigNameOmit(
 fn hasSuperclass(obj: *json.ObjectMap, class_name: []const u8) bool {
     const superclasses_json = obj.get("!superclasses") orelse return false;
 
-    for (superclasses_json.Array.items) |superclass_json| {
-        const superclass = superclass_json.String;
+    for (superclasses_json.array.items) |superclass_json| {
+        const superclass = superclass_json.string;
         if (std.mem.eql(u8, superclass, class_name)) {
             return true;
         }
diff --git a/tools/update_spirv_features.zig b/tools/update_spirv_features.zig
index bb859ed5b4..8d398f58de 100644
--- a/tools/update_spirv_features.zig
+++ b/tools/update_spirv_features.zig
@@ -74,8 +74,7 @@ pub fn main() !void {
     const registry_path = try fs.path.join(allocator, &.{ spirv_headers_root, "include", "spirv", "unified1", "spirv.core.grammar.json" });
     const registry_json = try std.fs.cwd().readFileAlloc(allocator, registry_path, std.math.maxInt(usize));
 
-    var tokens = std.json.TokenStream.init(registry_json);
-    const registry = try std.json.parse(g.CoreRegistry, &tokens, .{ .allocator = allocator });
+    const registry = try std.json.parseFromSlice(g.CoreRegistry, allocator, registry_json, .{});
 
     const capabilities = for (registry.operand_kinds) |opkind| {
         if (std.mem.eql(u8, opkind.kind, "Capability"))