mirror of
https://codeberg.org/ziglang/zig.git
synced 2025-12-06 13:54:21 +00:00
Merge a57a46d86f into e4be00f949
This commit is contained in:
commit
588f3f3155
2 changed files with 125 additions and 19 deletions
|
|
@ -64,6 +64,7 @@ pub const ValueOptions = struct {
|
|||
emit_codepoint_literals: EmitCodepointLiterals = .never,
|
||||
emit_strings_as_containers: bool = false,
|
||||
emit_default_optional_fields: bool = true,
|
||||
escape_non_ascii: bool = false,
|
||||
};
|
||||
|
||||
/// Determines when to emit Unicode code point literals as opposed to integer literals.
|
||||
|
|
@ -125,7 +126,7 @@ pub fn valueArbitraryDepth(self: *Serializer, val: anytype, options: ValueOption
|
|||
comptime assert(canSerializeType(@TypeOf(val)));
|
||||
switch (@typeInfo(@TypeOf(val))) {
|
||||
.int, .comptime_int => if (options.emit_codepoint_literals.emitAsCodepoint(val)) |c| {
|
||||
self.codePoint(c) catch |err| switch (err) {
|
||||
self.codePoint(c, .{ .escape_non_ascii = options.escape_non_ascii }) catch |err| switch (err) {
|
||||
error.InvalidCodepoint => unreachable, // Already validated
|
||||
else => |e| return e,
|
||||
};
|
||||
|
|
@ -146,7 +147,7 @@ pub fn valueArbitraryDepth(self: *Serializer, val: anytype, options: ValueOption
|
|||
(pointer.sentinel() == null or pointer.sentinel() == 0) and
|
||||
!options.emit_strings_as_containers)
|
||||
{
|
||||
return try self.string(val);
|
||||
return try self.string(val, .{ .escape_non_ascii = options.escape_non_ascii });
|
||||
}
|
||||
|
||||
// Serialize as either a tuple or as the child type
|
||||
|
|
@ -280,12 +281,21 @@ pub fn ident(self: *Serializer, name: []const u8) Error!void {
|
|||
}
|
||||
|
||||
pub const CodePointError = Error || error{InvalidCodepoint};
|
||||
/// Options for formatting code points.
pub const CodePointOptions = struct {
    /// If true, non-ASCII code points are written as escape sequences instead of being
    /// written literally. Defaults to false.
    escape_non_ascii: bool = false,
};
|
||||
|
||||
/// Serialize `val` as a Unicode codepoint literal delimited by single quotes.
///
/// `options.escape_non_ascii` is forwarded to `writeCodepoint`, which decides whether
/// non-ASCII codepoints are written literally or as escape sequences.
///
/// Returns `error.InvalidCodepoint` if `val` is not a valid Unicode codepoint. The check is
/// performed before anything is written, so no partial literal is emitted on that error.
pub fn codePoint(self: *Serializer, val: u21, options: CodePointOptions) CodePointError!void {
    // Reject out-of-range values up front; otherwise the opening quote below would already
    // be in the output by the time `writeCodepoint` reports the error.
    if (val > 0x10FFFF) return error.InvalidCodepoint;

    try self.writer.writeByte('\'');
    try self.writeCodepoint(val, .{
        .escape_non_ascii = options.escape_non_ascii,
        .quote_style = .single,
    });
    try self.writer.writeByte('\'');
}
|
||||
|
||||
/// Like `value`, but always serializes `val` as a tuple.
|
||||
|
|
@ -341,9 +351,101 @@ fn tupleImpl(self: *Serializer, val: anytype, options: ValueOptions) Error!void
|
|||
}
|
||||
}
|
||||
|
||||
/// Options for writing a Unicode codepoint.
const WriteCodepointOptions = struct {
    /// If true, non-ASCII codepoints are written as escape sequences instead of being
    /// written literally. Defaults to false.
    escape_non_ascii: bool = false,
    /// The quote character delimiting the literal being written. With `.single`, single
    /// quotes are escaped and double quotes are written as-is; with `.double`, double quotes
    /// are escaped and single quotes are written as-is.
    quote_style: enum { single, double } = .single,
};
|
||||
|
||||
/// Write the codepoint `val` to the writer, without surrounding quotes, escaping it as
/// required by `options`.
///
/// * Printable ASCII is written as-is, except for `\` and the quote character selected by
///   `options.quote_style`, which are backslash-escaped.
/// * `\n`, `\r` and `\t` use their named escapes; other ASCII control characters use `\xNN`.
/// * Non-ASCII codepoints are written as UTF-8, or as escapes when
///   `options.escape_non_ascii` is set.
/// * Surrogate codepoints are always written as `\u{...}` escapes.
///
/// Returns `error.InvalidCodepoint` if `val` is greater than 0x10FFFF.
fn writeCodepoint(self: *Serializer, val: u21, options: WriteCodepointOptions) CodePointError!void {
    switch (val) {
        // Printable ASCII that never needs escaping
        ' ', '!', '#'...'&', '('...'[', ']'...'~' => try self.writer.writeByte(@intCast(val)),
        // Unprintable ASCII
        0x00...0x08, 0x0B, 0x0C, 0x0E...0x1F, 0x7F => try self.writer.print("\\x{x:0>2}", .{val}),
        // ASCII with special escapes
        '\n' => try self.writer.writeAll("\\n"),
        '\r' => try self.writer.writeAll("\\r"),
        '\t' => try self.writer.writeAll("\\t"),
        '\\' => try self.writer.writeAll("\\\\"),
        // Quotes need escaping only if they conflict with the in-use quote character
        '\'' => switch (options.quote_style) {
            .single => try self.writer.writeAll("\\'"),
            .double => try self.writer.writeByte('\''),
        },
        '\"' => switch (options.quote_style) {
            .single => try self.writer.writeByte('"'),
            .double => try self.writer.writeAll("\\\""),
        },

        // Surrogates can only be written with an escape
        0xD800...0xDFFF => try self.writer.print("\\u{{{x}}}", .{val}),

        // All other valid non-ASCII codepoints
        0x80...0xD7FF, 0xE000...0x10FFFF => if (options.escape_non_ascii) {
            // `\xNN` denotes the value NN in a character literal but a single raw byte in a
            // string literal, so it is only equivalent to the codepoint inside single quotes.
            if (val <= 0xFF and options.quote_style == .single) {
                try self.writer.print("\\x{x:0>2}", .{val});
            } else {
                try self.writer.print("\\u{{{x}}}", .{val});
            }
        } else {
            // Always emit the UTF-8 encoding. Writing codepoints 0x80...0xFF as one raw byte
            // would produce output that is not valid UTF-8.
            var buf: [4]u8 = undefined;
            // Cannot fail: surrogates and out-of-range values are handled by other prongs.
            const len = std.unicode.utf8Encode(val, &buf) catch unreachable;
            try self.writer.writeAll(buf[0..len]);
        },

        // Invalid codepoints
        0x110000...std.math.maxInt(u21) => return error.InvalidCodepoint,
    }
}
|
||||
|
||||
/// Options for formatting strings.
pub const StringOptions = struct {
    /// If true, non-ASCII content is written as escape sequences instead of being copied
    /// through as raw bytes. Defaults to false.
    escape_non_ascii: bool = false,
};
|
||||
|
||||
/// Like `value`, but always serializes `val` as a string.
///
/// The result is a double-quoted literal:
/// * Printable ASCII other than `"` and `\` is copied through unchanged.
/// * `"`, `\` and ASCII control characters are escaped via `writeCodepoint`.
/// * Well-formed non-ASCII UTF-8 sequences are copied through unchanged, or written as
///   `\u{...}` escapes when `options.escape_non_ascii` is set.
/// * Bytes that are not part of a well-formed UTF-8 sequence are written as `\xNN` escapes,
///   so arbitrary byte strings are accepted and their bytes are preserved.
pub fn string(self: *Serializer, val: []const u8, options: StringOptions) Writer.Error!void {
    try self.writer.writeByte('"');

    // Batch write sequences of "raw" bytes (printable ASCII or non-escaped non-ASCII) for
    // performance. `val[start..i]` contains pending raw bytes to write.
    var start: usize = 0;
    var i: usize = 0;
    while (i < val.len) {
        const byte = val[i];

        // Decode the codepoint starting at `i`. `decoded` is null if the bytes at `i` are not
        // a well-formed UTF-8 sequence (bad lead byte, truncated, overlong, surrogate, ...),
        // in which case only the single offending byte is consumed.
        var seq_len: usize = 1;
        const decoded: ?u21 = if (byte < 0x80) byte else blk: {
            const n = std.unicode.utf8ByteSequenceLength(byte) catch break :blk null;
            if (n > val.len - i) break :blk null;
            const cp = std.unicode.utf8Decode(val[i..][0..n]) catch break :blk null;
            seq_len = n;
            break :blk cp;
        };

        // Check if this sequence can be written as-is
        const is_raw = if (decoded) |cp| switch (cp) {
            ' ', '!', '#'...'[', ']'...'~' => true,
            0x80...std.math.maxInt(u21) => !options.escape_non_ascii,
            else => false,
        } else false;
        if (is_raw) {
            i += seq_len;
            continue;
        }

        // Flush pending raw bytes
        try self.writer.writeAll(val[start..i]);

        if (decoded) |cp| {
            if (cp < 0x80) {
                // ASCII character that needs escaping
                self.writeCodepoint(cp, .{
                    .escape_non_ascii = options.escape_non_ascii,
                    .quote_style = .double,
                }) catch |err| switch (err) {
                    error.InvalidCodepoint => unreachable, // ASCII is always valid
                    else => |e| return e, // Writer failures must propagate
                };
            } else {
                // `\xNN` would denote a raw byte inside a string literal, so non-ASCII
                // codepoints are always escaped in `\u{...}` form.
                try self.writer.print("\\u{{{x}}}", .{cp});
            }
        } else {
            // Not valid UTF-8: emit the byte itself as a byte escape
            try self.writer.print("\\x{x:0>2}", .{byte});
        }

        i += seq_len;
        start = i;
    }

    try self.writer.writeAll(val[start..]);
    try self.writer.writeByte('"');
}
|
||||
|
||||
/// Options for formatting multiline strings.
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@
|
|||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const Writer = std.Io.Writer;
|
||||
const Serializer = std.zon.Serializer;
|
||||
const Serializer = @import("Serializer.zig");
|
||||
|
||||
pub const SerializeOptions = struct {
|
||||
/// If false, whitespace is omitted. Otherwise whitespace is emitted in standard Zig style.
|
||||
|
|
@ -37,6 +37,8 @@ pub const SerializeOptions = struct {
|
|||
/// If false, struct fields are not written if they are equal to their default value. Comparison
|
||||
/// is done by `std.meta.eql`.
|
||||
emit_default_optional_fields: bool = true,
|
||||
/// If true, non-ASCII unicode characters are escaped.
|
||||
escape_non_ascii: bool = false,
|
||||
};
|
||||
|
||||
/// Serialize the given value as ZON.
|
||||
|
|
@ -51,6 +53,7 @@ pub fn serialize(val: anytype, options: SerializeOptions, writer: *Writer) Write
|
|||
.emit_codepoint_literals = options.emit_codepoint_literals,
|
||||
.emit_strings_as_containers = options.emit_strings_as_containers,
|
||||
.emit_default_optional_fields = options.emit_default_optional_fields,
|
||||
.escape_non_ascii = options.escape_non_ascii,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -72,6 +75,7 @@ pub fn serializeMaxDepth(
|
|||
.emit_codepoint_literals = options.emit_codepoint_literals,
|
||||
.emit_strings_as_containers = options.emit_strings_as_containers,
|
||||
.emit_default_optional_fields = options.emit_default_optional_fields,
|
||||
.escape_non_ascii = options.escape_non_ascii,
|
||||
}, depth);
|
||||
}
|
||||
|
||||
|
|
@ -91,6 +95,7 @@ pub fn serializeArbitraryDepth(
|
|||
.emit_codepoint_literals = options.emit_codepoint_literals,
|
||||
.emit_strings_as_containers = options.emit_strings_as_containers,
|
||||
.emit_default_optional_fields = options.emit_default_optional_fields,
|
||||
.escape_non_ascii = options.escape_non_ascii,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -588,7 +593,7 @@ test "std.zon stringify utf8 codepoints" {
|
|||
try std.testing.expectEqualStrings("97", aw.written());
|
||||
aw.clearRetainingCapacity();
|
||||
|
||||
try s.codePoint('a');
|
||||
try s.codePoint('a', .{});
|
||||
try std.testing.expectEqualStrings("'a'", aw.written());
|
||||
aw.clearRetainingCapacity();
|
||||
|
||||
|
|
@ -609,7 +614,7 @@ test "std.zon stringify utf8 codepoints" {
|
|||
try std.testing.expectEqualStrings("10", aw.written());
|
||||
aw.clearRetainingCapacity();
|
||||
|
||||
try s.codePoint('\n');
|
||||
try s.codePoint('\n', .{});
|
||||
try std.testing.expectEqualStrings("'\\n'", aw.written());
|
||||
aw.clearRetainingCapacity();
|
||||
|
||||
|
|
@ -630,11 +635,11 @@ test "std.zon stringify utf8 codepoints" {
|
|||
try std.testing.expectEqualStrings("9889", aw.written());
|
||||
aw.clearRetainingCapacity();
|
||||
|
||||
try s.codePoint('⚡');
|
||||
try s.codePoint('⚡', .{ .escape_non_ascii = true });
|
||||
try std.testing.expectEqualStrings("'\\u{26a1}'", aw.written());
|
||||
aw.clearRetainingCapacity();
|
||||
|
||||
try s.value('⚡', .{ .emit_codepoint_literals = .always });
|
||||
try s.value('⚡', .{ .emit_codepoint_literals = .always, .escape_non_ascii = true });
|
||||
try std.testing.expectEqualStrings("'\\u{26a1}'", aw.written());
|
||||
aw.clearRetainingCapacity();
|
||||
|
||||
|
|
@ -647,8 +652,7 @@ test "std.zon stringify utf8 codepoints" {
|
|||
aw.clearRetainingCapacity();
|
||||
|
||||
// Invalid codepoint
|
||||
try s.codePoint(0x110000 + 1);
|
||||
try std.testing.expectEqualStrings("'\\u{110001}'", aw.written());
|
||||
try std.testing.expectError(error.InvalidCodepoint, s.codePoint(0x110000 + 1, .{ .escape_non_ascii = true }));
|
||||
aw.clearRetainingCapacity();
|
||||
|
||||
try s.int(0x110000 + 1);
|
||||
|
|
@ -681,7 +685,7 @@ test "std.zon stringify utf8 codepoints" {
|
|||
aw.clearRetainingCapacity();
|
||||
|
||||
// Make sure value options are passed to children
|
||||
try s.value(.{ .c = '⚡' }, .{ .emit_codepoint_literals = .always });
|
||||
try s.value(.{ .c = '⚡' }, .{ .emit_codepoint_literals = .always, .escape_non_ascii = true });
|
||||
try std.testing.expectEqualStrings(".{ .c = '\\u{26a1}' }", aw.written());
|
||||
aw.clearRetainingCapacity();
|
||||
|
||||
|
|
@ -696,8 +700,8 @@ test "std.zon stringify strings" {
|
|||
defer aw.deinit();
|
||||
|
||||
// Minimal case
|
||||
try s.string("abc⚡\n");
|
||||
try std.testing.expectEqualStrings("\"abc\\xe2\\x9a\\xa1\\n\"", aw.written());
|
||||
try s.string("abc⚡\n", .{ .escape_non_ascii = true });
|
||||
try std.testing.expectEqualStrings("\"abc\\u{26a1}\\n\"", aw.written());
|
||||
aw.clearRetainingCapacity();
|
||||
|
||||
try s.tuple("abc⚡\n", .{});
|
||||
|
|
@ -714,8 +718,8 @@ test "std.zon stringify strings" {
|
|||
, aw.written());
|
||||
aw.clearRetainingCapacity();
|
||||
|
||||
try s.value("abc⚡\n", .{});
|
||||
try std.testing.expectEqualStrings("\"abc\\xe2\\x9a\\xa1\\n\"", aw.written());
|
||||
try s.value("abc⚡\n", .{ .escape_non_ascii = false });
|
||||
try std.testing.expectEqualStrings("\"abc⚡\\n\"", aw.written());
|
||||
aw.clearRetainingCapacity();
|
||||
|
||||
try s.value("abc⚡\n", .{ .emit_strings_as_containers = true });
|
||||
|
|
@ -816,7 +820,7 @@ test "std.zon stringify multiline strings" {
|
|||
|
||||
{
|
||||
const str: []const u8 = &.{ 'a', '\r', 'c' };
|
||||
try s.string(str);
|
||||
try s.string(str, .{ .escape_non_ascii = false });
|
||||
try std.testing.expectEqualStrings("\"a\\rc\"", aw.written());
|
||||
aw.clearRetainingCapacity();
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue