mirror of
https://codeberg.org/ziglang/zig.git
synced 2025-12-06 05:44:20 +00:00
langref: add paragraph and examples about indexing non-ASCII strings
PR #10610 addressed most of the points from #1854. This additional paragraph and these examples cover the OMISSIONS section, clarifying issues about indexing into non-ASCII strings (whether valid UTF-8 or not). I think this finally closes #1854.
This commit is contained in:
parent
86ec26b1f0
commit
1fba88450d
1 changed file with 12 additions and 2 deletions
|
|
@ -871,6 +871,13 @@ pub fn main() void {
|
|||
However, it is possible to embed non-UTF-8 bytes into a string literal using <code>\xNN</code> notation.
|
||||
</p>
|
||||
<p>
|
||||
Indexing into a string containing non-ASCII bytes will return individual bytes, whether valid
|
||||
UTF-8 or not.
|
||||
The {#link|Zig Standard Library#} provides routines for checking the validity of UTF-8 encoded
|
||||
strings, accessing their code points and other encoding/decoding related tasks in
|
||||
{#syntax#}std.unicode{#endsyntax#}.
|
||||
</p>
|
||||
<p>
|
||||
Unicode code point literals have type {#syntax#}comptime_int{#endsyntax#}, the same as
|
||||
{#link|Integer Literals#}. All {#link|Escape Sequences#} are valid in both string literals
|
||||
and Unicode code point literals.
|
||||
|
|
@ -894,9 +901,12 @@ pub fn main() void {
|
|||
print("{}\n", .{'e' == '\x65'}); // true
|
||||
print("{d}\n", .{'\u{1f4a9}'}); // 128169
|
||||
print("{d}\n", .{'💯'}); // 128175
|
||||
print("{}\n", .{mem.eql(u8, "hello", "h\x65llo")}); // true
|
||||
print("0x{x}\n", .{"\xff"[0]}); // non-UTF-8 strings are possible with \xNN notation.
|
||||
print("{u}\n", .{'⚡'});
|
||||
print("{}\n", .{mem.eql(u8, "hello", "h\x65llo")}); // true
|
||||
print("{}\n", .{mem.eql(u8, "💯", "\xf0\x9f\x92\xaf")}); // also true
|
||||
const invalid_utf8 = "\xff\xfe"; // non-UTF-8 strings are possible with \xNN notation.
|
||||
print("0x{x}\n", .{invalid_utf8[1]}); // indexing them returns individual bytes...
|
||||
print("0x{x}\n", .{"💯"[1]}); // ...as does indexing part-way through non-ASCII characters
|
||||
}
|
||||
{#code_end#}
|
||||
{#see_also|Arrays|Source Encoding#}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue