From 61cb5143872ec2f3ae9c2942a03e18968bf27761 Mon Sep 17 00:00:00 2001 From: dweiller <4678790+dweiller@users.noreplay.github.com> Date: Sat, 21 Jan 2023 19:10:47 +1100 Subject: [PATCH] std.compress: add zstandard decompressor --- build.zig | 3 + lib/std/compress.zig | 2 + lib/std/compress/testdata/rfc8478.txt | 3027 ++++++++++++++++++ lib/std/compress/testdata/rfc8478.txt.zst.19 | Bin 0 -> 22211 bytes lib/std/compress/testdata/rfc8478.txt.zst.3 | Bin 0 -> 25639 bytes lib/std/compress/zstandard.zig | 22 + lib/std/compress/zstandard/decompress.zig | 1249 ++++++++ lib/std/compress/zstandard/types.zig | 394 +++ 8 files changed, 4697 insertions(+) create mode 100644 lib/std/compress/testdata/rfc8478.txt create mode 100644 lib/std/compress/testdata/rfc8478.txt.zst.19 create mode 100644 lib/std/compress/testdata/rfc8478.txt.zst.3 create mode 100644 lib/std/compress/zstandard.zig create mode 100644 lib/std/compress/zstandard/decompress.zig create mode 100644 lib/std/compress/zstandard/types.zig diff --git a/build.zig b/build.zig index faf14cc405..f75efeb8b4 100644 --- a/build.zig +++ b/build.zig @@ -113,8 +113,11 @@ pub fn build(b: *std.Build) !void { ".gz", ".z.0", ".z.9", + ".zst.3", + ".zst.19", "rfc1951.txt", "rfc1952.txt", + "rfc8478.txt", // exclude files from lib/std/compress/deflate/testdata ".expect", ".expect-noinput", diff --git a/lib/std/compress.zig b/lib/std/compress.zig index 9af1b30259..02e17474a1 100644 --- a/lib/std/compress.zig +++ b/lib/std/compress.zig @@ -6,6 +6,7 @@ pub const lzma = @import("compress/lzma.zig"); pub const lzma2 = @import("compress/lzma2.zig"); pub const xz = @import("compress/xz.zig"); pub const zlib = @import("compress/zlib.zig"); +pub const zstandard = @import("compress/zstandard.zig"); pub fn HashedReader( comptime ReaderType: anytype, @@ -44,4 +45,5 @@ test { _ = lzma2; _ = xz; _ = zlib; + _ = zstandard; } diff --git a/lib/std/compress/testdata/rfc8478.txt b/lib/std/compress/testdata/rfc8478.txt new file mode 100644 index
0000000000..e4ac22a302 --- /dev/null +++ b/lib/std/compress/testdata/rfc8478.txt @@ -0,0 +1,3027 @@ + + + + + + +Internet Engineering Task Force (IETF) Y. Collet +Request for Comments: 8478 M. Kucherawy, Ed. +Category: Informational Facebook +ISSN: 2070-1721 October 2018 + + + Zstandard Compression and the application/zstd Media Type + +Abstract + + Zstandard, or "zstd" (pronounced "zee standard"), is a data + compression mechanism. This document describes the mechanism and + registers a media type and content encoding to be used when + transporting zstd-compressed content via Multipurpose Internet Mail + Extensions (MIME). + + Despite use of the word "standard" as part of its name, readers are + advised that this document is not an Internet Standards Track + specification; it is being published for informational purposes only. + +Status of This Memo + + This document is not an Internet Standards Track specification; it is + published for informational purposes. + + This document is a product of the Internet Engineering Task Force + (IETF). It represents the consensus of the IETF community. It has + received public review and has been approved for publication by the + Internet Engineering Steering Group (IESG). Not all documents + approved by the IESG are candidates for any level of Internet + Standard; see Section 2 of RFC 7841. + + Information about the current status of this document, any errata, + and how to provide feedback on it may be obtained at + https://www.rfc-editor.org/info/rfc8478. + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 1] + +RFC 8478 application/zstd October 2018 + + +Copyright Notice + + Copyright (c) 2018 IETF Trust and the persons identified as the + document authors. All rights reserved. + + This document is subject to BCP 78 and the IETF Trust's Legal + Provisions Relating to IETF Documents + (https://trustee.ietf.org/license-info) in effect on the date of + publication of this document. 
Please review these documents + carefully, as they describe your rights and restrictions with respect + to this document. Code Components extracted from this document must + include Simplified BSD License text as described in Section 4.e of + the Trust Legal Provisions and are provided without warranty as + described in the Simplified BSD License. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 2] + +RFC 8478 application/zstd October 2018 + + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 4 + 2. Definitions . . . . . . . . . . . . . . . . . . . . . . . . . 4 + 3. Compression Algorithm . . . . . . . . . . . . . . . . . . . . 5 + 3.1. Frames . . . . . . . . . . . . . . . . . . . . . . . . . 6 + 3.1.1. Zstandard Frames . . . . . . . . . . . . . . . . . . 6 + 3.1.1.1. Frame Header . . . . . . . . . . . . . . . . . . 7 + 3.1.1.2. Blocks . . . . . . . . . . . . . . . . . . . . . 12 + 3.1.1.3. Compressed Blocks . . . . . . . . . . . . . . . . 14 + 3.1.1.4. Sequence Execution . . . . . . . . . . . . . . . 28 + 3.1.1.5. Repeat Offsets . . . . . . . . . . . . . . . . . 29 + 3.1.2. Skippable Frames . . . . . . . . . . . . . . . . . . 30 + 4. Entropy Encoding . . . . . . . . . . . . . . . . . . . . . . 30 + 4.1. FSE . . . . . . . . . . . . . . . . . . . . . . . . . . . 31 + 4.1.1. FSE Table Description . . . . . . . . . . . . . . . . 31 + 4.2. Huffman Coding . . . . . . . . . . . . . . . . . . . . . 34 + 4.2.1. Huffman Tree Description . . . . . . . . . . . . . . 35 + 4.2.1.1. Huffman Tree Header . . . . . . . . . . . . . . . 36 + 4.2.1.2. FSE Compression of Huffman Weights . . . . . . . 37 + 4.2.1.3. Conversion from Weights to Huffman Prefix Codes . 38 + 4.2.2. Huffman-Coded Streams . . . . . . . . . . . . . . . . 39 + 5. Dictionary Format . . . . . . . . . . . . . . . . . . . . . . 40 + 6. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 42 + 6.1. 
The 'application/zstd' Media Type . . . . . . . . . . . . 42 + 6.2. Content Encoding . . . . . . . . . . . . . . . . . . . . 43 + 6.3. Dictionaries . . . . . . . . . . . . . . . . . . . . . . 43 + 7. Security Considerations . . . . . . . . . . . . . . . . . . . 43 + 8. Implementation Status . . . . . . . . . . . . . . . . . . . . 44 + 9. References . . . . . . . . . . . . . . . . . . . . . . . . . 45 + 9.1. Normative References . . . . . . . . . . . . . . . . . . 45 + 9.2. Informative References . . . . . . . . . . . . . . . . . 45 + Appendix A. Decoding Tables for Predefined Codes . . . . . . . . 46 + A.1. Literal Length Code Table . . . . . . . . . . . . . . . . 46 + A.2. Match Length Code Table . . . . . . . . . . . . . . . . . 49 + A.3. Offset Code Table . . . . . . . . . . . . . . . . . . . . 52 + Acknowledgments . . . . . . . . . . . . . . . . . . . . . . . . . 53 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 54 + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 3] + +RFC 8478 application/zstd October 2018 + + +1. Introduction + + Zstandard, or "zstd" (pronounced "zee standard"), is a data + compression mechanism, akin to gzip [RFC1952]. + + Despite use of the word "standard" as part of its name, readers are + advised that this document is not an Internet Standards Track + specification; it is being published for informational purposes only. + + This document describes the Zstandard format. Also, to enable the + transport of a data object compressed with Zstandard, this document + registers a media type that can be used to identify such content when + it is used in a payload encoded using Multipurpose Internet Mail + Extensions (MIME). + +2. Definitions + + Some terms used elsewhere in this document are defined here for + clarity. + + uncompressed: Describes an arbitrary set of bytes in their original + form, prior to being subjected to compression. 
+ + compress, compression: The act of processing a set of bytes via the + compression mechanism described here. + + compressed: Describes the result of passing a set of bytes through + this mechanism. The original input has thus been compressed. + + decompress, decompression: The act of processing a set of bytes + through the inverse of the compression mechanism described here, + in an attempt to recover the original set of bytes prior to + compression. + + decompressed: Describes the result of passing a set of bytes through + the reverse of this mechanism. When this is successful, the + decompressed payload and the uncompressed payload are + indistinguishable. + + encode: The process of translating data from one form to another; + this may include compression or it may refer to other translations + done as part of this specification. + + decode: The reverse of "encode"; describes a process of reversing a + prior encoding to recover the original content. + + + + + + +Collet & Kucherawy Informational [Page 4] + +RFC 8478 application/zstd October 2018 + + + frame: Content compressed by Zstandard is transformed into a + Zstandard frame. Multiple frames can be appended into a single + file or stream. A frame is completely independent, has a defined + beginning and end, and has a set of parameters that tells the + decoder how to decompress it. + + block: A frame encapsulates one or multiple blocks. Each block + contains arbitrary content, which is described by its header, and + has a guaranteed maximum content size that depends upon frame + parameters. Unlike frames, each block depends on previous blocks + for proper decoding. However, each block can be decompressed + without waiting for its successor, allowing streaming operations. + + natural order: A sequence or ordering of objects or values that is + typical of that type of object or value. 
A set of unique + integers, for example, is in "natural order" if when progressing + from one element in the set or sequence to the next, there is + never a decrease in value. + + The naming convention for identifiers within the specification is + Mixed_Case_With_Underscores. Identifiers inside square brackets + indicate that the identifier is optional in the presented context. + +3. Compression Algorithm + + This section describes the Zstandard algorithm. + + The purpose of this document is to define a lossless compressed data + format that is a) independent of the CPU type, operating system, file + system, and character set and b) is suitable for file compression and + pipe and streaming compression, using the Zstandard algorithm. The + text of the specification assumes a basic background in programming + at the level of bits and other primitive data representations. + + The data can be produced or consumed, even for an arbitrarily long + sequentially presented input data stream, using only an a priori + bounded amount of intermediate storage, and hence can be used in data + communications. The format uses the Zstandard compression method, + and an optional xxHash-64 checksum method [XXHASH], for detection of + data corruption. + + The data format defined by this specification does not attempt to + allow random access to compressed data. + + Unless otherwise indicated below, a compliant compressor must produce + data sets that conform to the specifications presented here. + However, it does not need to support all options. + + + + +Collet & Kucherawy Informational [Page 5] + +RFC 8478 application/zstd October 2018 + + + A compliant decompressor must be able to decompress at least one + working set of parameters that conforms to the specifications + presented here. It may also ignore informative fields, such as the + checksum. 
Whenever it does not support a parameter defined in the + compressed stream, it must produce a non-ambiguous error code and + associated error message explaining which parameter is unsupported. + + This specification is intended for use by implementers of software to + compress data into Zstandard format and/or decompress data from + Zstandard format. The Zstandard format is supported by an open + source reference implementation, written in portable C, and available + at [ZSTD]. + +3.1. Frames + + Zstandard compressed data is made up of one or more frames. Each + frame is independent and can be decompressed independently of other + frames. The decompressed content of multiple concatenated frames is + the concatenation of each frame's decompressed content. + + There are two frame formats defined for Zstandard: Zstandard frames + and skippable frames. Zstandard frames contain compressed data, + while skippable frames contain custom user metadata. + +3.1.1. Zstandard Frames + + The structure of a single Zstandard frame is as follows: + + +--------------------+------------+ + | Magic_Number | 4 bytes | + +--------------------+------------+ + | Frame_Header | 2-14 bytes | + +--------------------+------------+ + | Data_Block | n bytes | + +--------------------+------------+ + | [More Data_Blocks] | | + +--------------------+------------+ + | [Content_Checksum] | 0-4 bytes | + +--------------------+------------+ + + Magic_Number: 4 bytes, little-endian format. Value: 0xFD2FB528. + + Frame_Header: 2 to 14 bytes, detailed in Section 3.1.1.1. + + Data_Block: Detailed in Section 3.1.1.2. This is where data + appears. + + + + + +Collet & Kucherawy Informational [Page 6] + +RFC 8478 application/zstd October 2018 + + + Content_Checksum: An optional 32-bit checksum, only present if + Content_Checksum_Flag is set. The content checksum is the result + of the XXH64() hash function [XXHASH] digesting the original + (decoded) data as input, and a seed of zero. 
The low 4 bytes of + the checksum are stored in little-endian format. + + The magic number was selected to be less probable to find at the + beginning of an arbitrary file. It avoids trivial patterns (0x00, + 0xFF, repeated bytes, increasing bytes, etc.), contains byte values + outside of ASCII range, and doesn't map into UTF-8 space, all of + which reduce the likelihood of its appearance at the top of a text + file. + +3.1.1.1. Frame Header + + The frame header has a variable size, with a minimum of 2 bytes and + up to 14 bytes depending on optional parameters. The structure of + Frame_Header is as follows: + + +-------------------------+-----------+ + | Frame_Header_Descriptor | 1 byte | + +-------------------------+-----------+ + | [Window_Descriptor] | 0-1 byte | + +-------------------------+-----------+ + | [Dictionary_ID] | 0-4 bytes | + +-------------------------+-----------+ + | [Frame_Content_Size] | 0-8 bytes | + +-------------------------+-----------+ + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 7] + +RFC 8478 application/zstd October 2018 + + +3.1.1.1.1. Frame_Header_Descriptor + + The first header's byte is called the Frame_Header_Descriptor. It + describes which other fields are present. Decoding this byte is + enough to tell the size of Frame_Header. + + +------------+-------------------------+ + | Bit Number | Field Name | + +------------+-------------------------+ + | 7-6 | Frame_Content_Size_Flag | + +------------+-------------------------+ + | 5 | Single_Segment_Flag | + +------------+-------------------------+ + | 4 | (unused) | + +------------+-------------------------+ + | 3 | (reserved) | + +------------+-------------------------+ + | 2 | Content_Checksum_Flag | + +------------+-------------------------+ + | 1-0 | Dictionary_ID_Flag | + +------------+-------------------------+ + + In this table, bit 7 is the highest bit, while bit 0 is the lowest + one. + +3.1.1.1.1.1. 
Frame_Content_Size_Flag + + This is a 2-bit flag (equivalent to Frame_Header_Descriptor right- + shifted 6 bits) specifying whether Frame_Content_Size (the + decompressed data size) is provided within the header. Flag_Value + provides FCS_Field_Size, which is the number of bytes used by + Frame_Content_Size according to the following table: + + +----------------+--------+---+---+---+ + | Flag_Value | 0 | 1 | 2 | 3 | + +----------------+--------+---+---+---+ + | FCS_Field_Size | 0 or 1 | 2 | 4 | 8 | + +----------------+--------+---+---+---+ + + When Flag_Value is 0, FCS_Field_Size depends on Single_Segment_Flag: + If Single_Segment_Flag is set, FCS_Field_Size is 1. Otherwise, + FCS_Field_Size is 0; Frame_Content_Size is not provided. + + + + + + + + + +Collet & Kucherawy Informational [Page 8] + +RFC 8478 application/zstd October 2018 + + +3.1.1.1.1.2. Single_Segment_Flag + + If this flag is set, data must be regenerated within a single + continuous memory segment. + + In this case, Window_Descriptor byte is skipped, but + Frame_Content_Size is necessarily present. As a consequence, the + decoder must allocate a memory segment of size equal or larger than + Frame_Content_Size. + + In order to protect the decoder from unreasonable memory + requirements, a decoder is allowed to reject a compressed frame that + requests a memory size beyond the decoder's authorized range. + + For broader compatibility, decoders are recommended to support memory + sizes of at least 8 MB. This is only a recommendation; each decoder + is free to support higher or lower limits, depending on local + limitations. + +3.1.1.1.1.3. Unused Bit + + A decoder compliant with this specification version shall not + interpret this bit. It might be used in a future version, to signal + a property that is not mandatory to properly decode the frame. An + encoder compliant with this specification must set this bit to zero. + +3.1.1.1.1.4. Reserved Bit + + This bit is reserved for some future feature. 
Its value must be + zero. A decoder compliant with this specification version must + ensure it is not set. This bit may be used in a future revision, to + signal a feature that must be interpreted to decode the frame + correctly. + +3.1.1.1.1.5. Content_Checksum_Flag + + If this flag is set, a 32-bit Content_Checksum will be present at the + frame's end. See the description of Content_Checksum above. + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 9] + +RFC 8478 application/zstd October 2018 + + +3.1.1.1.1.6. Dictionary_ID_Flag + + This is a 2-bit flag (= Frame_Header_Descriptor & 0x3) indicating + whether a dictionary ID is provided within the header. It also + specifies the size of this field as DID_Field_Size: + + +----------------+---+---+---+---+ + | Flag_Value | 0 | 1 | 2 | 3 | + +----------------+---+---+---+---+ + | DID_Field_Size | 0 | 1 | 2 | 4 | + +----------------+---+---+---+---+ + +3.1.1.1.2. Window Descriptor + + This provides guarantees about the minimum memory buffer required to + decompress a frame. This information is important for decoders to + allocate enough memory. + + The Window_Descriptor byte is optional. When Single_Segment_Flag is + set, Window_Descriptor is not present. In this case, Window_Size is + Frame_Content_Size, which can be any value from 0 to 2^64-1 bytes (16 + ExaBytes). + + +------------+----------+----------+ + | Bit Number | 7-3 | 2-0 | + +------------+----------+----------+ + | Field Name | Exponent | Mantissa | + +------------+----------+----------+ + + The minimum memory buffer size is called Window_Size. It is + described by the following formulae: + + windowLog = 10 + Exponent; + windowBase = 1 << windowLog; + windowAdd = (windowBase / 8) * Mantissa; + Window_Size = windowBase + windowAdd; + + The minimum Window_Size is 1 KB. The maximum Window_Size is (1<<41) + + 7*(1<<38) bytes, which is 3.75 TB. 
+ + In general, larger Window_Size values tend to improve the compression + ratio, but at the cost of increased memory usage. + + To properly decode compressed data, a decoder will need to allocate a + buffer of at least Window_Size bytes. + + + + + + +Collet & Kucherawy Informational [Page 10] + +RFC 8478 application/zstd October 2018 + + + In order to protect decoders from unreasonable memory requirements, a + decoder is allowed to reject a compressed frame that requests a + memory size beyond decoder's authorized range. + + For improved interoperability, it's recommended for decoders to + support values of Window_Size up to 8 MB and for encoders not to + generate frames requiring a Window_Size larger than 8 MB. It's + merely a recommendation though, and decoders are free to support + larger or lower limits, depending on local limitations. + +3.1.1.1.3. Dictionary_ID + + This is a variable size field, which contains the ID of the + dictionary required to properly decode the frame. This field is + optional. When it's not present, it's up to the decoder to know + which dictionary to use. + + Dictionary_ID field size is provided by DID_Field_Size. + DID_Field_Size is directly derived from the value of + Dictionary_ID_Flag. One byte can represent an ID 0-255; 2 bytes can + represent an ID 0-65535; 4 bytes can represent an ID 0-4294967295. + Format is little-endian. + + It is permitted to represent a small ID (for example, 13) with a + large 4-byte dictionary ID, even if it is less efficient. + + Within private environments, any dictionary ID can be used. However, + for frames and dictionaries distributed in public space, + Dictionary_ID must be attributed carefully. The following ranges are + reserved for use only with dictionaries that have been registered + with IANA (see Section 6.3): + + low range: <= 32767 + high range: >= (1 << 31) + + Any other value for Dictionary_ID can be used by private arrangement + between participants. 
+ + Any payload presented for decompression that references an + unregistered reserved dictionary ID results in an error. + + + + + + + + + + + +Collet & Kucherawy Informational [Page 11] + +RFC 8478 application/zstd October 2018 + + +3.1.1.1.4. Frame Content Size + + This is the original (uncompressed) size. This information is + optional. Frame_Content_Size uses a variable number of bytes, + provided by FCS_Field_Size. FCS_Field_Size is provided by the value + of Frame_Content_Size_Flag. FCS_Field_Size can be equal to 0 (not + present), 1, 2, 4, or 8 bytes. + + +----------------+--------------+ + | FCS Field Size | Range | + +----------------+--------------+ + | 0 | unknown | + +----------------+--------------+ + | 1 | 0 - 255 | + +----------------+--------------+ + | 2 | 256 - 65791 | + +----------------+--------------+ + | 4 | 0 - 2^32 - 1 | + +----------------+--------------+ + | 8 | 0 - 2^64 - 1 | + +----------------+--------------+ + + Frame_Content_Size format is little-endian. When FCS_Field_Size is + 1, 4, or 8 bytes, the value is read directly. When FCS_Field_Size is + 2, the offset of 256 is added. It's allowed to represent a small + size (for example 18) using any compatible variant. + +3.1.1.2. Blocks + + After Magic_Number and Frame_Header, there are some number of blocks. + Each frame must have at least 1 block, but there is no upper limit on + the number of blocks per frame. + + The structure of a block is as follows: + + +--------------+---------------+ + | Block_Header | Block_Content | + +--------------+---------------+ + | 3 bytes | n bytes | + +--------------+---------------+ + + + + + + + + + + + +Collet & Kucherawy Informational [Page 12] + +RFC 8478 application/zstd October 2018 + + + Block_Header uses 3 bytes, written using little-endian convention. 
+ It contains three fields: + + +------------+------------+------------+ + | Last_Block | Block_Type | Block_Size | + +------------+------------+------------+ + | bit 0 | bits 1-2 | bits 3-23 | + +------------+------------+------------+ + +3.1.1.2.1. Last_Block + + The lowest bit (Last_Block) signals whether this block is the last + one. The frame will end after this last block. It may be followed + by an optional Content_Checksum (see Section 3.1.1). + +3.1.1.2.2. Block_Type + + The next 2 bits represent the Block_Type. There are four block + types: + + +-----------+------------------+ + | Value | Block_Type | + +-----------+------------------+ + | 0 | Raw_Block | + +-----------+------------------+ + | 1 | RLE_Block | + +-----------+------------------+ + | 2 | Compressed_Block | + +-----------+------------------+ + | 3 | Reserved | + +-----------+------------------+ + + Raw_Block: This is an uncompressed block. Block_Content contains + Block_Size bytes. + + RLE_Block: This is a single byte, repeated Block_Size times. + Block_Content consists of a single byte. On the decompression + side, this byte must be repeated Block_Size times. + + Compressed_Block: This is a compressed block as described in + Section 3.1.1.3. Block_Size is the length of Block_Content, + namely the compressed data. The decompressed size is not known, + but its maximum possible value is guaranteed (see below). + + Reserved: This is not a block. This value cannot be used with the + current specification. If such a value is present, it is + considered to be corrupt data. + + + + +Collet & Kucherawy Informational [Page 13] + +RFC 8478 application/zstd October 2018 + + +3.1.1.2.3. Block_Size + + The upper 21 bits of Block_Header represent the Block_Size. + Block_Size is the size of the block excluding the header. 
A block + can contain any number of bytes (even zero), up to + Block_Maximum_Decompressed_Size, which is the smallest of: + + o Window_Size + + o 128 KB + + A Compressed_Block has the extra restriction that Block_Size is + always strictly less than the decompressed size. If this condition + cannot be respected, the block must be sent uncompressed instead + (i.e., treated as a Raw_Block). + +3.1.1.3. Compressed Blocks + + To decompress a compressed block, the compressed size must be + provided from the Block_Size field within Block_Header. + + A compressed block consists of two sections: a Literals + Section (Section 3.1.1.3.1) and a + Sequences_Section (Section 3.1.1.3.2). The results of the two + sections are then combined to produce the decompressed data in + Sequence Execution (Section 3.1.1.4). + + To decode a compressed block, the following elements are necessary: + + o Previous decoded data, up to a distance of Window_Size, or the + beginning of the Frame, whichever is smaller. Single_Segment_Flag + will be set in the latter case. + + o List of "recent offsets" from the previous Compressed_Block. + + o The previous Huffman tree, required by Treeless_Literals_Block + type. + + o Previous Finite State Entropy (FSE) decoding tables, required by + Repeat_Mode, for each symbol type (literals lengths, match + lengths, offsets). + + Note that decoding tables are not always from the previous + Compressed_Block: + + o Every decoding table can come from a dictionary. + + + + + +Collet & Kucherawy Informational [Page 14] + +RFC 8478 application/zstd October 2018 + + + o The Huffman tree comes from the previous + Compressed_Literals_Block. + +3.1.1.3.1. Literals_Section_Header + + All literals are regrouped in the first part of the block. They can + be decoded first and then copied during Sequence Execution (see + Section 3.1.1.4), or they can be decoded on the flow during Sequence + Execution. 
+ + Literals can be stored uncompressed or compressed using Huffman + prefix codes. When compressed, an optional tree description can be + present, followed by 1 or 4 streams. + + +----------------------------+ + | Literals_Section_Header | + +----------------------------+ + | [Huffman_Tree_Description] | + +----------------------------+ + | [Jump_Table] | + +----------------------------+ + | Stream_1 | + +----------------------------+ + | [Stream_2] | + +----------------------------+ + | [Stream_3] | + +----------------------------+ + | [Stream_4] | + +----------------------------+ + +3.1.1.3.1.1. Literals_Section_Header + + This field describes how literals are packed. It's a byte-aligned + variable-size bit field, ranging from 1 to 5 bytes, using little- + endian convention. + + +---------------------+-----------+ + | Literals_Block_Type | 2 bits | + +---------------------+-----------+ + | Size_Format | 1-2 bits | + +---------------------+-----------+ + | Regenerated_Size | 5-20 bits | + +---------------------+-----------+ + | [Compressed_Size] | 0-18 bits | + +---------------------+-----------+ + + In this representation, bits at the top are the lowest bits. + + + + +Collet & Kucherawy Informational [Page 15] + +RFC 8478 application/zstd October 2018 + + + The Literals_Block_Type field uses the two lowest bits of the first + byte, describing four different block types: + + +---------------------------+-------+ + | Literals_Block_Type | Value | + +---------------------------+-------+ + | Raw_Literals_Block | 0 | + +---------------------------+-------+ + | RLE_Literals_Block | 1 | + +---------------------------+-------+ + | Compressed_Literals_Block | 2 | + +---------------------------+-------+ + | Treeless_Literals_Block | 3 | + +---------------------------+-------+ + + Raw_Literals_Block: Literals are stored uncompressed. + Literals_Section_Content is Regenerated_Size. 
+ + RLE_Literals_Block: Literals consist of a single-byte value repeated + Regenerated_Size times. Literals_Section_Content is 1. + + Compressed_Literals_Block: This is a standard Huffman-compressed + block, starting with a Huffman tree description. See details + below. Literals_Section_Content is Compressed_Size. + + Treeless_Literals_Block: This is a Huffman-compressed block, using + the Huffman tree from the previous Compressed_Literals_Block, or a + dictionary if there is no previous Huffman-compressed literals + block. Huffman_Tree_Description will be skipped. Note that if + this mode is triggered without any previous Huffman-table in the + frame (or dictionary, per Section 5), it should be treated as data + corruption. Literals_Section_Content is Compressed_Size. + + The Size_Format is divided into two families: + + o For Raw_Literals_Block and RLE_Literals_Block, it's only necessary + to decode Regenerated_Size. There is no Compressed_Size field. + + o For Compressed_Block and Treeless_Literals_Block, it's required to + decode both Compressed_Size and Regenerated_Size (the decompressed + size). It's also necessary to decode the number of streams (1 or + 4). + + For values spanning several bytes, the convention is little endian. + + Size_Format for Raw_Literals_Block and RLE_Literals_Block uses 1 or 2 + bits. Its value is (Literals_Section_Header[0]>>2) & 0x3. + + + + +Collet & Kucherawy Informational [Page 16] + +RFC 8478 application/zstd October 2018 + + + Size_Format == 00 or 10: Size_Format uses 1 bit. Regenerated_Size + uses 5 bits (value 0-31). Literals_Section_Header uses 1 byte. + Regenerated_Size = Literal_Section_Header[0]>>3. + + Size_Format == 01: Size_Format uses 2 bits. Regenerated_Size uses + 12 bits (values 0-4095). Literals_Section_Header uses 2 bytes. + Regenerated_Size = (Literals_Section_Header[0]>>4) + + (Literals_Section_Header[1]<<4). + + Size_Format == 11: Size_Format uses 2 bits. Regenerated_Size uses + 20 bits (values 0-1048575). 
Literals_Section_Header uses 3 bytes. + Regenerated_Size = (Literals_Section_Header[0]>>4) + + (Literals_Section_Header[1]<<4) + (Literals_Section_Header[2]<<12) + + Only Stream_1 is present for these cases. Note that it is permitted + to represent a short value (for example, 13) using a long format, + even if it's less efficient. + + Size_Format for Compressed_Literals_Block and Treeless_Literals_Block + always uses 2 bits. + + Size_Format == 00: A single stream. Both Regenerated_Size and + Compressed_Size use 10 bits (values 0-1023). + Literals_Section_Header uses 3 bytes. + + Size_Format == 01: 4 streams. Both Regenerated_Size and + Compressed_Size use 10 bits (values 0-1023). + Literals_Section_Header uses 3 bytes. + + Size_Format == 10: 4 streams. Both Regenerated_Size and + Compressed_Size use 14 bits (values 0-16383). + Literals_Section_Header uses 4 bytes. + + Size_Format == 11: 4 streams. Both Regenerated_Size and + Compressed_Size use 18 bits (values 0-262143). + Literals_Section_Header uses 5 bytes. + + Both the Compressed_Size and Regenerated_Size fields follow little- + endian convention. Note that Compressed_Size includes the size of + the Huffman_Tree_Description when it is present. + +3.1.1.3.1.2. Raw_Literals_Block + + The data in Stream_1 is Regenerated_Size bytes long. It contains the + raw literals data to be used during Sequence Execution + (Section 3.1.1.3.2). + + + + + +Collet & Kucherawy Informational [Page 17] + +RFC 8478 application/zstd October 2018 + + +3.1.1.3.1.3. RLE_Literals_Block + + Stream_1 consists of a single byte that should be repeated + Regenerated_Size times to generate the decoded literals. + +3.1.1.3.1.4. Compressed_Literals_Block and Treeless_Literals_Block + + Both of these modes contain Huffman-encoded data. For + Treeless_Literals_Block, the Huffman table comes from the previously + compressed literals block, or from a dictionary; see Section 5. + +3.1.1.3.1.5. 
Huffman_Tree_Description + + This section is only present when the Literals_Block_Type type is + Compressed_Literals_Block (2). The format of + Huffman_Tree_Description can be found in Section 4.2.1. The size of + Huffman_Tree_Description is determined during the decoding process. + It must be used to determine where streams begin. + + Total_Streams_Size = Compressed_Size + - Huffman_Tree_Description_Size + +3.1.1.3.1.6. Jump_Table + + The Jump_Table is only present when there are 4 Huffman-coded + streams. + + (Reminder: Huffman-compressed data consists of either 1 or 4 Huffman- + coded streams.) + + If only 1 stream is present, it is a single bitstream occupying the + entire remaining portion of the literals block, encoded as described + within Section 4.2.2. + + If there are 4 streams, Literals_Section_Header only provides enough + information to know the decompressed and compressed sizes of all 4 + streams combined. The decompressed size of each stream is equal to + (Regenerated_Size+3)/4, except for the last stream, which may be up + to 3 bytes smaller, to reach a total decompressed size as specified + in Regenerated_Size. + + The compressed size of each stream is provided explicitly in the + Jump_Table. The Jump_Table is 6 bytes long and consists of three + 2-byte little-endian fields, describing the compressed sizes of the + first 3 streams. Stream4_Size is computed from Total_Streams_Size + minus sizes of other streams. + + + + + +Collet & Kucherawy Informational [Page 18] + +RFC 8478 application/zstd October 2018 + + + Stream4_Size = Total_Streams_Size - 6 + - Stream1_Size - Stream2_Size + - Stream3_Size + + Note that if Stream1_Size + Stream2_Size + Stream3_Size exceeds + Total_Streams_Size, the data are considered corrupted. + + Each of these 4 bitstreams is then decoded independently as a + Huffman-Coded stream, as described in Section 4.2.2. + +3.1.1.3.2. Sequences_Section + + A compressed block is a succession of sequences. 
A sequence is a + literal copy command, followed by a match copy command. A literal + copy command specifies a length. It is the number of bytes to be + copied (or extracted) from the Literals Section. A match copy + command specifies an offset and a length. + + When all sequences are decoded, if there are literals left in the + literals section, these bytes are added at the end of the block. + + This is described in more detail in Section 3.1.1.4. + + The Sequences_Section regroups all symbols required to decode + commands. There are three symbol types: literals lengths, offsets, + and match lengths. They are encoded together, interleaved, in a + single "bitstream". + + The Sequences_Section starts by a header, followed by optional + probability tables for each symbol type, followed by the bitstream. + + Sequences_Section_Header + [Literals_Length_Table] + [Offset_Table] + [Match_Length_Table] + bitStream + + To decode the Sequences_Section, it's necessary to know its size. + This size is deduced from the size of the Literals_Section: + Sequences_Section_Size = Block_Size - Literals_Section_Header - + Literals_Section_Content + + + + + + + + + + +Collet & Kucherawy Informational [Page 19] + +RFC 8478 application/zstd October 2018 + + +3.1.1.3.2.1. Sequences_Section_Header + + This header consists of two items: + + o Number_of_Sequences + + o Symbol_Compression_Modes + + Number_of_Sequences is a variable size field using between 1 and 3 + bytes. If the first byte is "byte0": + + o if (byte0 == 0): there are no sequences. The sequence section + stops here. Decompressed content is defined entirely as Literals + Section content. The FSE tables used in Repeat_Mode are not + updated. + + o if (byte0 < 128): Number_of_Sequences = byte0. Uses 1 byte. + + o if (byte0 < 255): Number_of_Sequences = ((byte0 - 128) << 8) + + byte1. Uses 2 bytes. + + o if (byte0 == 255): Number_of_Sequences = byte1 + (byte2 << 8) + + 0x7F00. Uses 3 bytes. 
+ + Symbol_Compression_Modes is a single byte, defining the compression + mode of each symbol type. + + +-------------+----------------------+ + | Bit Number | Field Name | + +-------------+----------------------+ + | 7-6 | Literal_Lengths_Mode | + +-------------+----------------------+ + | 5-4 | Offsets_Mode | + +-------------+----------------------+ + | 3-2 | Match_Lengths_Mode | + +-------------+----------------------+ + | 1-0 | Reserved | + +-------------+----------------------+ + + The last field, Reserved, must be all zeroes. + + + + + + + + + + + +Collet & Kucherawy Informational [Page 20] + +RFC 8478 application/zstd October 2018 + + + Literals_Lengths_Mode, Offsets_Mode, and Match_Lengths_Mode define + the Compression_Mode of literals lengths, offsets, and match lengths + symbols, respectively. They follow the same enumeration: + + +-------+---------------------+ + | Value | Compression_Mode | + +-------+---------------------+ + | 0 | Predefined_Mode | + +-------+---------------------+ + | 1 | RLE_Mode | + +-------+---------------------+ + | 2 | FSE_Compressed_Mode | + +-------+---------------------+ + | 3 | Repeat_Mode | + +-------+---------------------+ + + Predefined_Mode: A predefined FSE (see Section 4.1) distribution + table is used, as defined in Section 3.1.1.3.2.2. No distribution + table will be present. + + RLE_Mode: The table description consists of a single byte, which + contains the symbol's value. This symbol will be used for all + sequences. + + FSE_Compressed_Mode: Standard FSE compression. A distribution table + will be present. The format of this distribution table is + described in Section 4.1.1. Note that the maximum allowed + accuracy log for literals length and match length tables is 9, and + the maximum accuracy log for the offsets table is 8. This mode + must not be used when only one symbol is present; RLE_Mode should + be used instead (although any other mode will work). 
+ + Repeat_Mode: The table used in the previous Compressed_Block with + Number_Of_Sequences > 0 will be used again, or if this is the + first block, the table in the dictionary will be used. Note that + this includes RLE_Mode, so if Repeat_Mode follows RLE_Mode, the + same symbol will be repeated. It also includes Predefined_Mode, + in which case Repeat_Mode will have the same outcome as + Predefined_Mode. No distribution table will be present. If this + mode is used without any previous sequence table in the frame (or + dictionary; see Section 5) to repeat, this should be treated as + corruption. + + + + + + + + + +Collet & Kucherawy Informational [Page 21] + +RFC 8478 application/zstd October 2018 + + +3.1.1.3.2.1.1. Sequence Codes for Lengths and Offsets + + Each symbol is a code in its own context, which specifies Baseline + and Number_of_Bits to add. Codes are FSE compressed and interleaved + with raw additional bits in the same bitstream. + + Literals length codes are values ranging from 0 to 35 inclusive. + They define lengths from 0 to 131071 bytes. The literals length is + equal to the decoded Baseline plus the result of reading + Number_of_Bits bits from the bitstream, as a little-endian value. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 22] + +RFC 8478 application/zstd October 2018 + + + +----------------------+----------+----------------+ + | Literals_Length_Code | Baseline | Number_of_Bits | + +----------------------+----------+----------------+ + | 0-15 | length | 0 | + +----------------------+----------+----------------+ + | 16 | 16 | 1 | + +----------------------+----------+----------------+ + | 17 | 18 | 1 | + +----------------------+----------+----------------+ + | 18 | 20 | 1 | + +----------------------+----------+----------------+ + | 19 | 22 | 1 | + +----------------------+----------+----------------+ + | 20 | 24 | 2 | + +----------------------+----------+----------------+ + | 21 | 28 | 2 | + +----------------------+----------+----------------+ + | 22 | 32 | 3 | + +----------------------+----------+----------------+ + | 23 | 40 | 3 | + +----------------------+----------+----------------+ + | 24 | 48 | 4 | + +----------------------+----------+----------------+ + | 25 | 64 | 6 | + +----------------------+----------+----------------+ + | 26 | 128 | 7 | + +----------------------+----------+----------------+ + | 27 | 256 | 8 | + +----------------------+----------+----------------+ + | 28 | 512 | 9 | + +----------------------+----------+----------------+ + | 29 | 1024 | 10 | + +----------------------+----------+----------------+ + | 30 | 2048 | 11 | + +----------------------+----------+----------------+ + | 31 | 4096 | 12 | + +----------------------+----------+----------------+ + | 32 | 8192 | 13 | + +----------------------+----------+----------------+ + | 33 | 16384 | 14 | + +----------------------+----------+----------------+ + | 34 | 32768 | 15 | + +----------------------+----------+----------------+ + | 35 | 65536 | 16 | + +----------------------+----------+----------------+ + + + + + + +Collet & Kucherawy Informational [Page 23] + +RFC 8478 application/zstd October 
2018 + + + Match length codes are values ranging from 0 to 52 inclusive. They + define lengths from 3 to 131074 bytes. The match length is equal to + the decoded Baseline plus the result of reading Number_of_Bits bits + from the bitstream, as a little-endian value. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 24] + +RFC 8478 application/zstd October 2018 + + + +-------------------+-----------------------+----------------+ + | Match_Length_Code | Baseline | Number_of_Bits | + +-------------------+-----------------------+----------------+ + | 0-31 | Match_Length_Code + 3 | 0 | + +-------------------+-----------------------+----------------+ + | 32 | 35 | 1 | + +-------------------+-----------------------+----------------+ + | 33 | 37 | 1 | + +-------------------+-----------------------+----------------+ + | 34 | 39 | 1 | + +-------------------+-----------------------+----------------+ + | 35 | 41 | 1 | + +-------------------+-----------------------+----------------+ + | 36 | 43 | 2 | + +-------------------+-----------------------+----------------+ + | 37 | 47 | 2 | + +-------------------+-----------------------+----------------+ + | 38 | 51 | 3 | + +-------------------+-----------------------+----------------+ + | 39 | 59 | 3 | + +-------------------+-----------------------+----------------+ + | 40 | 67 | 4 | + +-------------------+-----------------------+----------------+ + | 41 | 83 | 4 | + +-------------------+-----------------------+----------------+ + | 42 | 99 | 5 | + +-------------------+-----------------------+----------------+ + | 43 | 131 | 7 | + +-------------------+-----------------------+----------------+ + | 44 | 259 | 8 | + +-------------------+-----------------------+----------------+ + | 45 | 515 | 9 | + +-------------------+-----------------------+----------------+ + | 46 | 1027 | 10 | + +-------------------+-----------------------+----------------+ + | 47 | 
2051 | 11 | + +-------------------+-----------------------+----------------+ + | 48 | 4099 | 12 | + +-------------------+-----------------------+----------------+ + | 49 | 8195 | 13 | + +-------------------+-----------------------+----------------+ + | 50 | 16387 | 14 | + +-------------------+-----------------------+----------------+ + | 51 | 32771 | 15 | + +-------------------+-----------------------+----------------+ + | 52 | 65539 | 16 | + +-------------------+-----------------------+----------------+ + + + + +Collet & Kucherawy Informational [Page 25] + +RFC 8478 application/zstd October 2018 + + + Offset codes are values ranging from 0 to N. + + A decoder is free to limit its maximum supported value for N. + Support for values of at least 22 is recommended. At the time of + this writing, the reference decoder supports a maximum N value of 31. + + An offset code is also the number of additional bits to read in + little-endian fashion and can be translated into an Offset_Value + using the following formulas: + + Offset_Value = (1 << offsetCode) + readNBits(offsetCode); + if (Offset_Value > 3) Offset = Offset_Value - 3; + + This means that maximum Offset_Value is (2^(N+1))-1, supporting back- + reference distance up to (2^(N+1))-4, but it is limited by the + maximum back-reference distance (see Section 3.1.1.1.2). + + Offset_Value from 1 to 3 are special: they define "repeat codes". + This is described in more detail in Section 3.1.1.5. + +3.1.1.3.2.1.2. Decoding Sequences + + FSE bitstreams are read in reverse of the direction they are written. + In zstd, the compressor writes bits forward into a block, and the + decompressor must read the bitstream backwards. + + To find the start of the bitstream, it is therefore necessary to know + the offset of the last byte of the block, which can be found by + counting Block_Size bytes after the block header. 
+ + After writing the last bit containing information, the compressor + writes a single 1 bit and then fills the byte with 0-7 zero bits of + padding. The last byte of the compressed bitstream cannot be zero + for that reason. + + When decompressing, the last byte containing the padding is the first + byte to read. The decompressor needs to skip 0-7 initial zero bits + and the first 1 bit that occurs. Afterwards, the useful part of the + bitstream begins. + + FSE decoding requires a 'state' to be carried from symbol to symbol. + For more explanation on FSE decoding, see Section 4.1. + + For sequence decoding, separate states keep track of the literals + lengths, offsets, and match lengths symbols. Some FSE primitives are + also used. For more details on the operation of these primitives, + see Section 4.1. + + + + +Collet & Kucherawy Informational [Page 26] + +RFC 8478 application/zstd October 2018 + + + The bitstream starts with initial FSE state values, each using the + number of bits given by its table's Accuracy_Log, decoded + previously from its normalized distribution. It starts with + Literals_Length_State, followed by Offset_State, and finally + Match_Length_State. + + Note that all values are read backward, so the 'start' of the + bitstream is at the highest position in memory, immediately before + the last 1 bit for padding. + + After decoding the starting states, a single sequence is decoded + Number_of_Sequences times. These sequences are decoded in order from + first to last. Since the compressor writes the bitstream in the + forward direction, this means the compressor must encode the + sequences starting with the last one and ending with the first. + + For each of the symbol types, the FSE state can be used to determine + the appropriate code. The code then defines the Baseline and + Number_of_Bits to read for each type. The description of the codes + for how to determine these values can be found in + Section 3.1.1.3.2.1. 
+ + Decoding starts by reading the Number_of_Bits required to decode + offset. It does the same for Match_Length and then for + Literals_Length. This sequence is then used for Sequence Execution + (see Section 3.1.1.4). + + If it is not the last sequence in the block, the next operation is to + update states. Using the rules pre-calculated in the decoding + tables, Literals_Length_State is updated, followed by + Match_Length_State, and then Offset_State. See Section 4.1 for + details on how to update states from the bitstream. + + This operation will be repeated Number_of_Sequences times. At the + end, the bitstream shall be entirely consumed; otherwise, the + bitstream is considered corrupted. + +3.1.1.3.2.2. Default Distributions + + If Predefined_Mode is selected for a symbol type, its FSE decoding + table is generated from a predefined distribution table defined here. + For details on how to convert this distribution into a decoding + table, see Section 4.1. + + + + + + + + +Collet & Kucherawy Informational [Page 27] + +RFC 8478 application/zstd October 2018 + + +3.1.1.3.2.2.1. Literals Length + + The decoding table uses an accuracy log of 6 bits (64 states). + + short literalsLength_defaultDistribution[36] = + { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, + -1,-1,-1,-1 + }; + +3.1.1.3.2.2.2. Match Length + + The decoding table uses an accuracy log of 6 bits (64 states). + + short matchLengths_defaultDistribution[53] = + { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, + -1,-1,-1,-1,-1 + }; + +3.1.1.3.2.2.3. Offset Codes + + The decoding table uses an accuracy log of 5 bits (32 states), and + supports a maximum N value of 28, allowing offset values up to + 536,870,908. 
+ + If any sequence in the compressed block requires a larger offset than + this, it's not possible to use the default distribution to represent + it. + + short offsetCodes_defaultDistribution[29] = + { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 + }; + +3.1.1.4. Sequence Execution + + Once literals and sequences have been decoded, they are combined to + produce the decoded content of a block. + + Each sequence consists of a tuple of (literals_length, offset_value, + match_length), decoded as described in the + Sequences_Section (Section 3.1.1.3.2). To execute a sequence, first + copy literals_length bytes from the decoded literals to the output. + + + + + + +Collet & Kucherawy Informational [Page 28] + +RFC 8478 application/zstd October 2018 + + + Then, match_length bytes are copied from previous decoded data. The + offset to copy from is determined by offset_value: + + o if Offset_Value > 3, then the offset is Offset_Value - 3; + + o if Offset_Value is from 1-3, the offset is a special repeat offset + value. See Section 3.1.1.5 for how the offset is determined in + this case. + + The offset is defined as from the current position (after copying the + literals), so an offset of 6 and a match length of 3 means that 3 + bytes should be copied from 6 bytes back. Note that all offsets + leading to previously decoded data must be smaller than Window_Size + defined in Frame_Header_Descriptor (Section 3.1.1.1.1). + +3.1.1.5. Repeat Offsets + + As seen above, the first three values define a repeated offset; we + will call them Repeated_Offset1, Repeated_Offset2, and + Repeated_Offset3. They are sorted in recency order, with + Repeated_Offset1 meaning "most recent one". + + If offset_value is 1, then the offset used is Repeated_Offset1, etc. 
+ + There is one exception: When the current sequence's literals_length + is 0, repeated offsets are shifted by 1, so an offset_value of 1 + means Repeated_Offset2, an offset_value of 2 means Repeated_Offset3, + and an offset_value of 3 means Repeated_Offset1 - 1_byte. + + For the first block, the starting offset history is populated with + the following values: Repeated_Offset1 (1), Repeated_Offset2 (4), and + Repeated_Offset3 (8), unless a dictionary is used, in which case they + come from the dictionary. + + Then each block gets its starting offset history from the ending + values of the most recent Compressed_Block. Note that blocks that + are not Compressed_Block are skipped; they do not contribute to + offset history. + + The newest offset takes the lead in offset history, shifting others + back (up to its previous place if it was already present). This + means that when Repeated_Offset1 (most recent) is used, history is + unmodified. When Repeated_Offset2 is used, it is swapped with + Repeated_Offset1. If any other offset is used, it becomes + Repeated_Offset1, and the rest are shifted back by 1. + + + + + + +Collet & Kucherawy Informational [Page 29] + +RFC 8478 application/zstd October 2018 + + +3.1.2. Skippable Frames + + +--------------+------------+-----------+ + | Magic_Number | Frame_Size | User_Data | + +--------------+------------+-----------+ + | 4 bytes | 4 bytes | n bytes | + +--------------+------------+-----------+ + + Skippable frames allow the insertion of user-defined metadata into a + flow of concatenated frames. + + Skippable frames defined in this specification are compatible with + skippable frames in [LZ4]. + + From a compliant decoder perspective, skippable frames simply need to + be skipped, and their content ignored, resuming decoding after the + skippable frame. 
+ + It should be noted that a skippable frame can be used to watermark a + stream of concatenated frames embedding any kind of tracking + information (even just a Universally Unique Identifier (UUID)). + Users wary of such possibility should scan the stream of concatenated + frames in an attempt to detect such frames for analysis or removal. + + The fields are: + + Magic_Number: 4 bytes, little-endian format. Value: 0x184D2A5?, + which means any value from 0x184D2A50 to 0x184D2A5F. All 16 + values are valid to identify a skippable frame. This + specification does not detail any specific tagging methods for + skippable frames. + + Frame_Size: This is the size, in bytes, of the following User_Data + (without including the magic number nor the size field itself). + This field is represented using 4 bytes, little-endian format, + unsigned 32 bits. This means User_Data can't be bigger than + (2^32-1) bytes. + + User_Data: This field can be anything. Data will just be skipped by + the decoder. + +4. Entropy Encoding + + Two types of entropy encoding are used by the Zstandard format: FSE + and Huffman coding. Huffman is used to compress literals, while FSE + is used for all other symbols (Literals_Length_Code, + Match_Length_Code, and offset codes) and to compress Huffman headers. + + + + +Collet & Kucherawy Informational [Page 30] + +RFC 8478 application/zstd October 2018 + + +4.1. FSE + + FSE, short for Finite State Entropy, is an entropy codec based on + [ANS]. FSE encoding/decoding involves a state that is carried over + between symbols, so decoding must be done in the opposite direction + as encoding. Therefore, all FSE bitstreams are read from end to + beginning. Note that the order of the bits in the stream is not + reversed; they are simply read in the reverse order from which they + were written. + + For additional details on FSE, see Finite State Entropy [FSE]. 
+ + FSE decoding involves a decoding table that has a power of 2 size and + contains three elements: Symbol, Num_Bits, and Baseline. The base 2 + logarithm of the table size is its Accuracy_Log. An FSE state value + represents an index in this table. + + To obtain the initial state value, consume Accuracy_Log bits from the + stream as a little-endian value. The next symbol in the stream is + the Symbol indicated in the table for that state. To obtain the next + state value, the decoder should consume Num_Bits bits from the stream + as a little-endian value and add it to Baseline. + +4.1.1. FSE Table Description + + To decode FSE streams, it is necessary to construct the decoding + table. The Zstandard format encodes FSE table descriptions as + described here. + + An FSE distribution table describes the probabilities of all symbols + from 0 to the last present one (included) on a normalized scale of + (1 << Accuracy_Log). Note that there must be two or more symbols + with non-zero probability. + + A bitstream is read forward, in little-endian fashion. It is not + necessary to know its exact size, since the size will be discovered + and reported by the decoding process. The bitstream starts by + reporting on which scale it operates. If low4bits designates the + lowest 4 bits of the first byte, then Accuracy_Log = low4bits + 5. + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 31] + +RFC 8478 application/zstd October 2018 + + + This is followed by each symbol value, from 0 to the last present + one. The number of bits used by each field is variable and depends + on: + + Remaining probabilities + 1: For example, presuming an Accuracy_Log + of 8, and presuming 100 probabilities points have already been + distributed, the decoder may read any value from 0 to + (256 - 100 + 1) == 157, inclusive. Therefore, it must read + log2sup(157) == 8 bits. + + Value decoded: Small values use 1 fewer bit. 
For example, presuming + values from 0 to 157 (inclusive) are possible, 255 - 157 = 98 + values are remaining in an 8-bit field. The first 98 values + (hence from 0 to 97) use only 7 bits, and values from 98 to 157 + use 8 bits. This is achieved through this scheme: + + +------------+---------------+-----------+ + | Value Read | Value Decoded | Bits Used | + +------------+---------------+-----------+ + | 0 - 97 | 0 - 97 | 7 | + +------------+---------------+-----------+ + | 98 - 127 | 98 - 127 | 8 | + +------------+---------------+-----------+ + | 128 - 225 | 0 - 97 | 7 | + +------------+---------------+-----------+ + | 226 - 255 | 128 - 157 | 8 | + +------------+---------------+-----------+ + + Symbol probabilities are read one by one, in order. The probability + is obtained from Value decoded using the formula P = Value - 1. This + means the value 0 becomes the negative probability -1. This is a + special probability that means "less than 1". Its effect on the + distribution table is described below. For the purpose of + calculating total allocated probability points, it counts as 1. + + When a symbol has a probability of zero, it is followed by a 2-bit + repeat flag. This repeat flag tells how many probabilities of zeroes + follow the current one. It provides a number ranging from 0 to 3. + If it is a 3, another 2-bit repeat flag follows, and so on. + + When the last symbol reaches a cumulated total of + (1 << Accuracy_Log), decoding is complete. If the last symbol makes + the cumulated total go above (1 << Accuracy_Log), distribution is + considered corrupted. + + + + + + + +Collet & Kucherawy Informational [Page 32] + +RFC 8478 application/zstd October 2018 + + + Finally, the decoder can tell how many bytes were used in this + process and how many symbols are present. The bitstream consumes a + round number of bytes. Any remaining bit within the last byte is + simply unused. 
+ + The distribution of normalized probabilities is enough to create a + unique decoding table. The table has a size of (1 << Accuracy_Log). + Each cell describes the symbol decoded and instructions to get the + next state. + + Symbols are scanned in their natural order for "less than 1" + probabilities as described above. Symbols with this probability are + being attributed a single cell, starting from the end of the table + and retreating. These symbols define a full state reset, reading + Accuracy_Log bits. + + All remaining symbols are allocated in their natural order. Starting + from symbol 0 and table position 0, each symbol gets allocated as + many cells as its probability. Cell allocation is spread, not + linear; each successor position follows this rule: + + position += (tableSize >> 1) + (tableSize >> 3) + 3; + position &= tableSize - 1; + + A position is skipped if it is already occupied by a "less than 1" + probability symbol. Position does not reset between symbols; it + simply iterates through each position in the table, switching to the + next symbol when enough states have been allocated to the current + one. + + The result is a list of state values. Each state will decode the + current symbol. + + To get the Number_of_Bits and Baseline required for the next state, + it is first necessary to sort all states in their natural order. The + lower states will need 1 more bit than higher ones. The process is + repeated for each symbol. + + For example, presuming a symbol has a probability of 5, it receives + five state values. States are sorted in natural order. The next + power of 2 is 8. The space of probabilities is divided into 8 equal + parts. Presuming the Accuracy_Log is 7, this defines 128 states, and + each share (divided by 8) is 16 in size. In order to reach 8, 8 - 5 + = 3 lowest states will count "double", doubling the number of shares + (32 in width), requiring 1 more bit in the process. 
+ + + + + + +Collet & Kucherawy Informational [Page 33] + +RFC 8478 application/zstd October 2018 + + + Baseline is assigned starting from the higher states using fewer + bits, and proceeding naturally, then resuming at the first state, + each taking its allocated width from Baseline. + + +----------------+-------+-------+--------+------+-------+ + | state order | 0 | 1 | 2 | 3 | 4 | + +----------------+-------+-------+--------+------+-------+ + | width | 32 | 32 | 32 | 16 | 16 | + +----------------+-------+-------+--------+------+-------+ + | Number_of_Bits | 5 | 5 | 5 | 4 | 4 | + +----------------+-------+-------+--------+------+-------+ + | range number | 2 | 4 | 6 | 0 | 1 | + +----------------+-------+-------+--------+------+-------+ + | Baseline | 32 | 64 | 96 | 0 | 16 | + +----------------+-------+-------+--------+------+-------+ + | range | 32-63 | 64-95 | 96-127 | 0-15 | 16-31 | + +----------------+-------+-------+--------+------+-------+ + + The next state is determined from the current state by reading the + required Number_of_Bits and adding the specified Baseline. + + See Appendix A for the results of this process that are applied to + the default distributions. + +4.2. Huffman Coding + + Zstandard Huffman-coded streams are read backwards, similar to the + FSE bitstreams. Therefore, to find the start of the bitstream, it is + necessary to know the offset of the last byte of the Huffman-coded + stream. + + After writing the last bit containing information, the compressor + writes a single 1 bit and then fills the byte with 0-7 0 bits of + padding. The last byte of the compressed bitstream cannot be 0 for + that reason. + + When decompressing, the last byte containing the padding is the first + byte to read. The decompressor needs to skip 0-7 initial 0 bits and + the first 1 bit that occurs. Afterwards, the useful part of the + bitstream begins. 
+ + The bitstream contains Huffman-coded symbols in little-endian order, + with the codes defined by the method below. + + + + + + + + +Collet & Kucherawy Informational [Page 34] + +RFC 8478 application/zstd October 2018 + + +4.2.1. Huffman Tree Description + + Prefix coding represents symbols from an a priori known alphabet by + bit sequences (codewords), one codeword for each symbol, in a manner + such that different symbols may be represented by bit sequences of + different lengths, but a parser can always parse an encoded string + unambiguously symbol by symbol. + + Given an alphabet with known symbol frequencies, the Huffman + algorithm allows the construction of an optimal prefix code using the + fewest bits of any possible prefix codes for that alphabet. + + The prefix code must not exceed a maximum code length. More bits + improve accuracy but yield a larger header size and require more + memory or more complex decoding operations. This specification + limits the maximum code length to 11 bits. + + All literal values from zero (included) to the last present one + (excluded) are represented by Weight with values from 0 to + Max_Number_of_Bits. Transformation from Weight to Number_of_Bits + follows this pseudocode: + + if Weight == 0 + Number_of_Bits = 0 + else + Number_of_Bits = Max_Number_of_Bits + 1 - Weight + + The last symbol's Weight is deduced from previously decoded ones, by + completing to the nearest power of 2. This power of 2 gives + Max_Number_of_Bits, the depth of the current tree. 
+ + For example, presume the following Huffman tree must be described: + + +---------------+----------------+ + | Literal Value | Number_of_Bits | + +---------------+----------------+ + | 0 | 1 | + +---------------+----------------+ + | 1 | 2 | + +---------------+----------------+ + | 2 | 3 | + +---------------+----------------+ + | 3 | 0 | + +---------------+----------------+ + | 4 | 4 | + +---------------+----------------+ + | 5 | 4 | + +---------------+----------------+ + + + +Collet & Kucherawy Informational [Page 35] + +RFC 8478 application/zstd October 2018 + + + The tree depth is 4, since its longest element uses 4 bits. (The + longest elements are those with the smallest frequencies.) Value 5 + will not be listed as it can be determined from the values for 0-4, + nor will values above 5 as they are all 0. Values from 0 to 4 will + be listed using Weight instead of Number_of_Bits. The pseudocode to + determine Weight is: + + if Number_of_Bits == 0 + Weight = 0 + else + Weight = Max_Number_of_Bits + 1 - Number_of_Bits + + It gives the following series of weights: + + +---------------+--------+ + | Literal Value | Weight | + +---------------+--------+ + | 0 | 4 | + +---------------+--------+ + | 1 | 3 | + +---------------+--------+ + | 2 | 2 | + +---------------+--------+ + | 3 | 0 | + +---------------+--------+ + | 4 | 1 | + +---------------+--------+ + + The decoder will do the inverse operation: having collected weights + of literals from 0 to 4, it knows the last literal, 5, is present + with a non-zero Weight. The Weight of 5 can be determined by + advancing to the next power of 2. The sum of 2^(Weight-1) (excluding + 0's) is 15. The nearest power of 2 is 16. Therefore, + Max_Number_of_Bits = 4 and Weight[5] = log_2(16 - 15) + 1 = 1. + +4.2.1.1. Huffman Tree Header + + This is a single byte value (0-255), which describes how the series + of weights is encoded. + + headerByte < 128: The series of weights is compressed using FSE (see + below). 
The length of the FSE-compressed series is equal to + headerByte (0-127). + + + + + + + + +Collet & Kucherawy Informational [Page 36] + +RFC 8478 application/zstd October 2018 + + + headerByte >= 128: This is a direct representation, where each + Weight is written directly as a 4-bit field (0-15). They are + encoded forward, 2 weights to a byte with the first weight taking + the top 4 bits and the second taking the bottom 4; for example, + the following operations could be used to read the weights: + + Weight[0] = (Byte[0] >> 4) + Weight[1] = (Byte[0] & 0xf), + etc. + + The full representation occupies ceiling(Number_of_Symbols/2) + bytes, meaning it uses only full bytes even if Number_of_Symbols + is odd. Number_of_Symbols = headerByte - 127. Note that maximum + Number_of_Symbols is 255 - 127 = 128. If any literal has a value + over 128, raw header mode is not possible, and it is necessary to + use FSE compression. + +4.2.1.2. FSE Compression of Huffman Weights + + In this case, the series of Huffman weights is compressed using FSE + compression. It is a single bitstream with two interleaved states, + sharing a single distribution table. + + To decode an FSE bitstream, it is necessary to know its compressed + size. Compressed size is provided by headerByte. It's also + necessary to know its maximum possible decompressed size, which is + 255, since literal values span from 0 to 255, and the last symbol's + Weight is not represented. + + An FSE bitstream starts by a header, describing probabilities + distribution. It will create a decoding table. For a list of + Huffman weights, the maximum accuracy log is 6 bits. For more + details, see Section 4.1.1. + + The Huffman header compression uses two states, which share the same + FSE distribution table. The first state (State1) encodes the even- + numbered index symbols, and the second (State2) encodes the odd- + numbered index symbols. 
State1 is initialized first, and then + State2, and they take turns decoding a single symbol and updating + their state. For more details on these FSE operations, see + Section 4.1. + + The number of symbols to be decoded is determined by tracking the + bitStream overflow condition: If updating state after decoding a + symbol would require more bits than remain in the stream, it is + assumed that extra bits are zero. Then, symbols for each of the + final states are decoded and the process is complete. + + + + +Collet & Kucherawy Informational [Page 37] + +RFC 8478 application/zstd October 2018 + + +4.2.1.3. Conversion from Weights to Huffman Prefix Codes + + All present symbols will now have a Weight value. It is possible to + transform weights into Number_of_Bits, using this formula: + + if Weight > 0 + Number_of_Bits = Max_Number_of_Bits + 1 - Weight + else + Number_of_Bits = 0 + + Symbols are sorted by Weight. Within the same Weight, symbols keep + natural sequential order. Symbols with a Weight of zero are removed. + Then, starting from the lowest Weight, prefix codes are distributed + in sequential order. 
+ + For example, assume the following list of weights has been decoded: + + +---------+--------+ + | Literal | Weight | + +---------+--------+ + | 0 | 4 | + +---------+--------+ + | 1 | 3 | + +---------+--------+ + | 2 | 2 | + +---------+--------+ + | 3 | 0 | + +---------+--------+ + | 4 | 1 | + +---------+--------+ + | 5 | 1 | + +---------+--------+ + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 38] + +RFC 8478 application/zstd October 2018 + + + Sorting by weight and then the natural sequential order yields the + following distribution: + + +---------+--------+----------------+--------------+ + | Literal | Weight | Number_Of_Bits | Prefix Codes | + +---------+--------+----------------|--------------+ + | 3 | 0 | 0 | N/A | + +---------+--------+----------------|--------------+ + | 4 | 1 | 4 | 0000 | + +---------+--------+----------------|--------------+ + | 5 | 1 | 4 | 0001 | + +---------+--------+----------------|--------------+ + | 2 | 2 | 3 | 001 | + +---------+--------+----------------|--------------+ + | 1 | 3 | 2 | 01 | + +---------+--------+----------------|--------------+ + | 0 | 4 | 1 | 1 | + +---------+--------+----------------|--------------+ + +4.2.2. Huffman-Coded Streams + + Given a Huffman decoding table, it is possible to decode a Huffman- + coded stream. + + Each bitstream must be read backward, which starts from the end and + goes up to the beginning. Therefore, it is necessary to know the + size of each bitstream. + + It is also necessary to know exactly which bit is the last. This is + detected by a final bit flag: the highest bit of the last byte is a + final-bit-flag. Consequently, a last byte of 0 is not possible. And + the final-bit-flag itself is not part of the useful bitstream. + Hence, the last byte contains between 0 and 7 useful bits. + + Starting from the end, it is possible to read the bitstream in a + little-endian fashion, keeping track of already used bits. 
Since the + bitstream is encoded in reverse order, starting from the end, read + symbols in forward order. + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 39] + +RFC 8478 application/zstd October 2018 + + + For example, if the literal sequence "0145" was encoded using the + above prefix code, it would be encoded (in reverse order) as: + + +---------+----------+ + | Symbol | Encoding | + +---------+----------+ + | 5 | 0000 | + +---------+----------+ + | 4 | 0001 | + +---------+----------+ + | 1 | 01 | + +---------+----------+ + | 0 | 1 | + +---------+----------+ + | Padding | 00001 | + +---------+----------+ + + This results in the following 2-byte bitstream: + + 00010000 00001101 + + Here is an alternative representation with the symbol codes separated + by underscores: + + 0001_0000 00001_1_01 + + Reading the highest Max_Number_of_Bits bits, it's possible to compare + the extracted value to the decoding table, determining the symbol to + decode and number of bits to discard. + + The process continues reading up to the required number of symbols + per stream. If a bitstream is not entirely and exactly consumed, + hence reaching exactly its beginning position with all bits consumed, + the decoding process is considered faulty. + +5. Dictionary Format + + Zstandard is compatible with "raw content" dictionaries, free of any + format restriction, except that they must be at least 8 bytes. These + dictionaries function as if they were just the content part of a + formatted dictionary. + + However, dictionaries created by "zstd --train" in the reference + implementation follow a specific format, described here. + + Dictionaries are not included in the compressed content but rather + are provided out of band. 
That is, the Dictionary_ID identifies + which should be used, but this specification does not describe the + + + +Collet & Kucherawy Informational [Page 40] + +RFC 8478 application/zstd October 2018 + + + mechanism by which the dictionary is obtained prior to use during + compression or decompression. + + A dictionary has a size, defined either by a buffer limit or a file + size. The general format is: + + +--------------+---------------+----------------+---------+ + | Magic_Number | Dictionary_ID | Entropy_Tables | Content | + +--------------+---------------+----------------+---------+ + + Magic_Number: 4 bytes ID, value 0xEC30A437, little-endian format. + + Dictionary_ID: 4 bytes, stored in little-endian format. + Dictionary_ID can be any value, except 0 (which means no + Dictionary_ID). It is used by decoders to check if they use the + correct dictionary. If the frame is going to be distributed in a + private environment, any Dictionary_ID can be used. However, for + public distribution of compressed frames, the following ranges are + reserved and shall not be used: + + low range: <= 32767 + high range: >= (2^31) + + Entropy_Tables: Follow the same format as the tables in compressed + blocks. See the relevant FSE and Huffman sections for how to + decode these tables. They are stored in the following order: + Huffman table for literals, FSE table for offsets, FSE table for + match lengths, and FSE table for literals lengths. These tables + populate the Repeat Stats literals mode and Repeat distribution + mode for sequence decoding. It is finally followed by 3 offset + values, populating repeat offsets (instead of using {1,4,8}), + stored in order, 4-bytes little-endian each, for a total of 12 + bytes. Each repeat offset must have a value less than the + dictionary size. + + Content: The rest of the dictionary is its content. The content + acts as a "past" in front of data to be compressed or + decompressed, so it can be referenced in sequence commands. 
As + long as the amount of data decoded from this frame is less than or + equal to Window_Size, sequence commands may specify offsets longer + than the total length of decoded output so far to reference back + to the dictionary, even parts of the dictionary with offsets + larger than Window_Size. After the total output has surpassed + Window_Size, however, this is no longer allowed, and the + dictionary is no longer accessible. + + + + + + +Collet & Kucherawy Informational [Page 41] + +RFC 8478 application/zstd October 2018 + + +6. IANA Considerations + + IANA has made two registrations, as described below. + +6.1. The 'application/zstd' Media Type + + The 'application/zstd' media type identifies a block of data that is + compressed using zstd compression. The data is a stream of bytes as + described in this document. IANA has added the following to the + "Media Types" registry: + + Type name: application + + Subtype name: zstd + + Required parameters: N/A + + Optional parameters: N/A + + Encoding considerations: binary + + Security considerations: See Section 7 of RFC 8478 + + Interoperability considerations: N/A + + Published specification: RFC 8478 + + Applications that use this media type: anywhere data size is an + issue + + Additional information: + + Magic number(s): 4 bytes, little-endian format. + Value: 0xFD2FB528 + + File extension(s): zst + + Macintosh file type code(s): N/A + + For further information: See [ZSTD] + + Intended usage: common + + Restrictions on usage: N/A + + Author: Murray S. Kucherawy + + Change Controller: IETF + + + +Collet & Kucherawy Informational [Page 42] + +RFC 8478 application/zstd October 2018 + + + Provisional registration: no + +6.2. 
Content Encoding + + IANA has added the following entry to the "HTTP Content Coding + Registry" within the "Hypertext Transfer Protocol (HTTP) Parameters" + registry: + + Name: zstd + + Description: A stream of bytes compressed using the Zstandard + protocol + + Pointer to specification text: RFC 8478 + +6.3. Dictionaries + + Work in progress includes development of dictionaries that will + optimize compression and decompression of particular types of data. + Specification of such dictionaries for public use will necessitate + registration of a code point from the reserved range described in + Section 3.1.1.1.3 and its association with a specific dictionary. + + However, there are at present no such dictionaries published for + public use, so this document makes no immediate request of IANA to + create such a registry. + +7. Security Considerations + + Any data compression method involves the reduction of redundancy in + the data. Zstandard is no exception, and the usual precautions + apply. + + One should never compress a message whose content must remain secret + with a message generated by a third party. Such a compression can be + used to guess the content of the secret message through analysis of + entropy reduction. This was demonstrated in the Compression Ratio + Info-leak Made Easy (CRIME) attack [CRIME], for example. + + A decoder has to demonstrate capabilities to detect and prevent any + kind of data tampering in the compressed frame from triggering system + faults, such as reading or writing beyond allowed memory ranges. + This can be guaranteed by either the implementation language or + careful bound checkings. Of particular note is the encoding of + Number_of_Sequences values that cause the decoder to read into the + block header (and beyond), as well as the indication of a + Frame_Content_Size that is smaller than the actual decompressed data, + in an attempt to trigger a buffer overflow. 
It is highly recommended + + + +Collet & Kucherawy Informational [Page 43] + +RFC 8478 application/zstd October 2018 + + + to fuzz-test (i.e., provide invalid, unexpected, or random input and + verify safe operation of) decoder implementations to test and harden + their capability to detect bad frames and deal with them without any + adverse system side effect. + + An attacker may provide correctly formed compressed frames with + unreasonable memory requirements. A decoder must always control + memory requirements and enforce some (system-specific) limits in + order to protect memory usage from such scenarios. + + Compression can be optimized by training a dictionary on a variety of + related content payloads. This dictionary must then be available at + the decoder for decompression of the payload to be possible. While + this document does not specify how to acquire a dictionary for a + given compressed payload, it is worth noting that third-party + dictionaries may interact unexpectedly with a decoder, leading to + possible memory or other resource exhaustion attacks. We expect such + topics to be discussed in further detail in the Security + Considerations section of a forthcoming RFC for dictionary + acquisition and transmission, but highlight this issue now out of an + abundance of caution. + + As discussed in Section 3.1.2, it is possible to store arbitrary user + metadata in skippable frames. While such frames are ignored during + decompression of the data, they can be used as a watermark to track + the path of the compressed payload. + +8. Implementation Status + + Source code for a C language implementation of a Zstandard-compliant + library is available at [ZSTD-GITHUB]. This implementation is + considered to be the reference implementation and is production + ready; it implements the full range of the specification. It is + routinely tested against security hazards and widely deployed within + Facebook infrastructure. 
+ + The reference version is optimized for speed and is highly portable. + It has been proven to run safely on multiple architectures (e.g., + x86, x64, ARM, MIPS, PowerPC, IA64) featuring 32- or 64-bit + addressing schemes, a little- or big-endian storage scheme, a number + of different operating systems (e.g., UNIX (including Linux, BSD, + OS-X, and Solaris) and Windows), and a number of compilers (e.g., + gcc, clang, visual, and icc). + + + + + + + + +Collet & Kucherawy Informational [Page 44] + +RFC 8478 application/zstd October 2018 + + +9. References + +9.1. Normative References + + [ZSTD] "Zstandard", <http://www.zstd.net>. + +9.2. Informative References + + [ANS] Duda, J., "Asymmetric numeral systems: entropy coding + combining speed of Huffman coding with compression rate of + arithmetic coding", January 2014, + <https://arxiv.org/pdf/1311.2540>. + + [CRIME] "CRIME", June 2018, <https://en.wikipedia.org/w/ + index.php?title=CRIME&oldid=844538656>. + + [FSE] "FiniteStateEntropy", commit 6efa78a, June 2018, + <https://github.com/Cyan4973/FiniteStateEntropy/>. + + [LZ4] "LZ4 Frame Format Description", commit d03224b, January + 2018, <https://github.com/lz4/lz4/blob/master/doc/ + lz4_Frame_format.md>. + + [RFC1952] Deutsch, P., "GZIP file format specification version 4.3", + RFC 1952, DOI 10.17487/RFC1952, May 1996, + <https://www.rfc-editor.org/info/rfc1952>. + + [XXHASH] "XXHASH Algorithm", <http://www.xxhash.org>. + + [ZSTD-GITHUB] + "zstd", commit 8514bd8, August 2018, + <https://github.com/facebook/zstd>. + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 45] + +RFC 8478 application/zstd October 2018 + + +Appendix A. Decoding Tables for Predefined Codes + + This appendix contains FSE decoding tables for the predefined literal + length, match length, and offset codes. The tables have been + constructed using the algorithm as given above in Section 4.1.1. The + tables here can be used as examples to crosscheck that an + implementation has built its decoding tables correctly. + +A.1.
Literal Length Code Table + + +-------+--------+----------------+------+ + | State | Symbol | Number_Of_Bits | Base | + +-------+--------+----------------+------+ + | 0 | 0 | 0 | 0 | + +-------+--------+----------------+------+ + | 0 | 0 | 4 | 0 | + +-------+--------+----------------+------+ + | 1 | 0 | 4 | 16 | + +-------+--------+----------------+------+ + | 2 | 1 | 5 | 32 | + +-------+--------+----------------+------+ + | 3 | 3 | 5 | 0 | + +-------+--------+----------------+------+ + | 4 | 4 | 5 | 0 | + +-------+--------+----------------+------+ + | 5 | 6 | 5 | 0 | + +-------+--------+----------------+------+ + | 6 | 7 | 5 | 0 | + +-------+--------+----------------+------+ + | 7 | 9 | 5 | 0 | + +-------+--------+----------------+------+ + | 8 | 10 | 5 | 0 | + +-------+--------+----------------+------+ + | 9 | 12 | 5 | 0 | + +-------+--------+----------------+------+ + | 10 | 14 | 6 | 0 | + +-------+--------+----------------+------+ + | 11 | 16 | 5 | 0 | + +-------+--------+----------------+------+ + | 12 | 18 | 5 | 0 | + +-------+--------+----------------+------+ + | 13 | 19 | 5 | 0 | + +-------+--------+----------------+------+ + | 14 | 21 | 5 | 0 | + +-------+--------+----------------+------+ + | 15 | 22 | 5 | 0 | + +-------+--------+----------------+------+ + | 16 | 24 | 5 | 0 | + + + +Collet & Kucherawy Informational [Page 46] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 17 | 25 | 5 | 32 | + +-------+--------+----------------+------+ + | 18 | 26 | 5 | 0 | + +-------+--------+----------------+------+ + | 19 | 27 | 6 | 0 | + +-------+--------+----------------+------+ + | 20 | 29 | 6 | 0 | + +-------+--------+----------------+------+ + | 21 | 31 | 6 | 0 | + +-------+--------+----------------+------+ + | 22 | 0 | 4 | 32 | + +-------+--------+----------------+------+ + | 23 | 1 | 4 | 0 | + +-------+--------+----------------+------+ + | 24 | 2 | 5 | 0 | + +-------+--------+----------------+------+ + | 25 | 4 | 5 | 
32 | + +-------+--------+----------------+------+ + | 26 | 5 | 5 | 0 | + +-------+--------+----------------+------+ + | 27 | 7 | 5 | 32 | + +-------+--------+----------------+------+ + | 28 | 8 | 5 | 0 | + +-------+--------+----------------+------+ + | 29 | 10 | 5 | 32 | + +-------+--------+----------------+------+ + | 30 | 11 | 5 | 0 | + +-------+--------+----------------+------+ + | 31 | 13 | 6 | 0 | + +-------+--------+----------------+------+ + | 32 | 16 | 5 | 32 | + +-------+--------+----------------+------+ + | 33 | 17 | 5 | 0 | + +-------+--------+----------------+------+ + | 34 | 19 | 5 | 32 | + +-------+--------+----------------+------+ + | 35 | 20 | 5 | 0 | + +-------+--------+----------------+------+ + | 36 | 22 | 5 | 32 | + +-------+--------+----------------+------+ + | 37 | 23 | 5 | 0 | + +-------+--------+----------------+------+ + | 38 | 25 | 4 | 0 | + +-------+--------+----------------+------+ + | 39 | 25 | 4 | 16 | + +-------+--------+----------------+------+ + | 40 | 26 | 5 | 32 | + + + +Collet & Kucherawy Informational [Page 47] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 41 | 28 | 6 | 0 | + +-------+--------+----------------+------+ + | 42 | 30 | 6 | 0 | + +-------+--------+----------------+------+ + | 43 | 0 | 4 | 48 | + +-------+--------+----------------+------+ + | 44 | 1 | 4 | 16 | + +-------+--------+----------------+------+ + | 45 | 2 | 5 | 32 | + +-------+--------+----------------+------+ + | 46 | 3 | 5 | 32 | + +-------+--------+----------------+------+ + | 47 | 5 | 5 | 32 | + +-------+--------+----------------+------+ + | 48 | 6 | 5 | 32 | + +-------+--------+----------------+------+ + | 49 | 8 | 5 | 32 | + +-------+--------+----------------+------+ + | 50 | 9 | 5 | 32 | + +-------+--------+----------------+------+ + | 51 | 11 | 5 | 32 | + +-------+--------+----------------+------+ + | 52 | 12 | 5 | 32 | + +-------+--------+----------------+------+ + | 53 | 15 | 6 | 0 | + 
+-------+--------+----------------+------+ + | 54 | 17 | 5 | 32 | + +-------+--------+----------------+------+ + | 55 | 18 | 5 | 32 | + +-------+--------+----------------+------+ + | 56 | 20 | 5 | 32 | + +-------+--------+----------------+------+ + | 57 | 21 | 5 | 32 | + +-------+--------+----------------+------+ + | 58 | 23 | 5 | 32 | + +-------+--------+----------------+------+ + | 59 | 24 | 5 | 32 | + +-------+--------+----------------+------+ + | 60 | 35 | 6 | 0 | + +-------+--------+----------------+------+ + | 61 | 34 | 6 | 0 | + +-------+--------+----------------+------+ + | 62 | 33 | 6 | 0 | + +-------+--------+----------------+------+ + | 63 | 32 | 6 | 0 | + +-------+--------+----------------+------+ + + + + +Collet & Kucherawy Informational [Page 48] + +RFC 8478 application/zstd October 2018 + + +A.2. Match Length Code Table + + +-------+--------+----------------+------+ + | State | Symbol | Number_Of_Bits | Base | + +-------+--------+----------------+------+ + | 0 | 0 | 0 | 0 | + +-------+--------+----------------+------+ + | 0 | 0 | 6 | 0 | + +-------+--------+----------------+------+ + | 1 | 1 | 4 | 0 | + +-------+--------+----------------+------+ + | 2 | 2 | 5 | 32 | + +-------+--------+----------------+------+ + | 3 | 3 | 5 | 0 | + +-------+--------+----------------+------+ + | 4 | 5 | 5 | 0 | + +-------+--------+----------------+------+ + | 5 | 6 | 5 | 0 | + +-------+--------+----------------+------+ + | 6 | 8 | 5 | 0 | + +-------+--------+----------------+------+ + | 7 | 10 | 6 | 0 | + +-------+--------+----------------+------+ + | 8 | 13 | 6 | 0 | + +-------+--------+----------------+------+ + | 9 | 16 | 6 | 0 | + +-------+--------+----------------+------+ + | 10 | 19 | 6 | 0 | + +-------+--------+----------------+------+ + | 11 | 22 | 6 | 0 | + +-------+--------+----------------+------+ + | 12 | 25 | 6 | 0 | + +-------+--------+----------------+------+ + | 13 | 28 | 6 | 0 | + +-------+--------+----------------+------+ + | 14 | 31 | 6 | 0 | + 
+-------+--------+----------------+------+ + | 15 | 33 | 6 | 0 | + +-------+--------+----------------+------+ + | 16 | 35 | 6 | 0 | + +-------+--------+----------------+------+ + | 17 | 37 | 6 | 0 | + +-------+--------+----------------+------+ + | 18 | 39 | 6 | 0 | + +-------+--------+----------------+------+ + | 19 | 41 | 6 | 0 | + +-------+--------+----------------+------+ + | 20 | 43 | 6 | 0 | + + + +Collet & Kucherawy Informational [Page 49] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 21 | 45 | 6 | 0 | + +-------+--------+----------------+------+ + | 22 | 1 | 4 | 16 | + +-------+--------+----------------+------+ + | 23 | 2 | 4 | 0 | + +-------+--------+----------------+------+ + | 24 | 3 | 5 | 32 | + +-------+--------+----------------+------+ + | 25 | 4 | 5 | 0 | + +-------+--------+----------------+------+ + | 26 | 6 | 5 | 32 | + +-------+--------+----------------+------+ + | 27 | 7 | 5 | 0 | + +-------+--------+----------------+------+ + | 28 | 9 | 6 | 0 | + +-------+--------+----------------+------+ + | 29 | 12 | 6 | 0 | + +-------+--------+----------------+------+ + | 30 | 15 | 6 | 0 | + +-------+--------+----------------+------+ + | 31 | 18 | 6 | 0 | + +-------+--------+----------------+------+ + | 32 | 21 | 6 | 0 | + +-------+--------+----------------+------+ + | 33 | 24 | 6 | 0 | + +-------+--------+----------------+------+ + | 34 | 27 | 6 | 0 | + +-------+--------+----------------+------+ + | 35 | 30 | 6 | 0 | + +-------+--------+----------------+------+ + | 36 | 32 | 6 | 0 | + +-------+--------+----------------+------+ + | 37 | 34 | 6 | 0 | + +-------+--------+----------------+------+ + | 38 | 36 | 6 | 0 | + +-------+--------+----------------+------+ + | 39 | 38 | 6 | 0 | + +-------+--------+----------------+------+ + | 40 | 40 | 6 | 0 | + +-------+--------+----------------+------+ + | 41 | 42 | 6 | 0 | + +-------+--------+----------------+------+ + | 42 | 44 | 6 | 0 | + 
+-------+--------+----------------+------+ + | 43 | 1 | 4 | 32 | + +-------+--------+----------------+------+ + | 44 | 1 | 4 | 48 | + + + +Collet & Kucherawy Informational [Page 50] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 45 | 2 | 4 | 16 | + +-------+--------+----------------+------+ + | 46 | 4 | 5 | 32 | + +-------+--------+----------------+------+ + | 47 | 5 | 5 | 32 | + +-------+--------+----------------+------+ + | 48 | 7 | 5 | 32 | + +-------+--------+----------------+------+ + | 49 | 8 | 5 | 32 | + +-------+--------+----------------+------+ + | 50 | 11 | 6 | 0 | + +-------+--------+----------------+------+ + | 51 | 14 | 6 | 0 | + +-------+--------+----------------+------+ + | 52 | 17 | 6 | 0 | + +-------+--------+----------------+------+ + | 53 | 20 | 6 | 0 | + +-------+--------+----------------+------+ + | 54 | 23 | 6 | 0 | + +-------+--------+----------------+------+ + | 55 | 26 | 6 | 0 | + +-------+--------+----------------+------+ + | 56 | 29 | 6 | 0 | + +-------+--------+----------------+------+ + | 57 | 52 | 6 | 0 | + +-------+--------+----------------+------+ + | 58 | 51 | 6 | 0 | + +-------+--------+----------------+------+ + | 59 | 50 | 6 | 0 | + +-------+--------+----------------+------+ + | 60 | 49 | 6 | 0 | + +-------+--------+----------------+------+ + | 61 | 48 | 6 | 0 | + +-------+--------+----------------+------+ + | 62 | 47 | 6 | 0 | + +-------+--------+----------------+------+ + | 63 | 46 | 6 | 0 | + +-------+--------+----------------+------+ + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 51] + +RFC 8478 application/zstd October 2018 + + +A.3. 
Offset Code Table + + +-------+--------+----------------+------+ + | State | Symbol | Number_Of_Bits | Base | + +-------+--------+----------------+------+ + | 0 | 0 | 0 | 0 | + +-------+--------+----------------+------+ + | 0 | 0 | 5 | 0 | + +-------+--------+----------------+------+ + | 1 | 6 | 4 | 0 | + +-------+--------+----------------+------+ + | 2 | 9 | 5 | 0 | + +-------+--------+----------------+------+ + | 3 | 15 | 5 | 0 | + +-------+--------+----------------+------+ + | 4 | 21 | 5 | 0 | + +-------+--------+----------------+------+ + | 5 | 3 | 5 | 0 | + +-------+--------+----------------+------+ + | 6 | 7 | 4 | 0 | + +-------+--------+----------------+------+ + | 7 | 12 | 5 | 0 | + +-------+--------+----------------+------+ + | 8 | 18 | 5 | 0 | + +-------+--------+----------------+------+ + | 9 | 23 | 5 | 0 | + +-------+--------+----------------+------+ + | 10 | 5 | 5 | 0 | + +-------+--------+----------------+------+ + | 11 | 8 | 4 | 0 | + +-------+--------+----------------+------+ + | 12 | 14 | 5 | 0 | + +-------+--------+----------------+------+ + | 13 | 20 | 5 | 0 | + +-------+--------+----------------+------+ + | 14 | 2 | 5 | 0 | + +-------+--------+----------------+------+ + | 15 | 7 | 4 | 16 | + +-------+--------+----------------+------+ + | 16 | 11 | 5 | 0 | + +-------+--------+----------------+------+ + | 17 | 17 | 5 | 0 | + +-------+--------+----------------+------+ + | 18 | 22 | 5 | 0 | + +-------+--------+----------------+------+ + | 19 | 4 | 5 | 0 | + +-------+--------+----------------+------+ + | 20 | 8 | 4 | 16 | + + + +Collet & Kucherawy Informational [Page 52] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 21 | 13 | 5 | 0 | + +-------+--------+----------------+------+ + | 22 | 19 | 5 | 0 | + +-------+--------+----------------+------+ + | 23 | 1 | 5 | 0 | + +-------+--------+----------------+------+ + | 24 | 6 | 4 | 16 | + +-------+--------+----------------+------+ + | 25 | 10 | 5 | 0 | + 
+-------+--------+----------------+------+ + | 26 | 16 | 5 | 0 | + +-------+--------+----------------+------+ + | 27 | 28 | 5 | 0 | + +-------+--------+----------------+------+ + | 28 | 27 | 5 | 0 | + +-------+--------+----------------+------+ + | 29 | 26 | 5 | 0 | + +-------+--------+----------------+------+ + | 30 | 25 | 5 | 0 | + +-------+--------+----------------+------+ + | 31 | 24 | 5 | 0 | + +-------+--------+----------------+------+ + +Acknowledgments + + zstd was developed by Yann Collet. + + Bobo Bose-Kolanu, Felix Handte, Kyle Nekritz, Nick Terrell, and David + Schleimer provided helpful feedback during the development of this + document. + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 53] + +RFC 8478 application/zstd October 2018 + + +Authors' Addresses + + Yann Collet + Facebook + 1 Hacker Way + Menlo Park, CA 94025 + United States of America + + Email: cyan@fb.com + + + Murray S. Kucherawy (editor) + Facebook + 1 Hacker Way + Menlo Park, CA 94025 + United States of America + + Email: msk@fb.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 54] + diff --git a/lib/std/compress/testdata/rfc8478.txt.zst.19 b/lib/std/compress/testdata/rfc8478.txt.zst.19 new file mode 100644 index 0000000000000000000000000000000000000000..e0cf325af238aaf25c2d4ec05107381a28bd481f GIT binary patch literal 22211 zcmV(rK<>XNwJ-gotfBz`owWjLPW?JGz(}Zw9v?iwy>s~R_*R@8 zmu-0;>q+F7jkRh{ue4{xJqUOK1C}myh>j<+aIc zQGQXta>XvUcE%RHx_x1Lu^HJfmP>2P+wW9Q8ns+$tQa@hGg@M$T;sdVD*n~RO6|$K zHrAG2{iR>kE&D|){!-JqYAUnFxOv74b)Pe8DNk?o`Qk+q_kgho!7?L8bAup%RW)z| z6j>^l7&{DiAy;LMZ zP(lK`dtJSX-!Rc2JrIJk+RKQZQ3Rv_>UM(DS;MtdD-31VP za0)6EaE2*(APkEbPh$NoD*?9cm^X-~$CvVq0(NRvKv?;6j6qSEAhLiZe!sdxMAyZ} zN-eSha^j4Z^B}Xsj(Z1LX^YozIJ2{BZA|6B8M*pq%MAFTB7wxNSixRhNzM58x@gV# zq+U_$l`l48EETJ*mu8%`Ov-C{ZOY%jT{_b(%Xvm_ApX-W>(Ozyj|bb*8&I^x8Sp1e z5vFiRU@2EX<$mR7DI1ubuk>PXf3%{VUmdf5t3^XSoQn2hi8aF-0BBq|c1u${-~e#L 
zQBbf{>QriD?5fTsn={BQ4#+G1ziq(rnE3X81%y5XU6#F^- zg5k>lvR9beic-_pv&74B^Mkc^g1(kDKGxJovGi z2XsHL^L93dv9&ABX5%0P%#DZ?c@Q%kXda%?7aQxOHD+YQNJYzXD?%VQkb6kk*RvBz zUPO{W$%7DzB55It9^{!`XGW~C`fE|h>nF_SPq4A8$KC2}^EJK61Qr}c9H^Nf0Ytdb zIC$Vdrkw-`@PYvb0dO<1!lSAh4jf?rPOxBrAIA)Ap$srBZ~sR-TOz^(c~Fy}CHLTm z(gN?vrkz(+-Lh`w89opE{r(Wa4U~BpB#vj$N_#cE!cw*qWzte!Yy7{q-SUfmw17Vw zGD&S;nQcpK*W$L((s_9(UQu#&uL&f0swl7AG8<*AMBB7hpHNMjdFAbUd;QqlywZ7R zTWU-0=5O#@;pX3zLodH(oNi6d!jphCX%9ChSSdr5UP^@n1V4q@@dta>NLIW+rpJfH zJ)dUZFP3T!kk?>BEq4fp^9%WZH+@&pV1q{^JJdt);7kn zV@9l&zlt=Y8`X?fg}ovh!;`Ptv^s>!{kf?g_T3UIp4Z|_^MDdtT0&4FPs+{43gkhO z^R@lS*!I0B0mOTN$UHnTz-)+1Yop}WfIJSAiZM<)LT!w- z==HezVS)v>=6b8Z+>P6q)99?W2%c5|??Jx{ZgS1N;e)Z!R@ zwd+z$zn#9?UFC2N2wNTorr<^+>AmQ^=cY(R!Vgt@1HU{*nur84O%i5+wC2IpF>diS zJ+S-F#uka1zU+(?dmcAo5Dosihz&46uV8#jd2Qq)eq^U*ER+GIq1)uKiwEwG@sE;b z%&2ftG12g#Jy7$HiBrtQP8tWX#7Y9hmh}0swB;x{hyg?m?*UG3La)zcQ%^Dc%|nC( z2M>jZ*WA@`3@>z>PMzk_4RP_~q!`xb zyB;i4< z0X@fT;JTS??Rio*hLGqZJP?s*fZYUnFvEeauxsPPW7N_)X&!7v$K!w;rGjEn%H=jn zAdQKwsJz25GumadJZ#pA+-6gG?2gVmJ)zgi4M5L97^c4$LXwk78#6v*TkeKo-k?GQ z8Gw!b%CJjO)JE4tpYier4=6z_wV2j=*pE?Escim`JI;%sd(xZW1W61Z*Fev4f`rQ_ z@Z5tU(F7m&Ja^#0h2sWBlINdu42^>zPc(TRW?+z-hXBFF^Pur8&hT&SA58yZX~lVu zGF}}s{8GtpqE_jny2TQ$=veV9WYU26xd>LQqHmLX@H|M=01`PHA(ki`f&Ex-lk0d$R`{x@P7#!^DZuS|iZbmEc^mb7wZ?-iAxeDiR(aYhJAR%veGcsrf) z>TqQJrpUa4Ggg{M`Nln(C+}3t&2c9_+U?J1W93->tT8@4M*|H2G%g^V3pVNf}}6nm z^xk`(#6_IsAO}5=WSSz8NP-Z04w5`C5=A5_au1rE1SOA~B9atAD0-4}FoKMC{?q!+ zb_6Z^*K!-UO;rWDXv$N};;_z7#4D zf~>&zpr=w$fFcq&J9AKQD%a9eq%D`4gIS8C35p<`7^+xF8UO~DHrfGT7tB$>M2s`N z`Md}o7(K^1$SGJAmJu_x8_a6o*$ygNuaj#1y0UL@aB~X4t*eW``gAaBF9*0e91v1e z%-}|GqbN&t8DnipD-HmuXf1PRWbohvL`*o~*1nQ#yTuKZngay|w+e~~32qV%8n{6z z!%dnpj>T~sFI4S$yJl2kURAXr^d*x%qqY1O2^=i2h&TXJD-ehGm3Uc~dMaY<|FKn*|a?0ULc2~0LRc);PN;7WXEd&CD3uW?RnrilgoBAuMXJGW= zK?8>eXWt4W7^7W68VxSYwPaQG$28BT?%%5E?1yU0i&5zrUPZ>Jlul4^^I%CSYO7ZV zg@wfeN8{BCI~AODtyiK!&w8!PQx4Jm!|%_|=Enw#NUTs3Te%a*JF31Mbc(W<#_D}) 
zHO$Bk`;ATBH(+Y8IR-rVA?x~PXJ@!{|6bs6Q%ghVgP#Zl8y?6(K_-H%t@6Ox_R_Ac zc}8~3*wsQHj?TWb=5X*wiM`iuKyehxZMerdipM$l&CLVT78Qt`81%h{{EaS!*gH7KU>M+S~h;Ss&DTmv5u#S4%x zYAuB?g3N$GY9I+32EK?Mz~~d6#{wB@cDI)kTEXW*kE#Cf;i%GCX^gmWo<3}Z5rhu} zz2_XWaD%YFbSu*8`eT$##$6_pc(-I<9;=lR50LZze0ZfV81(0{wPkEfIS3*-h$M+5 z5GX+s1VjF?^P%LsjJFa9O#uaxlaU(TprL)8i7Eyds*eT6~iiDlRZX02Uy~ z0D_*OsH9f;F=PzMfB*mh00VIV01OF>q*BR*G_AS=lz;~mi8ThBOo1aoh6+fC0~7!e z0{{R>00035AcBly0PK8l@Fij#qBgRZAF?Ye#~1#R`0Dh4=Z_LDS1_NXvITje$8PRD z{-_SmgT6YZMxiFqa*6ZhRzv7bcE+CAK2P!{j8k>N>M#{+vaR|}Iy$TbH5J5WWCOaF zGqzI7Ldp8r0U3UP7;^p2D0^{4qqAnS2q?o?jN8S-qbN2IwyD9Gb35}2YS6L@Lo>Oh z^!r7k5Gh?1@Yhub!Q@cdj585%#4EX>W(cJm*9-3oDVV%%p@jTY!w0TNDE?lBv4@*i zG`=yh_%0u3gTwb^ccuIAHWj3MM3AVOVIpA593D9wpK)2isRRahStkcNxjjQKUhn^h z3~+5Zfgzf{FV{kPej%9N`@eB2H5nTGpReRvk zrg&Nq*K$KUpC6u&+2Ki%w-QSFq9r+#%1rL~ioCW+`^E&_OwqV2XJ};Ce87c)i|Qc6 zwaZDZD+OiKWu%XBya9iJqwpPL{qhA(M)8j&i&ieM8X zbm&?O%DV#j{>{Vat+x@v=i|k-cHARXnD20EYv7e?4QsRLtOh5_7Gr=+2rE6m31fNB zM4t6mc==cys(pPYvvp2lOXz>iht8E~Xj6kPY_V9bD-d&yJ^%o6MN5E!2?y z9x46&TpIr^XKB>@;XjG1>YCH2AGFGRe2(-+TSNBgX@O)f!OPEaG_D zvNv*`Fk@?QX|ssq*m5#}P1c7`z-h{CLJS`WK8=n<(Qu)VDQ4lba;BC3Tk1x*4G}SL zqmCWFDv7JEiVguA^av}VN5KXa!U`&BWMvLd4}bX9aw=F7sfbs}WaWy`1gxwFcz(G37`nq|p7=Y*4{9N7axakb4pH{GBWz^^gX?rajY zIapeft`S3{Q3VnpgnC94t90bhA*p0NRHPD}VJhyVpsW+5g5o92--9jJmboZZ5jL)Fy{i%GFmC4^OS8zBjqx^l0Fyv9tBO z^t1!oIx8Au7q_=dMX;H~BodGNOGmU|4Bh{#%*~OYq;5i!&#gO(tX5bH#s@O_-bB#37a$EfAB{^-=m zgSE5)if+PO%#5=nK2MUSZJgbq;g69^VUEw=dFo#1Nku?rpP-zNd-W6(KBD=C68^Pe zr$3|5by?GU^yVMZEkIQY)QTywEV$lan*a=&8-q%?!dFN!NwIZ$L+5n=#rzQq|EHVX zTTEa$V@9TdZb<(@hOa{^JImPy^UJaRq=J<4T3lVQS8&oeLH3d zW9S!uRXxsa4mGO;&Hd4=FeP~>I;v$YRm;2`6Ex3ZwwCMVxOMY&^z4nxd7VNE%S_uG z-^UsXqb(m!+OO3>0KcytI%x8+F)-z=f}#`n!+m{8x#_c{*cAzQ*@|nMS$ju1p%C;w z`loB2)!;-+-YujGFnLc{R2xM!%!I}aFM7rKa-2(lD$yf9UYv z+4vbsoyW$GaPB37nF9suz3x9YU}QwWucG$Ai%Ob2Lc1HEwSYmJOo>DFrxwt2iF8FG zNhfCOUCRL>c7jJ=w1e_1`_BjzbCD#pd*Es6{0BldyTdqgl*bI~yH$WcW})l=xEq4Y z4-9m7a!VJo&fudU-=?)Yub>B~UhO+{4y$#K=yMiP8b)_q0*7G{%*U0U_oX}K3ff)7 
z<`XBd^al4C%>deMm(|&KtAJf8`9dcFXH9 z5?;t3QP`n~$?B7CS2NME|4myxGd|cF>pH;DQ^1zaNjyT|X+1~Gt@$J3E5>bLWiLVL z>V9olTAgzCaaA=UWzY)(1G73VPGiBTVKb-n(7`l zH}Rq~j1KeX^Ici_O*kd2MDZ7kAvN~&nNWY4=+{(DG&eOAfvz8j!}3Vp(@dyWzg;~y zZMR@S>9(VnQs_<8KOE->*c)vo5=sY&w#kO8P=ydBF$LW}J;a%wTbANFn1KQgJ`HR> zTyE2+fOUjPTjLsv&}?EY|9%hCiLKmQ0gw42IttyExG0ork7jPuUJA8ABzl<0J-2BjI7N z;IXsKl0=N6{zkHW1>`&4&4!^duz`3j&`^EgP{?=R`6t8;|vjB)$f>fWt29`0MUP;ywBQq z&Mf0QX#UjfXazm^Wj+ljMPo=$9NbBCCE;2wO_bP7Ne7ZOph@{B*KIofH6~YKyef<{ zW#A4Z#tAdiPX?1Uh~zLel9ZJVLH?R-R>BhJEEO#gy+|!C3Y};mrBH9auxJTYXOl~| z;`k`bri4>+-X9EZ;S0}{_&ss4!si`j(j==Q4@xWs1DfP#Rn98W&{h=_YD0pqF zs`3`nMnG~bmHGv75=1gSj{LSapH05@=na&f znZmMaATyUI;QLrOEq{Dz?)Z445;wNH5L7??%cXf?AGs5nhAU_-B^NX#KpNE%8Q_ms zURsA^jl|61m9hjg%MmII^>!*-!6sgR+$ff3fRAOAe}Q}3_D$xlC?2y*j%+O+*6TSn zJPA2yBy{v(o!QD5#Q1$>pp~F3`6JO@69`PA#LuD}M@)KW`uh+cqV2`4Zl}9;;Ao`! zGBSsPy86&i)`+wv+<&b(=%Zr{;t?%zF=h323z#8dP-Msz>vX9s1~~nyI|jTRKx+U7 zefY)$J`T&01(neNL#EQe=KaN94*}U|evjr*)X)UmqTo!t_GA8NxVKXu#*)5eq?CTq ztm!l^@jEC?W_Dt50J-&go3ie}uNvo4LouT5UIa9#p*xrL!dF&i$P^p9n5Cye1B>b! 
z-Y+!yR6?M-JjJ?#8ZJhC99F(f2UoF-o;qlj*owuamu{(HdA{>1!_$&~M2TsSXW4)1JHh+zRM#eeg4=LOFo0L3rL^jyTZ$WuPZ6 z=*DL|dkPFKXrD-1#zMSuANHis^cUvcXKqPo|GLMw!FuYLh*?eM~m-& zk7?5U(B2)h>?8&_O9Koh0AyTxWZr0vuI+7E+I*tqx{P|~pkrByf7>C;QY{CFOcakV z=DMcG>xlYnbj{C~BdX}Gg@iAs^cru^cjQ1re{M6j{}QQ%_f9hy!wnkx(^P8t?8pdiW< zNRb~{Tv$+1{CqfW=+};$pR~gR>ogw#wn7w2@zEP|hk3{i>cG&qu=O|%FErGP(Li-S z;ON3u(>f!!;v|a}%~zL%1CU*_&2HBiOwnu}NC#7k7~QEUDe3BdwS;7hjDWSuQhLUC}>C>iA;i^s(#qf zx=Zt>2sQfH*-47_K3OmcpoyZpL~$0pwL5v@vvy@MAzX?Uu17fACqbU}zs0b{>%vq@ z*VAdCF)|wccui+9<=A_TyA~HX!D+gwsK5V zf11mKdRSQhi2w*V z7(|C5f`>?ws|*;sA`gQq3>6mE+p)C7Sqn8fKphf(P;3*h&cXLOnTh3&72=z`-C!wI zrCR;Uk;Wf|-~?U+h{lUK%U}`RU8=TLfpN95R8LGS5D&Q1e2a$s>4q01`R;{hUer8| zUZuxw@m?pisqY%x3C5*i0h&9-9(2R;!~A;;&$nX4;bTk@Hf*5FnLF%NBSAb_h8Ue7 zCKC3%qHYcOCXHS7gRLnPNicHauMVwlJ(2j$->b->nVSk(4o1n?qeYN`#$#hH;_sO3 zNN)@t>{)%dTv|MG06%XRp-nb`&l+y;NAB+C>;p7w^Pn9|l=i>1hCkMd-Bmj>r3G`c zqHeRqeXvG2doMy#?ifol$kRNj3TzBM_|-yc4i;R1%11bxp4N>GYP4Axm-2#~B}ZZ) zpKuO5FqxW8*@+=RAJ(~5-M0T#jM$+HLKETA>2Anerf_pSGJ0mVNuX=m5Ka#zqg24! 
z6s-lEY7h+SHjo|RODv5t#;`7!#s_=LHC>PDW5(!mr6np!o|RsO0LBs=n8)~6J(eoc zk)UTn!b|WHO=g}}$qZ?phl2RBu9OUgU1v8l18VHBHkDWq(vY=n0dV}o?Y8{mp=4f_ z9qc(Ge}F9&{kxXpc_$VM6-eqJV{x3=od+xiRkA_h_1Q5>eVls8esR!N1p}&_z;K#AQ3@zA=Cjc#!&25 zCVNRXh4Aw{$pXhrPc+lL|3Wmk>7qZ$|LXQW09|yB4tuSuqC<2z!A{XF1P+}DaA#~@ z3M^vGG0c+F3U8i+WBN$j{gBhWNcY^Bbr@Hy)C}!MK@O{eVmy@M7bI(M)kqGHWRm9G zdWUqM*Kr3)F+zI{M^)HcxOngW8;u|Q$>dop{KobaT7SzU^lI19sec6g89>^& zM^)|fpKxBvG1^ck3?k&Whl?fZMc$JtAG)=Hz}uw` z#5IlULh7~3a*_d?t_{zr+lTVfK2uft1IWUChmC!>$;jI#Ev*ioPGPb~(lF&AAR-kOgY z1~*6X2K%tx+Ee2^k{a1IsJiG-h>TK&Zh#vZ(u(`07dbiY4&4i^Jef?^-o-Ir-rZE} z+{Z=aa%NC10{a>f)9gk>F?VAGqLQ)?)GDyWL!K7`EIoqqhR?s@9YjK*Grz=xCLQN; z`I{#x;$(+)NXBMT`>UYHuN{tf*$3Ktm8`O)gj=+O9%f=mHJBcJqjr)6FT)Dyz5CdV zA~A?4RPp3s-18G_PktKj37#f3-IkavhrrdYL6(?aRmhDZ4C(5c}?HsSX zSQsEQL_fBKr9`$D9Xg=7?DWcB3!C>yY?Nf5Seg*`nwkFhqB~Zna{<$-cN{Abx?M}< zG|Z3Ji#0Bc7NgX7$y;EtNFZAWh?N!{TtLpN3VZVp<~U)1ifYqIqNH`5_(OJ2Y=y(w zM=7e6mFmS=f(Zs72L}WXv0J#3#?Zj1o6wT|FdajUe@Km5?`IM|tviDC#*CRv27>{L z8omTwIihmA9)0Y6L`d`XO^Zjlzn`$|xau<7PSSX2pzpcon6eJxG94$7$K>UOPssZT z4&?`gIpL($kO)4(d}(7C2lqpk++{(LKU^rhY%f&*H2xqnQ7FK*xCMn`i}Jc5L==kK zm?S=3L%{<+M1kp}06UOW;GH=Sm#2zPYZ+L6VN_!I2{W!!ikLTDIq5%;`LmX?t@DL( znD3ko%k}TfBb4$ijo1+-e(45NsLXMzlLhGqX#6FZ z_ag%+F0DHUHOpD47U=T%q&MWUZ_u^Wy`h`8xbx_r|6h=vfj`5K#W9kZ=U~Hf#T*Wg z99uUlzs&a)`Te20l=UFFx4ttF8{1@1$!|Jf>X0ReaZckP8afo}0tlT4FEz9(^vj|` zfz!;FwhNaK$!Mu##8Ju1yI)!_fx`SjrngMAxOIy3NVf#|;+~{35HP%Mj6kVJb7Nt) zd6EkQ`%;v}bT%g?9B+tPlpUdRgMb}1P*n`+U!L(o5K9`=p+Sle3!$}2ma(g4geYQz z*`F@a_#QCLG~dHh=%l1Q$mJB?-YeD?mUbPkg|{43xIf{oi>P$Q0JMJQ3D0{i=R}V) zn8^f#PB((es-)YiZ>z%h2=|=aMUwfTpce{3qI1bDH7n;l6JTGaawV4JPU1Oed-9EF zv+q7-m%w8NuR3j#bT`kbCqPpCnR_ez+PSu|7=ZEFna@14eUtx zwM0!)`=pn9M(iORs%3JOOx#({wu7Ua>4H$eeuqwZH*y)U)83JJ?6D3!t%sk>Jo(NE zvi}${$!i@+9@cEDxk8j5LZkzq_S}|}PNfrtQ-Tpb{rRn4BeqyZzrYb9N|;TI3Uy0~ z>7^&OH`U)uSVwmj)&iCt#wc~YpeDCp@K)X1Vx^=FVPZUL&tDUlhnK^TF>IikJ%pj?V7>XcrtgCqx|J(1pDmgdpR zG%w3i@+X^8OUu-l$V?-1xj6biM3JTPyl)W!{^L7A}L$95nOn3Bi%MUJ@j4 
z5RRN8)T^w#5ZE#SA+@KxwkBe)bmi_*j_}eXSP+AD;Qg9+Cy~ zdzZfG6n}l2{mNcVrV8wNH8I4ZFy|(3J`;8c9y~7tO9d@0InQ^AAb)YA9o(kqwfW# zdsZkE?XOXO6Ax9*CX(?q)kvmgR4&g6YjAB?-5H0ig;dhD)~wKU|NKw_f+k}pEK|lLOpRoAjpffVw3`cJ z%t2;ok$NgP9tguf8;ubbX)e!q3mJTEFN+ytDcHQiafjvsubw|MJ@u%{qx`CD!%N~2 z*UIlroN7n0l~g>YS8x2KRTK6Ge6~phV-3RTwRG2f zU?@@QMy-gia&re2d}r39ltioH(}0zsB|mZ7g*+>5y=-eEoyl@E0ajwnAXN7VE+S!J zn;;bw9-Gi*mYk5!Nd}x{r?^>AT4IqyoeiI|y8RgPMnCDSQ zT)b2SRki;NuvV`8)1iOtQ06aOTvp5gX&jGfv5+u?qBdOigaBOhS0A>!o>f!rBD}|b zkj#yWK&!NjCI}@)gxvED`&Tu%Waxg}ni{=P8?hplN@j~zj(0<+j0|O(qibEJTr9ha z8DC3r=eV;x#`T!)N)|TmUl9Q>_KL&C5cwPhxiG>Sk-e$F!z_Pb13!b~PrTLQ=3=43 zGof6Nn#~e?2YsrTwW3a`hGid9JJI)4a?t=b#+Y=f%cuBp>0UOc053~5==C`hpiZxC z)`dNcqCjLuW-3bk@tcwA;Y%c$kr?j~52+J1>N1NhT5&b=!nFYe_Pp9|krKkItNv9{ zDR;M4(J7q9=H4I&4E`OU3Siyf&lGC;ev{@~HI85AbLLDH`lf7}$$J+aiIIt$R{J!U zybdM{C_;VLNFR@}BD3$&o-Nv;g~uQ+rL@(1p!GXhPbxH^+7TVqV}ysGhRjt?PYfvF z2(9oeb;1P3p2(JgrVeu@IsC+lc0i(H6>eCzYMq$$xFWzkXDlxCT9h&3(!wUa;oiAZ zjIlh@QrTfxzHi|h2`<;JwUD8f=4;N&d`1+~mzFpeB2Vk)@|O@nGoJ0l&-Nl-0bn>b z{Wr=yUpcf1GWwZ~NjM3PIfdaS&mG1KROZNHZ)l03i81qeD2rpB(GhcdWpsBObNPgSySc)A=4E@7=6(j&inrTk&MKd{Ti;SPl=F`NzzAl>&>WlJg5@a#^kx>{-ASQY!& zNQDxVQ-7Z=Hv<|z)c=f|;X+`Du%s964)D(ympp9M%$Hje@Oh9mHyPo|>$n(k1yTsT z#ByWmXkZiKXAvrvbeHJ%*Vl%vN}}G)(N&fF8V}T;@&-de0endfK7Sl3lOn&W>wu90 zNz|{dm;1`!BP7qCmcVY|(PeMKv!f8`Yv?FR<-QX21{x*uTM4PI`D-EeVkW zis9rrOn8NfW-td}l9~NcGEdR4V5)piqu%2*dD=R4VA7T@bLx0-RDZAO;-=G&%0U~J z>U6icCGG8EJ~jDc*AGI>w@^ojT87>%+9K{B$m+(-FDR-F`(v*Rx>`4ItbjM*ziJWG zH9;*d{)pt`#RgE?9%ZJq?zc6ClpNI}kr~IDN_C?F6i+zqk$tg_Fg#otHUUrJRHy-6;Bq@e$C@LfE@v2E~+)YUQGS6K+(9G;g@EP7mpsk zU8*Xf>nFxjEVV^>-e}%HSm+NPP=g%jMc*_s?IH1*W(pJKrK*jT6Jlp*7Yko=Fc$c3 zlTE#gwCay!FopqIl3pS~o z-RTzTZH zCZb)at5vy>c-UkLO~b`BLQG^@H!P1Ii#0??pd+;}_IS;Y<5g(oD1pkuKvhxZH6z|F zhGmz1q11mv_1%9^vZAR*Vr3s^niaDnm2vW|k-7Q(NphGFg7#N6S!W0y}DNxD-_-=cH9?Kb*9+7P-M%cu=98$g@u zAi6uMXY@D8vUKG2-9~PxLxg`4TL%Z%%s;X{-lu^kB*(_2Jr$ql9*wF<MaunD7Tgbwws6Ef zm%b-Eepv54b7p`jbS$_|w~7Zxn*t1yx)jOehWL?klDdHUK2ORJ6h}GAA3KdC4Ea#~ 
zYPw7oS*(HOYi;|bMQla%GnE4~GFE{bi;wi4JF0`SM~>g7OdKRQK4!GDW|+XaHXAIU z%Jd*QY_i_3zW%CLPs>L#cQ<7;P5yt>*J+YAH0@tjf4QC+qee^@Q6@CjcP|s%WA37^ zbuYS+HRR3Ws>Ye6K{vR)yXeED?s1(%Q%#G1PmfozTAWR5UvzuInbA4oJ@ijfeQBcb zj_#4}q4TPrI(SFFtDtvukBr~Ze`FU#d${BXu44%5Vw_mai9Wr(9$jK+7Mfxih&(L^ z_dZgtE{HxO$#e=Y{3xdz5JCX`--Kz9McrZWT8E#(`;t6 z^beG+?YJOhCoz44J&O$^VvuQ9l9h%O4@DNx3N{foye2>*Aq++I{`2}bxf%6^0DFE;q zM*@^pHh&^5ffOpzO}WFITH%9vK?mvhHE=bE#w2Jfh5IhiKVeV8h-8;aNU_Jr#N{l! zOpGdjNhbv}VLAq6tUBqYI&Me><^jRC;|ftiQH?9UCuC*!|iARyiot&@~DGM6o9nE*SbWsKn{i_SZsP|D6UpvZn4E3 zQ)2-3fpx5)z@84{bybDk5V{wcLiwU2)g2w)ucMu(=1ox>(~s@^6xfgwg83a{SM=|U zJ;%Na{4nQDWDuLKn?Mm41V9Eswd-dz256e>8*CS@v$*irFIUQX+D*aEB|jVqoCUN| zR9sLkF-!*eHr2n?NPu>lI3t-`f5@9@P^a1y3LK6VEro7o|Msy#BVj1e-16v3X(HYS$?-+}t zh?dYcS3M6|ZOV~pm`12rM+n)Oo}@P5si5?NQzD9`*%-0fxm_dG6f0%X>9 z3PpsbCw*MgH&@raXAP@jkCM_iPl}gg#zxA_@2U8h9HYRvT8+ihM2afd3^A=@-|;^WX!!>T*6GZJLP_xk5u$V@A!8wz>pYXx3eeCFx39t2K?{F8>JENp2s}9Sk(CSIO zEAm2)%7m=CU5wX+4Vb4-G-u;nQmi_Cyz41wt<5`u;nG?s;DF>wEPMpEj&Eq@B{$)I zpvL52{c=X&nkQMc&L@68k|G%6ym=F5B~FJ3YW%`QwM))WWm>~nQl+`%dlcme0JKrc zzA;jEg*aV?B^S*^@1g>sS7EEliunl>X!~Km9^^zcH_+uC9t-t^f`7O*pxVU&nQEj* z^etPJTpyF?6j|pLga?@uAD&ZmUu)bLeZEv#X-f&{GBccK3`S%EO|k{P8c#W$;{mM; zuhP+%F7Flw`(ws14PcD-auOU>D3{jRQxD@r^`lc0TWTQ3rgvpH5HaXCOop)sXB`Ps zPedatCmv(-9v%oxilcjo&83>KA)|Dr>ZW_{K6d26p*6ayYM$N&zUHKke+|D$jx~}Z zw4*p*Pj!r;e+}p~8kdbaElVtI4a3ZoXF~>)TiELmWG@i(hKsJn?(oXHcKDsb7isQr zawweYSDS-c_d4a9WyJ}Y$yS58BAih%D3jdKxD7fcpVVF_(By>Pu5wNdtw1emJ=upC zE&agt0m~tvps7BsMpS9rP7QVGcIb-;F4ds+t4SIkp8A%y!jQ1s!I?tg7|_Rjkl@M@ zw1+O0iv6J~X`tE_7T>uU2jWcjq=qu?s1dL{_qpinOFe^1pF|lvUfpST12cgVRoXZ3 z>?T6Jrj;fGm0kjy^WF+OD)&dE-jyaX5jBn4Ni|eeaHQ}ZzS}No{(z>a=Gyh1S7u&T zUm#l|ms_b%Y0T+)p%9hjuK>0to2ldEu(NG^V89%lNcg}YbdFj^&i}12ocJ}k@zxxI zf9(0UAQ{gj_4pvYpdwZ4^jp$e+_ z`>x1hbEPuH!c{}>ov)mP1ypQmJ{;P32=GYXaEnH&Q9-I0(Ontr3%t!RB^E~ZM|J>G z7n}5)O7_}_Sg256{X*tUWpvGx8l_>l z#S+v$O{`+%RkTW*hci792$*Kqsev_os4c|OYpGwwF)D(;JTc8MGHOY_VwSf%aKUhi z$_?A?5KAXm;IP`(klILves~7PQY-^L3o8i$HieQVob*tJ$Ey*iXu!}S?|D%?s5ddh 
znJw$D^M!+(;j~oN>(+t;?aT~U2ZFN3T%Wh00sDDxrIS%W4VsXL-@zSV{>Ez8Gq5I7 zg&Ek$OMCKOO`w(oZqea*Wf7f1^^Fu;wNj0zq(vhCIYpC|tDQLh_iEZkH?wf1Dp{Zn zH0{I+BLT)Vi^iw&TZUtk*grQ;3&g35negA%jw}~$cdr*zBxz$;4m+kFla}zm;k1}T zj}v1tn{;=*YZY-vp26&BRA+-()9WYUjblc0KIGp)vq2bjN(B{pi-j8Ljv$_-8AcSz z6o`*o$?XS1l&%FHXN3Xc^p<6HV_7*QrvX9a29^&QH-xbH{&MNB9?63zJWIY|n!C;R zj^6axcI(oB@i_^zgjr$2Vkjw^=wc=g{zgJHB>)BziM~40A-KQ-DOCxVL|^1wfzO;BYo!zm8@g&dL&1WG zwqgGtaa`yMq%VvQX!ik1Km}u6b87aLBsyP^fpfs^=QMNM#^`)c`Ur zYcj2whrdhX+4ve7N(>KX3ak~q^a`_VII-%{t=91mEfV|guhFTEC)=@X`uSEVkguz+ z(qeA#kmDtq_xYz`t-8j`dWok?v%n4e?H1jh0kRfN>FP!n5yGH0W>L`z9|!r71|t?$ zz+X!8!j&@)`L$+vh4oQORAABe& zZ9IXGq$@7haAp(_3{0?pf&lDfrGowu2<(O#JIWf>-eNSc@hzj}ePxb|)u`S@RXJyw zJVCP`St+U|R$AKKrym3;p##Bt+XqV4pZQK2(~4|vah-20R5K`t{RAmDaf7l(-yRap z%=9N_(k~!B>v=&)$+K2T@mBcft9ila*&&3%N{Nhx1PWo=tF4xtD1o1A+b1N*@^>Ao zxiOa1TyqQQvxTT6nX57#m6}*gRh@DylUeU#O|mOG`!6Q3bhV6=zWmRZY~+_es<~( z=GO4Vp@~pk32Y%1i=|A5>k!Li69h+3!x@x`4@F-o?f* z6ugQkjqA@_^cgeAs z-dw(P5jTQs?Cz$6PS6O=X3wNn#?b#l>-5&=3}Kp*PfkC7Tx;UgCepMgMLZ2tu=XJ3J%r7;)bl@CzbbEptdkE0VCmo{nu_# zguj@u1vpUqSYfr{)e${a*tg8&4=S7C5IV)(_b|+y$M%^JM$&hRMs*<8&ivjxrsC)^ zEF1s2CO-QCP~;)qPtM}2+jg3}sQ<=!p^t%&=!i0q%*$3z^<}-s64N^%eEF*|m|G4A zoW81vMT4fxs{j&pkl`_eb5xH#;VOAh+TIHSYf-L)@`gq}7H|)*HofvIB`h51oOt}K z668ULGqD}MM%n(uh7s`k1P^Wk69^eNzeU;Qnw$B;CyA&iq*~E#FON_fug0ZU6`^I> z;PW@fj?uvuf26YVYf_FPK1Ie?Mzft!%PkKKsET@I=Q+1qd8o`>MllO3bL6j!*| zgf%EeY(z4nh_{J@E&MlO$YI!rV0Emb(**VV?Iz^ zI4Do(Td2=up`0mz`yB@~luxzaMO2I3Nt-VgiGfu^HQ0|Hd4+Ht9af{11Oq|EA9L)R zj!Kb1)Efm-9O22;WEJI?1_Jq~&-nB#95;}qh9UzES2S!Mk~#=~r4_HlFcVZ>BU|-M9YQ;)F_b$kgOfABp8djwg4X* zeUPV=2|BUMk*=zLvZN$`?anyWMk%f!8!rY}0t0F_EOmiZw{YC`rcFf5JmXDQ7uj-b zs8l&XdTUNj{C(c~!@-b{&c4k{=-#QeA=;ZIbo$DTUK>nZ)9vkqkxu@_eiM9WT8~Tb zxs0<|?gf+5F8Bb!&_Br3jllqmGH#z41#_(osR=F$MerIcdLpD_v>MAFON&@o@F#0h zSdD$hc#dNE^)gcWyY7)U68WL5Sq4Jt)xnNR3owQ#OrtbNE}4?aZPOJOD^V^X7F#4- zZ*#r5g&;RI8l4q&0VnM4^{uX0Cc9fQ5Z@d z=-0cL$TJ_+vq^`O4y@>u25WY@)RM*tbO7>@uk%Z{#bmIR<%cN3?qpH)aRNxk zuU%j;s9@?)z&Isjp#tgqAkY3UD@o$ 
z?m+~H<^@VCxmc4syV39oRDIMk;=(ye$~aIs;*1)O8?vzTvyJZ(w>8BX+7w;KvdWxA z6RAy#X=u6M=PPuR=K#ADdT`Q7NTqsddj-d2?G@5q>e4r-vDs4bL_J86o`)i!XpuiD zUzW-6|147Eyj#;OUE0ppiS68PZ7kwORvK<=BBpC@nUVl>cuFd-FApJKEJ9pp`gB&f zlGO7}h~^2K=O_F7Ct0M9gZvyttA?woI#cg?=X>63fC`GvOBQ3r(QM%$BS(<{qHpHd zZ+3bXmmJhkPE^SuxmEh^sd-$S6IGHQ=m|9yeJ~UEY_A!hoO_n}QDI<00IeIaHXq zAvw-N1XYuCAa{Oc$Wcc-GMr67 zRwa%xe6?W2Y_|^u7!9>DI1Aoj9Tpdzy4WNGI252rNNF`1#f0>QF>_1qBp4S|?8Sj? zhpWYmKs!hnY`y=FH6z#Bx4Jkx_JDBX=6+@KD34i5;B|WO-))ULB^Ijj#q^m!Y|afB%DbPkrbGDu=V3p0%`ct9tPx@t+4Ef__OQ3t0Fw;&njyT`X`>8;0jI`MWC3y zO=c0)8L-bPurU4RrQ*!*5)p_0ftK-gk!yZ`mnlj`V5np8`YBKO5s2wD3Ex62Oz`qaUDz&P;gy6yzAFi_u7}c; z?6v{_pGvIk>=%*gncM)S2vvHoj(GRXal*Ny9Q~q_R^KMNI=Dc!MZp(S9EK;e_@*gm zAX5LAx{5Ey4q#IvM6YmRzN&z_sc+D9)Xs&w2*L>mB8&(1Jy*aFpsLpq)cB9HaJ1$B zhm4(Qr|n<#y*eITMT3Qp7fkBE!Pu_jM5@yWHM}B3vhVFgLb_xh7JPOXKC6{DpklSt zRO=E?bz^cjyrC7j8R`6?ixoN{8^XOUoQH{}GLZ=5@QbVssHRZbw$h)1|IGJ^#0A3Z zOyxb}cn2U*0UVoTHGQC*^VTVGJOaAj+@dR#gdp)YWN}K4f}?B>ka6;-+pWChW($nOGz@E+yPQ+Ut*0kdRO?6vYhLb^C79DJ1TmY5jf z=Mqyay}{3g^wLeh#T~sA<=bWBqAfe%3<(MkV>_KVM;_n8b?M(Y!uOdyq%#5VcHkS4 zFg7^vAp(PT_b-*n)i$lgV03|W!UR=o5wTT8kzk?7)ly_+iMSe;5a(_c+Z%d~A4$Mx zPzA#WK2ZJ?=X`JDT5V7X@sS#C<<`k&@-}y&KtOAp!3yMp8$F+4L(zbq|Xgf{pLcjbWboWc7?Wl|(A$E2z4b8HIvP9fFIWv_LaID;Y%< z^$gq>G$T^^66|pu{i${z#;kriiN`Gdu$q32a)#m9fHQUy`+_-pM<4^*nC?bn%cuvLh@4uPlHN!b1H_ ze@(_!{AN(yghr2B7Rsrm6uE%6Uq#9MUXm}5I5Z^-Nh4C0)ll@jSBJg(KzwL2L#k0z|o_EJRX-!I;Yd>TXiOdYV#Rek= z5sxILsZMxPH)YVhfOJuuM~Ezm#U>d*A^|ShrhN6K%{48Lv(hCYL?AlVWGkr%Epj5* zR2cVq^-S%ZA0er%buM!tO12#P6(*M2b6sbRHJXJg!4^3FHZ&S^o=kUPauoYB$-H7j z;}}5qqRI)?NUbErVVU5_^{(W6AJ|gwV#34guBjg?%m7vBQ!Cr(_<<@8hIX%tW=g=N z2l+pGrsg2}%NgJ7H@z^y1&JJKtFh8Y+rB9`R7hI53K%;kWB_tz^=TP)NYB^k()03n zn8MtQ@Tp)3&ViuS@Ae%#}L?EF}w9D9@-oF3Aoi}THI1V z?*-L9;5$nscAS85e$3DeU#B8c8dc~0ZF7UU!g$^=o!@oo=@oltqDGbW>nn!oufqp% z0y+Du9XSx5BWSB{`w;i6Rf{#NC_87!`M*lO>QG*~BXWN;O6V)m0#X#OpItbq5uL#CH#<6SxefC)f{>eh=@aXZQFc zds7dDp!$GHPRDZ)`R|=Qb*4H_3C$#8>2}3^xsJ-;K?yLTkQyY~rKfRV;{Z&DB@+-& 
zcQ^KJbG#aBi5o*{{{53EU981wtRxvqhO;TCw;YS$P+QG(lHla!bTyUch+=&p^*6B% zEV|Al%L769Yw=t`I{~6D8HYfTlQ0M1W!x|wqZ z9=vJ*zwUv^8B7sHI`hSaBGOF?1F-W_$n)hhS3@P%A+90KJ6({(N~847=G(M8t68n; zr~I;KhPe)%au-99F~+j#X9D&L`)@dJRb!>S9%L0!>%W;tSYF1Q;({XqdT$P>0~(a8 zM+o4^JJMa4Xkiu7(D9EeTLVo_XrCh5Z?RSOMu`hKG34%4=bm65P6jW~dbp=WV?d4^ zCm<)GtEXQx6>o^GjX)We9TxEFTPpfk077bsCeZ5|T($i!6gV>yk{GRtZC~O)sFomX zqE-a}Q7{O#HjqK8Z|l2Hi$}yt{hk7p-GvAr6<4ZDyBlD>B(Kw%?K-W{S{l_q#12~_ z1eE;MPiC3bZz6y$E~mbhduS+%aals!V}S{pkVaBepgbZXZDFAhporxXgF3V-yg@>Z zE?FGmpLWJ;@tSG?s%}iTCAnGfZm<53jC)|X`jz}6I`eA20eh#D)@?3fzcH_& zWSq?7r6ys--3U$Ir47b==>H7k89XiMTm)&n-KT_(2Kv!gpjV*d;9hAQ38V=La@`KvD|WBmBzuDgHE=F)Esf+Z zYr`2U-$v70B?|OE&1L&Gr_j$NU(i6hsAz2^%?@`|_Fcx5#Ln~<3MEM8!+9$II}kmn zdxNl5>cOH$_XXmXy`4xyQ}MQJQ6COT(A9|iY9pkq_|XUp%c^;{^!$@lV`zpRCAke! zS@K*w7X`8HqRllnnv32)H`0a}pN{}zlzW;x@iFubp4neYA?#1+dL?3W3Rg0g-Bt!dQwI!8{?)JS0G1Kc=Psv;M?w=OCOSRfZaV+iUIz>+g9-KD zivNlffFgM7)&!&+`#BD`#%|;|LvFK7;H_|S%Dl=Kk4(V?leZ_}n44S-waMV~zLojq zX;E-xxXi5KEn1F*qB$GxA*f)-Vo7gqSnBrMug1-=8SJMkSXPz92!nJx){uG z1xjDk37JI*uYxs7lXOxv?`0GM1BXU1mFsurzA%$n18qK9w~IX$6=A&@%ZYQHBAchJ zJIyS7$DUxBhx9SinRF)1Vbvl48bGAX$ODNgv*OihG*G>TnKbORGN8w6H+tf~q%wJD zjJ~aLBzZIp$$}uq2k<6ea84uVR(_5jS8gVm<>q|mWp^ldYu^oAnrbdzP0@%8b7hTF zD7qS;v}J^5Ss}*Ca@j9y9ff=NPmg9$Mlo?~4DC;w08C3KFmI z)%UCPX_u#N9~hkyFY?J;5NVhSzM7!TXMXXh?_UdTND*m+u{f9c>ep`i)BuOU1LI^xxYmp;QJWfIg05qP?tX%EDtMu#AeH6g z%^@HRS-G`X0J;7LWIq6OTcH&_qPlVR(T9npX23_XFI(PT({9Rc^R-R^02BbgO_5Cd Wx~H8l@BLdP@qcAfX|n-M&rFS~S5QX) literal 0 HcmV?d00001 diff --git a/lib/std/compress/testdata/rfc8478.txt.zst.3 b/lib/std/compress/testdata/rfc8478.txt.zst.3 new file mode 100644 index 0000000000000000000000000000000000000000..781601a8a2c9150aad00ee56d7997d2a96396404 GIT binary patch literal 25639 zcmV)EK)}B!wJ-gotfBz`y&wZhSfXY$kV>kkbN6xGCrQH5=`9#7TGWcb*fe|rQGkF` z+m72f>04skHrNoK4C09h0001BLL>qp2?j?7b_Uyf$}_%ikKtPE8@HGyxR7$<#RtnR)^%vW~$ehV&O@2^!Q02}_rC!w?)CSwxTOR|P2oD^-553#& zeckb0AGI3|$-H=3eDimwR_q|pt*BH|U#Z#xeU|toy)srU*TD!B*JK4Kw$*{c?FBbj 
z3D*L~3VtEWO0LDimGffZS1tx(qRejb6%-D6Uxw+$HZ2#k6SB4dLv6EdYQ@<$`BSy0 z7MJmD$=Gt8jDIS%sW6MjK+eSX^o@)b%eB}{WbjKcke%Et%Iq>)twpw2nH|P`Q^5~L z_$8QT+Ium$F%9Dj#qM?|RH+)~Z?28vUlm`OHw{@QyeftGC5^%Ow9d4GyHHWZM@_z5 z_ukHyI~P-_S>xc|K?oVMlr|HtgPW~`{O#@B*ZhWG(wFi3*m@YqeeHdh#EHphHE-12 zE-a?f`Y&~Fg+M`JVRSIAwNl2z$ju1kUNb^{oL?9ls@zBE)vRG4&t~Vgd79O|Ri*Wt z;a&>JwTRWq4BI)En|0bXrEZ&KxCO6sA$m-+(Kw#ibTjMmD%GTU!4x$}#~3kGs)ck491O|$DE zUzOJ1;)Gf?@m4(C$N47qYxpG-pvm$$L_m=6j4?2e^N=g_O(ae4Ug=!J4Kz&0xP6%j zE2@U+mqeDUn%dx(K-a{;Mc_~6PDY?Xy0vcQgN9$QgPUay5@;>hfkk=+SFUA|e#s@D zaIt+EChy^JI zXWlGlue^gQwYYt2w)Zq^rB>T}&28QH;FnnOJJrGXS*>O5A^#CA$dPTh6+de(OLk{# zaylO3)VC1WBFYDfSp?n&DwIEPSnxPMt8D7R47uVvgCniK|X+yQ6l4H zw~@+{l1wIZ;J`v>b6$G4kabe=OMO%9lMmx7Dy7`6^>u6!W%f9qiPs%$bgGow%>INc zrB+QXQu;@#Q0@=`;Z{6U?d7|D1b@Sde`~St8`R#ZjvdtMEA<7qKuG*O{frKN$)f-l z2nokCEy@DS%Qe4Iw}>*!^l+DY>y=rySozAW1#h)duWd5aFf3M&n1GP@UbEpgjMaqO zE3=iK62Ihc%t~`nW>?AIh}br;;#-?KFO7y|doaG)?bZA!EjtGCQg8m`Be1yItp7JE zDwPxO3)IgD-#kC{ns?i5;q-rFrPGyf7E}11d@FuR&E9&2I~g-p{N$|nLGplTAko0_ zhLIcYyny)Mc&rV+9W$wAJfGI!$`%`OvoVf=JY~!ps+|8DaIehWw)UkS=P{67tl)ma zoiJ9LZ4s8#>ip-~8sG@Lddr zyRD!WVU(HU3T`%r!ex9jE0wdUhDKwvZQ{yq8{EytI8gD+GW}lj4(eg#?v)u99qyz$ zsIwRFMx#-A9O!bXII9HyK$IuqP#owB4lWmoMn#O&RvPiAF!EGPMhi-OVUrQFQym%- z2k~ahsNAd44sc0>S13~D3q9dtSai6I)@QG_z(Q!ueJ?s(qJTdlCLkmn-%7@aSF}zX z16l4u#z2VAg>om9Xfy(|^>NJ$5hUY*K)``SA{M(#EjBx2X6`kS#o3i7;ygow14X81 z{170K2l5Qa^f>=uAY-7)2*mKyNJXVtOVI>K0>uQ14ATrCVFHZ2D<>!NV~1MyX7oahmP7ZElSMaYls$$)%Ti^7Te@nK+Nc%nu@*Y=lF{hw)MK$ zfTAA|mk7u1WXKVlZ4}-6cICu3YUP|*g=(e383DpeODFMT6!6*J2$|s|KgN z>SDj#cpxZ}>nw?15+T&eX|FqGQ={OFaC?g>wBj3%d*?>=Y;G8<#dt0>BDW9(0#8OE zJOYV8HruAuqSesn#lpv`>)8vs)CJ34) z=#e`Q%tTlLwL2K75HR?1afpCOZ=g5Nn}_4hkD!Q$Bqw)ieGu*j`1PT6};oG_Za_2e2F%f}`J`I!6!r+T`^Mu;q8NYyd1{6goKjhIn zw*`6u@i^YngaOdwyt6?JzRYcisbsm9rBUx+0!hw;#KT)ZB_1U1u8-)#c>vDvAVINB zENc-dy>oD>zsH&DG^>?T->jSwuGAXt-)N~#U2Cz}loaQ?VS943O~$DXD)t%SHsxj* zZq33znOd1e`1kbchRF*a;g>{Jd`(Zr_+4%$%3W!_`F{h!f(EZF2DhtLHrwRK6i>~t 
zSVc))#t63<+y>&d)E9$WUQ<_QnwyraEdXQdN~rBMp<<-IYYXhHc#ZEV?1t{G7QT6E z)El>NwzZ1bVvr}43m3EUOEs1yaS2pG;3Uv&Ei}%X|3>8*mt?&2j7QHn^o&2xfb64m zuPI|y>9xHk#l<1wO@YxD@kofW6Ee|3RzSiJi9FA^j1-VarZsdcLdLFd7K59m zTCm3Xt)NQXEK?Wb#cwP4ZKb)DTWQv>L?R|4WQu(nfW6T{m6wokxHLHKsl{#{l4>pn zGUPYCQ@)9axNscj!g0Kz+&MM-Q?_6^-;Se&KDMmrAX`+F*_V3TZ_U70j8RO+*?%U! zC>P^ayxZ-!{^1;pVRxumzgn+99MXhA0ur!uG5R7X_|0T00!0n~!bIx68G5X9-J8dG z=wGNn(}D=-A>zXM!fH>6W>yIoPiPaSaeY3 zGx5ER?wXXbn(HPoxHK?GKmsHj)4HPCtGYCaT&)Z)E}*)2&BfkYL%zNkm2viW%AJs* z7BRhB2eruCgE9K%2m|?XO~+8=c%!h7(xOv%bcV<)Zz_;Kc`Dy1$5-mrO?mS*A4sh% zuLVz#sYgzs8iMCT$vNoC8V8iy_LQ%wQg<=h+RoC}km9Bhjj6SQ0 zt`db2dnR5}_nA<8vpGpCJPMI3W}y>d_=8Jee4L(X0zs4sf}p2DsaPNsisf!Q!ZYsH z=|aHvs`|}tEtaopn=(C*^A!tq%7bEY{I6Id70Ve;nVv_QeUG)^rc81(YaP4%3yv~9 zj!bI^6Q8bZDAVIP26HH~bPzM1Z206G3s2B21-h~7~9Z@ulwmk~m4x-mM>Pc5FmhyhFu(1TQYo?1j~CHM9eRxPz7 zJRT$>L~uMp%e7z_;SGSXqEg8fOJVy8shZIlVdErI8OGhn%_c|-KtRGF@3XAE zZfo0+GcqEO2t=noP{{2F&xk-G5KBd+vsX5j`eZB1G}Dla7<@SXI9>Vlt(mR#Wp~KE zX7eg^W)^;7tfD&txIGsGwKtE7;}6O4$K&`yXEA$o^L2{nkv~y%isu{UyeZm%0tyQw zrf>}8r`f8@2x=Y*x0qI$Bd~~6zZ+z9@l{kxO^t5FN30w=MUm%NY=+SlY*d7MO;o0q zO)c&|PW%rFikE5ARWO174aow{IA?6WroK0*701#zlu9#+rV0c_Q$LyzJfL|Zf1)?i z9l-DjlSqP-CXQPXDn+{HH~sMWG`02t!BS22w2JK0B$UzWb_*#@p}h)cxNtZi+l)u2^XL$qO7n{#X|s-cYYrr~O2C&=|q>2MkAm_16Q)PlA$`ef%3lvo(%0#kQqGzg5C{Q$cq{$*tqUX^ekq9IrO{Sy> zni@pVB$}qlG@(F|REZ`}6h)8asYH*(k)G&zBnNUL=W!m31$w4RWimk&N(Fi*6KOI@ z70HpJ2#TNzBtekHfhrYB#^m`AfkdV!;y@x94v+|>PPLcat)no5&`{*Kn{sOFb(b()85vCt@y@) z-fJ2t5=oKie+1GQzZE$VlnV!foPnXCXXi#$o*=;^j%k9PAi^(k7#1EC=y?Kl8N?;x z{C5^-gbNSHEmm-LZj?fF2lWq(OmU&GA_caSA2ai@L?R1LGI=sZA)!G*#a2*&7P_y& z#JN%zy4S^OB?otfw$;VHkq$f}h~t0by#`zPl|12QT2;kQ zejdD#v4!A>NWWwfVQ7k{GfqIpr5 zNf8t|Q8cMckY(~nkVTR#QDqWQrs0=UGD*{9 zilAq*IFn^kp-9n0nk1Hlpb0cdlqhP9DNHpT&KwuaFf-c%eVJyQiEqvUG6E-n;)XtF#Li{<&I4c;7} zR3yqHah^*g@bCtjEYBSxASgr<>4C%o3ZU>t^5D%?_0;y^cbJ4i!gy752jtFH>;M%X zC1E^Df}qH8FtW4&1f&HZaNs~hu}DxihXYuwB$Red#%P72)=oYdQ&9LX5tC_Bi6%*^ zFgSKE27qDT}t;7AiBd7kJwV7bhKFQgtg 
zu+Y-uK;NWlPz~@#q2jsnF!Dmnxfr%}gTfn$0p6(79)5Aq&}c{$%!Lv+8zY)K|T!D$M_e+{YOft4Fic7cU8jToQP=EO>LFK}O&h$O({e5aA7m zsmpi_bWWwoWqbsRj)DA;CJhpA93IHc#0PIK4rYp$%epLv$4rj^#(w+gC*nrk#9TZGRvl03Ro zCuc7zNFFcT_P^1cFq1vQErv8O0frbMP{sjnbHpZm13sh(Y$;3Klre2T4 zawELnXePCO>lLlXeuIJWxrI?$vo%w=a$E7Lbh~*#rIDJ)0giXm(Q`3cO*hmD)neaD z1_igO=7?$S0UdWJk<{Qs4iFd4A(76=I3)7+&9<#MQa2MyX;I~;MzJbDRyiY)Pu*I3 zd24!EA)`{^jfQ0F*OM`jHS3jHO@%=5P*7+M-c0CO&JU+Cgn7wfPO2 zE!ldUi_pcoz2@U_{OL)2GDdL2j1X$p(*#YHdc_Pk73le2fgY%t0zHo~xcFv%KnENc z$d9*rFK6#*59`8t03PWI6nugsVdu8MafVNDBveNr9gA~Ds@j&I$0S{K8wLdv3iL!A z$2AvrZVUS1IK!vs44*Jbkj^l9rZ(U_o`{11{k^E5S~#6<#W%3x#p)Qy7J7Xlx<_&% z2#Ovlb1QC73AUmnasJ*UBa|Y}Wt=>l-}I1lyrx5%y_2OkPm?4$Q3OHH++8^{3&vdc zWmwY-V;hM4IphrQw|CCKV6lr;blP?n=O=y%#f3I-`{>2?sZkB~DaJFy()wC(0jNKy zwbbkz1)*Zs=Fv{Z%(_@P-b~G<^va$dVjw?|BG)+1)W;b2o35bdmu0&b2aqT-d7hDR z3iL!KmS=pjM5M_x9!(-p^o)r#d7NKnJOh$(D7plJu+9czAiM3J7N|t@>0?DH8KM~x z00000lMz4wAQ%{phorLkoVG?Q&*Wz)s9s&ENvQm*7_#-;da6no<*Kx<@@V}5puqug1m0$u$k zQ1dLBce@yLdj&+B-N#lsgK>2Tr}SFaporr4B#SVXOX70bRDJ|yCsK_rt;A!iX_@?XTYrw?^@59fF~PqdXe7)x{fc)n>&G6 z*z8cm3q%&JZtSTFt{2}9AU;=b1%_I?S&Pu6HE`^;(oe%Px0kqqk743u*tQbyPumX$ zXL5)*eZ5ukx<^)wg}OYv2>JJQFrbOxUiP2JH!o3ALpFrytgPag!egra5e$ zC$eX>IS@nn0T2%&2Rf_gir$Jo9KY^|awwM_edYW{8nyQ~8J)Wzf#Q6lgvauVm;oEe zd7uZ6P6u=)A7p{B@qo9!G2#-@wez9le@W>vWyDAM}5JN(+@QFgH?*R((w*ZKx{ z8GjyP+X(3I)H;2rdMo3*WO!h~W8~pXPiD1~27MXHOoZ3uz^WQ~$gVR?qpE9Ig7`d2 zOw!*{9;}KJGTfmWi|}y7U4V3iN_@!ATscB@N&={%#5z)9i}RK^MG^gK5aCuSAiHGqM0z&`Y{2eB45?W`S(OK1;xgh%m~&0FLE1H zAPgblSvI*3QwT|Mi_!rGIjt?qYMhe6+o28=t7LRNSvukepiil!<3H^BxXC+kI0Q64 z%oBI5RtQTTm%fOe>1nK|wdqH-5I8yKOJ^)@y(DUW?ZFc5uyiTez7NqW=3ZlifbzD)2h35s2^C5w z!MVYaRdn}a=!=0@I<~llBeV~lNRegqn;2B-bs~4r8w9Xxz&^X-_#5jqU{5-_D)e5P z`nY8_Jo_U!GK@aoMr32hM^OIufi*n;8{U(pB%D0tO-Dg`W71VVX84eM-k4g2ybQuE zNme#TD^Sj%zYszUBfzcc`HH`YGx0^1vf0jz|voBw6H5FaX z*jc+v?5&_W#5c2z(XcAMIH&yVs*rim%i5@8Sj%gfOoxRdDb75puZCU&U9BS4;oaW6*WPT zM#%+!%yeK5`@i$#C16@O(mnf#=ZKoGrwzS0?4(A#6f)iv47Am0Js`-L-hYT6zK5|Hkj9ujD6Q9 
zH{d~!%t$^SXPVIAf%p`(>`DHG>VB5TI%A?Vm(k9z%mRC0w_UzODKJcY^`swk^tzG^ zKeK+&BB;}A4-(RSPP`|(Cy_VsC?21^WF?_18A*a^5fq|oivI=(E~8N#NdzlKO5_qg z^msDKctujodl76&-Em>;QCQg6Y2FkuaNX{y==vH+K=M|4<{~oV7eJ06j5I zl!lUC66g)-6kK70xsb57kRB>ksMGewbc!Q8|J`c4k?lExp1J1!ER#i(r9+I&x5|(6 zcx6ubkw1`G0jR~Gp=0WYl|YmKB|@@0QoeC3;bVGf){RP@;E_(ltc9p;}|PudV)Bz!q4pJ!G0)JezCqD6YR)%vX-aj2HlMQ(PO1#I37l3+DuHg z(84&%M{%VcL$PMO8xOTBZ!Glsf!xia(m!IU^!cfeCmr+&K9p{81>DVv@P#EvpDYmXYMIdeVc$s3f9i;LNca7Z=gZOD3bHQd<{jzUH8pNOwDt zq?9V=62J#pr^8X_HkX5Ze(ihcR~fN=Ax#}KwR6pmH4a`J7Owivf})%SVu!}v5IAu@ z@WLOs?);2|`UeLHnY+dik?@+F;JlkW1k?*#UZ8A@f23GfL!;3NyI=eL#^g!0Z!GLsbYb zk*IseCscg7n*+g3XaVMr zWE&pYYf!r4zpq+8jFZKRh>B{&3W>pEBQE&79H$<829)A$5GoW76!A>GL4euL6Fr|W zj?gf435Xv}%o1D7;B88Ok_lg;`M2;m9E1=CDXKC@beYg{=G>)72&t3(MRcO@th*_8{EJeAFLTc zcvz%nQ>E3AJ9EOvJ%fY>=-&-Lo>CUD(4M8Sf_|O4qSWnv4-I(*X1>c%&>Pguy)yrl zLgZ8s&ZChCw+e##ewhn%OsheTT?|e9YVI2Vb#bnsr+nGd046r0Vyi7%0i5WF>a#BO zqFRm@u*%7CZ8w=XSvw4waUj-U;Tnss*>llJ*-E{Ryu)~odyL%hw=>1#+Cyudnm$>f z5<=8li#1})gr=`$Tm;eManFJ1zaa5LNndIl1~fDE9`fqMfQX1oQC61**zfoag4+=e0LP8$u zH zD?{BKjQ{fAzLWX3)x6isZ(WdiJ&YZ6^z=& zLEE}n+iAKWn`K~iUOK43a;SJQmr46Zl4s*?hu(fR-1iYdao79eo?r3ci|gqk^ljz> zudPXI)>-Kz%eilLgOM`5P7)DRao!y)4kK2ukYoK5T2H7AH%LwLPdf1MdA!27t&}o8 z4*fpT?atT0;;oo-qJlFPliBNpOdroED<$ubGi(XMiVO=H1o7I# z0eIcT-4Jm_h7>5G)}<=_5rSq3;{nQ{eNe$DYe`Dse_r~IkSy6MBXD8JF&)n3BS(G} zYDU|oVUN}*iZ(o;0uZm`c&;+6sXoY;R7jE1z@o{cN(NiR1^}6<73%0gI!e35rsX?9 zYhF3NfJoUQ!2+PW<1~N(VlfL{61<06BQwLzrQ-)5-y4;u9}I-3czBP4Zm2#vTdnK- z@)*4W#YrVqDT+{M^Ndx%TE?KmpQYwvcN}-3kxiM9xVAS(8hXLgm!*K?8 zvH(U&c&W~Bc}f2i_1iVc&{uwW!mqt-MGf#%m#*7 zt=uMu1|F=&8u@P9P&mdq3eW-YZ9MRQsSrD>AYIFomf4FV=+tno8y3qJs^Wbzmmg+g_z?it~rJ%iJRgoTw%!hODQD^lZQ#h0`w-8dq7LzlAi!k zmZA0F=T+FbH~w%_S`eZ~2;vW{GFVXSKOd@Y2<;trE_W;)VV%NJsDN!;joz9uHV=uv zhAS)I?nR|S!>rzUOmg|eBeJG|8-(pO-Upz657M7UmxK&t;cW9_*Q!$WH=Cc*d1*O) zbR5T&R72JXIW7_4Xb!iTbwe^5&xbt$gL9^w^I*`VXowoFQ4spZapLPX#;~5=J@7k~ zG-^GrUX_r4u(BMQcp`KXNX$d+*tn6T#*(#CvIj>#7LIi^hX6x$!^*Z9lx8|KFrTZI 
zmQU02S6Aqnl0NK?KjP{}qvxcJeGecgbej1PVuZVTN97$4k-s2!og5Xrd|q}GF|lgI zj91CFH;{=DjGI&4!@R-Z;K5@~2Q%U7zjsBMo^qGl0%7A_9Za=f51y8lkx@?W0VY`H z#bUCM3aV8^h9eVTLxg8K%`C8baq(|@I?kh6zt!JLPWRUow+2~B04c8)E3xzNFooyv!#K~xjTN>s#^Wx0Ap z{nX_=?Wg@o6F@^sh@QMjdA~$y*$kTUB1CiT4)9>{1MsF1zau%uVq4-fpao9>4bz}8 z<$GS(uRhbxXp$YvR!uJyg#GS=V<)sd2);5e@Jn~$C1TlPwPZh^sez)0_bI?6zrZ@V z-McZ>X_XrEcDOCbJxYDW1! z6GN+LY82=i70qpyY^8ogVkq=?`XbHbb)J0aR-iPrR~+#dAH`Vm@7-x?7XL4DI#I9% zt64le(js+H2E*Q@l@T_^qqoipz{a)mOzs-vXbavB>Xt2wp69KK{GeMJ)#z8fof$MJ zILMHVO=bRnkjd42lchzNZfmH~h|tr4sY`?a6~M?Y1g}p&)ji130MUO5R%;nUpb8Z; zu7^qyGy6%>4<${(n$mrw9152>wor_I70r833`uC8GFW<6&{DbhKuhCw~_eVaM zkr?`d$pgXPu>lH!FM(Ch8=ojGSeo2CAY^B6L4x;t#tKb>)$(#EC6ujW2N}Pqtt`v; zN{GML^nn$;Y%DhMkzSnDx15NVKE&hz`rTk(nPFIUkw%LQF^7$d>gM82t|Y9`)?yHZ z!v6+QW?dQtZL>JIzKT3#D*md_u(vAQz%`=mFTw;+!?RH`5;Tt35{lejEtIN$urC1O zI=%A8SWym?f0|(|c){1O#2G-^2@i4_`O2F6&yi7Am2VaDxv1K61akBpI zRHKhfg?iV+-He5u6_ubTfbVeuR_BHe0l?S5bM|nJ@8@E_4iZDp$@-`!O+2ka@@bJ} zFiv@nw7OCU3Omkj9u24oxD-X;uotl{4nRt0nt_VjZRo|*KOw2=`E)f5omBAwR-Ei` zxYsm#gPX^KuL7~!MY-G0g^ItTzM9Rk`IKl#aR|lG&7VHylol)$Mk}k`q*oykiM#e< z2cq2m`M1<`)ULOdr|cZSeAaWTZ-CEwb9wN^Vz;muG~M@8^X3#g$VI;=L^KRC{!6E6kAz4;gfr| z6h|U-gzE31e=5QV);_@ANW#K5X&@6EBD+wi_sc>$*lkAc~ z#`EihCWRerQhT;1PQTVhycQNuidRC_2EHB1)gDq2Idcza}5lT z?WhE;D%S`DSW@GOf-;d1C50)XjOr7q9?lYsFZo+=raYv5?Z=3+(ZJluetKo1&3wTQ ztxdg&vH31Hk~NObWF!oPKB`IBNF=v^>^dvs%Rt)Uv%?n0HxGRm_8gb2gR&Kr7BOmN zdL-v%K9uLK5*SYK?%CzRAaVdfz@qRH42*&V;!1ma!z4>CLL%G`?{bYZ1FVg$=rf~%J|L=krbLPf^ zYc)B{sbX|^CkompwKzSU823OQG$(bq(ILkl9{#LA+jv3V-({;~#AY)WQh8K`0cEr@OAZ|I`2rOcy|9UwI$ zgBS=ApYI*d0OkQ&*8T;)bS$gxi`?Y736BGd2=6WRL3*4^;{^!G&OlqNzEeUSFsY_W z4l!4IA7q0rL+X|+LZ{Zs_Sb9_r3@)G7Vd`8(YwGOJ}{M`hS9WCLM`+4JiV%hADJI6 z{YYEVNZj&_*FACH*{iwP$zMF9up_%g+t35pWr#$HjB9D;qQyn?39L{@Ad3uoucK9j zaMN0I^W0afly$XCh+;CBvAVQnW)-SJ(hO+t3~9EgcpYl`gRrOYV_xZXVTsSuA=a%V z11`1apj!P2z&ds+&jwoQnFq2Kvx*+7%w${xwKu{-tGv|MmI11*O4K95Il18>?m;dT zA}_2B`bwUQpYm%6UHmoy{NrPpp@F^lCWmvKm;=mT3^um{UkutMFhQqT5py;O?q-as 
zv3`PGe7}0i`LUa@YylcoSmQ>aQUX<*H1PzI;LAlicvSL+>n>^tsAHhS6v4iLGP5YR zpP2Eiq|LNSYBg`+uN(dW%e=+`iIV9+ZPL-pA@!?g3M|K;UqIC1>7%SvN!3pl6+*A; zEit>C?SC8{x)&X54T`E{R>y)RBA8nx@2C92d^C4tzrBCTsBwwO~nX%2fHOkcL#Rw zcSb4V1vT+bFOtqR>RBCuGmUxS2m2+;6(BU3bcMZRWFmOf?xLwxAI64-c{ za%sQhj1pwQENrh2sg1nM7UeV}5PTR%goK3Uz+2u#9#+0&@H~h*fW^jiOU1+D*l5eh zSdJOOOSRLoZF)~Qdf;WN0Dei6BKnKN<%MK1jJML=@0&8xRsaU#A8;s@fH^BQc#E|h zxo@Y{2+4;qXF-Rwj!g6jc#gC}*!qkNw6$wiSTErA+x zpt;pag>inS!$Cgh#pN}e+6JQ1T0;#KC;?U#9*GJ%Uo{#N2my(j<>}Yi&PVX1(#W0vT*&L47;>3KNrS}qm* zHbeEF~@L@qN%&J*rZh(s;De0h^rU<-+$9)hfATP&RkDA_qD3Jv|(B_ncv z_sgpk!h|6GGPS^Jz)`Bo-WcD)#`kz!$eR$HW7T?k6$uV#)2GbjrMEP*y_ujVsdY%T z`;6}1jaUVs;OV(A$$79yBL&V#G@lK$2S@`=7Qmjp8AHO@TtjADKrd)1RRdv8&-Aoc zbL>n~vh*!mVV0I7QiPvSWD`7=Y--kWd#=RKn^31Ru?e9O%)mgqj#ly5^iQa}s44~t z$YPz_F!R)@jsr6eEzTxsg@QAKAkHmtIp*-suj#PvA2ekrMWmI~xqrR!b#ru}fk_6P z7Mz&{<3ZQ7;6sPPQV|q+@MjaysB``n5BC|YYB|7666uj>h<6sGi|=(!s7oK?TxZn& z?4LH}&QBt7LNz)Ocxrn1aFGAKrC}R-jX%AAm zmf3Xpte;p_%SH{UgH-&OLaF7MBrJqBQ0`VBxF*{IUL;Sdk$PK-J#!~Ik-Cpn-c7F} zG~g`Fx0pep1K68+M24fW=20v?u;q}vrzsEf?`F2)Tl+AB;oe9&ip=qtL3-or3>8#V zs#P$AuuY2r4P@Vk+me4{Bs82z+(#wVfNG=mYKwd&K&rt8FQHHdzXxMl(S(;dN!uA9 zfFVWgR1OrR>ON9c39*@F#;&T4a2;YPw}h|8^gt+uD-J#cKuVpTI(Z%w@nWE69lUHO z3HlI-KLcL3M;yT2SyW9h~RME9TYCrnnIn*Gt4zfU2QexqekgwN>6_9R_mac4H&XEHHc^uQh(1*q?-#B zw^gUYuo+n&6D0p=oYu0f2momPI7@FXUxa#(5$2rO1Rqk8kLwxtZLn+^1Xb7mQwQuJ z3qg7KM#m0aXnQ~bz8MoM+1u31!lD4u%W2dB&eL9PgwWP5%Defjs_(IkJ-#CqRIRLx z_VhV2BGS({{>2%WQMk6uDQhc(pebQ| zx?Av_+NU&;s2GKASgX~zE}FaGDcP6-)Fxw+R`C)kGTK6)^u>P@0x$@V=L|XO(6y)B z^H7EZkwxWM1IRHmm4L9nuTlP)b+V$vt@}0Vb|JRdbW-zdZ7Qv)71*b)=3At78^GU$ z6hl2th!bBOL>WUh_bP$=JfC3)6B%|X`M!nJP0k$OR5=rlez#mbVef9r)g45a&3@Otot!*1~E#!w~brB#OXKaYuXc}7B z%;j@#GBL)Qva(!5+bSo>!dNd80+%J}!{rSXTkr`2Xmm81BI!$>&IR~EXAn!^#cxnw)Bm_Q2h!>(M05Q?b zFp(!AHi_Y(W=7zLeWLTx<<%ZhUKNyRr+6qP-0U&%p(F@c5*sN3ec6(Xg|k|@yBULi zHYE=GtUmbVrYv@X)>of!`@l7tZJG8(Zf%iJv7>MGp-Z4pmP@^0bwWrT#s zK?X7Ewkfk2VHl}4W*apo%pfs^GR(h8Tn=$~>Cuun!A(y=qC_M~+Jer2$twGRWSq_W 
zkzSyu^xku*M_A+plk8KF>=cD>D60YJ| z-+i1NX%Vbl0(nVq2H)&Aqy(Y((&iUlm)a)Epmj?Z924MNXt3u;$2^NXocB$zs`3-3 z(Z~vD%R1nf4S^(MzV4nU5LJo}Q5KofMN0sf=HxxLXTmO|pclU|mGHZgbV?X*y;p&a zEcRpKv;Yub{1V^H@JcR#54f(;>KUX6LWP7O1vIW5wj#%-W7QAbK zYjY{+!uEnIT3=y~LBUAeE5|2!QYIwu?_G*@>n!yyV|77f!Us|XQW87@2s(r|p9~s1 zOh|4J*B|jK#@Vz10*A`PIpaG9%mAl<8jMfCA?Zwp1zjkcE0Lq{b_tTBIBKF zXS$N@cnK6cbuIAT!2WN8VJ)Pb|M8ry>~!JYAf{2cRd`B+YPE{3L^_nD2Qb{m-&5^{8ABSjChQ9pUM(#WVT<3QKtnyG~o~>q=rDYq3=Z3ZEiQx z4(=HeoNHDQSznE9SB{39To4YfC6C-*k>K6iWTAb3iPoEilu}E2sU!2{Qi7Nyr48Dq zL*n`WH-+1Bys~I=NRJ&T+@BQ|2}9ISND_r4cw;bYlgs2u@-{PRb}RFFDL5%?nC^0c z+cR=(oqi*BF>ZQEtDG6pgR1tNiChTU7CG$!l2>pSTi*5S60V2w_}}VAYf@-KIz2W5 z7D3{w>3IJKAK{@Dv7vth08i3d6*DkLy(U4Q(oPCkdhwG!H%W>KnaT$!tBFAJHd21a z*UsTcI1mNE0J9;K0ypZHWvN^CD%#$$KL>9iElvipX02f4y$m)k>8NOqIspxT?%l1q z+2q;#BTd#@*)W7#Fs61qzOOy;SH14`z_Ylwr^+s!Y&5^#nelT=eZcdYynnlbCWu6TFw5E*$N8_8A2^@s z7~t(IugfA=Tj`QxkYtx3b6Vf1mZ2Ki?oQ{da{Sn85{`?BE80ewQ z0G1bdE{0!rOKlj1J-}}ky8T9A_ArO8pyU)Qkw-JlO6>7i7beku?m}cCE{`xvN!CJM z8~|0fRvy-|Kq8kM;6E{t67UpYu*vq_t;d_Z9g7=;6FF8W-W<##CGW`dS3V+2MVd%W zTHeBu>5cq3+rA1-cQ3Q_?Tk~*A~RU%i@aLcNxzm`Yi(yy`cqH)yM$3r?$(7tpaC*?LI*Y0Kr48`xYxteRI_jdkN(=Jpey**4JgRCf?Vd$`or zM5_X8Sc|;Wp7ASLkU5p&ZtdF@E18zH{=|-&8m-+lcMFXE-30Mjt*vO&%9046b!WO| zLl7+m9ZhHukPqZpgo%$)D}enTh0@9mK-HK}0LeQgh?t)zPt(O1QEdnkx+vQ^2w<28 z>h}%T!lMlQBtvkBs&1V707(v=;M!aeM2NOJZCVyjF^(^{MV~OKz5g6!-6Nv>)TXf= z%9qe8y8$ld&T8O{a>C%ZpOafdX+9c`g%f&t@T%U&(MF(Il< zLr;jj>?-(KOJ#+c3*^BIsc1T8Jdm3bv-l$^^=~YV2w!mLIw%|`RsO;JF z6Epklq}!0-&snL;oMyuKEO_8Nw{Mi0P5HCnfn{#{NfF8QB5!{L`p~(NSzEW6$R#!N z{!(-2e=NSMSbcF4IG*H*#9z_zNGnl3)~@7{lf7P1@e+u4dC1vH&o4&H@=2aAvPbfl z1K#CFj#qS{?h$A|`i`MNFEp^i;9fXc;i7gQy1mAJ8ZYo-`-)|1go;B9N=kDt2tVZj zUe>RNTDU*UV|_3G(|o2&mvs6$M-8({d&w85ls(I&)G=#YU=d9&cM#=C5i**YgoVoM z-QR~%gH{0!HgmBV9SiVFZbMxhUh)&D&xH_}@a$;=034*WE?m@U{jX~Sq}43bt|$u{ zdj;;6H_{5-EpMb1x?A3n1jR%iP5*`6e?JW-I(rAYUC8XPh2DZ}!#ZT?&Xx(0M~9-vZLkd_xPkR_@>3ZNmKwQD05aBoxZ{OltDO}QIf zsIGNvAPnZk4!oT1;#U5GQB$^Qkde7LP|Sa$k$-K2^Y;LI^fi(k0uzcG^a`Mvob+Ht 
zWxz0A@$%6IauynZCBngJy%#+oAk%$MPRTAnwuH@8#10R0n3^jo=bd1KYykqsVCFRj z-~9zL1;cZW75&1VvqoeMu{^~V%?${+y8!~P8-PAWI?U1y<~Xa{J8EFZ)C37g*RE(X zBkX?p@CkW77|5_11h}taan_w91iwstPjZjI8mEt5t0<`c(XNX8^?|EZ+d5ZP|F!AX z3YXx0=$8_CL~O&|BQiGAwT7M+C{Bn1Bl598T2NtOu40S%aWw|`=)htZJLevAx)xMn zOXylxrXkunF4kQ`oI3*}WH=4gkp4Mj;J7LMMbN%L8#-LK21|OQ};ny?1AsdO#;W zKc=+Fbxnhh?JLmM%TW!@bQ~(*KwgEA03dXj9(jA&T(4PhvMecvoAC?tTRY#cAwOra zx{R!Ibq!liO{8rQQbfS>t2iGMd8e?~aIohAa&lqKbJW6A;nmD!RN^*_PrG5%s`}V~ z0MR&L{4&H4Qf$aT@ECM9HHobP{=(TPatYDs28pZLbvi51q<_#p8>BNYi8!ESG}b4r z0r9Nvn`AKyS(~Ziw76e(@8M*x>L2F3>Lt_uy+MiUuR$Gqay&(!uV7}KW)})H1K6?G zN9pn3>7ur`?2Wt}{|fhrykO}e{NvTu`|bJ!H~0z{9LT5f z?R+xm{HmDaF|B-7BEQ<8p1+V+?UG_94CBOy%}F($YOJkQFbe=2TB*xQTOfV|TZ;%> zV4TVFDzsH82Yvz!`&cB^W^)59vvrLY)zi;r3H?@)hhmuLI8#DqD@WbVe%)PK6ar6_ z+4p7P$Nt>cW_7UdWL)_!$ZK05LDx&O^XJ?=)hnEb41?LF&6bK&&iy^U!{PUzw_8N< zxHyJd#)YD`2#btsQ%*|| z<;v+T9r%O?!Q$b-$+NlqY_7mAnN8I|_MGjpG!HDwC2~=-bQkO^NMc5>j@O9ALfvum zdR~n&<*|NLWDcn5){T(&4a?NrWF!a!mJFt~umghB1~HHIw+9E_(3{S{^GaJgoK?Hj z{()o2=S^m~*icuo+Fznp32~fj0Rd(Mt17dncryqP)9BY<1G2~@I%SNo6Ug<1Sz%i? 
zA!>mbswk@Ua0kg2tR)0ZOL9Pgy!_PmkdQW8uDD^k5% z@WPOC?+%hMphMs)17zVZU6cj@TxL-f)zY}IFhiXiNW9?i*wayilro%%xF#cAbcNwI z*qoGZU!ghJ&=@(e8=MITs!SS#!EOqEP1SJ~0}bSJThm?vMccCT@b={5OwqC%hE9WgxA5F zMB)O2v%c?vku$&5h!cB*Tfrr4O+g&<{DZXYrRBtj;hU3g((HR+;pP}*wF zbSo2Yms$i<8tW*aYFB8NWx?c{i311#`E)Hm2rMLw;xDrSePeU9-EEP4KKKzZY}Sp0 z5MCIofo^BVLh!9SuFDp>UoDYx50!1V^LjadU&qL5{YIpP!^Cg3^k|u4r^+AcYE_WQp#I7*FBaB5VAsvrp!7$-V|3SZm5x zd!^gZTr2J}UK}`R+Kwxf>^(|M+3k1w9=RM_JU@0A+&eWZY|hkF7wOKtrS!s0-s@4T zy{66fvl9Ini4uwp%mMJ*>j4`UG^O{eKR*N6u8jT6P%}^NxwYh+p8eq+cwrmt3;B1o zG>B41<}nEXN0DQJS26_J+qAk!5j92DftzZyV{n7{>-BtlET`aLH5R zhIqz$0Kha%3Noo)AHgyEH|z0I7lu(rPFg|K*K)Ss(qm=50QPdSH4Oh(Ir)E$9cO>8xiX;kf{eA-4XGNSvjE4{=YoNiebmVzyMcp9ZC zUgJcFpJJ5(o)l;BW5}>a1Zra2WZyGtj@`WLqr4q3LH@pPm!`U_9v%21D-i?vGXs=M z*Y3X4bfW6PB(v?Eyf6$Ym2(k025P?La*jCIzYeE19~et38*UqNGS*Dz?4Hie9GtGH zG)f|3d#_|zI3{uYe_T|RJF3+i_UU$@do_)M=u->nA|B#xN-(3W!%FJ?z~kWZB?Ly; z6_D#Gq2atK26JlZPqzh`l>P?J#Hvp6%zjm3iKRnPHBOPM~Mlnv4J4~Z&W(2&@H}P!BoP`%)XSX4y&X* z8$;XYTuqzRnZP}hS#|lGx(1!eb5V8Q3{_cBljn)9q68`%Gw|Gyr{IVVNQB&^G;&M=x?1|V4p-mJ;VrmU zT&uazBv22WCcP=Vfm0Sjd99>6pf=lKkSrS%b-`;qmtq_!BZ$PwP7L%OsY!R{D!XOh<9x^Dp__f@j>-F4VE%VaCg2qhVbPJdjt5~HW zA%K(%UgK|$lc2`znXwAhY6AriJ_1=M+Z4B@*O*0YewHV4mW(~5oeedxll|TTQp5^B z;#5h-Ll=lgjdMGiPeNK=B0Nv7NaZ5jA1MukGFj#ZmPZ|=ZItL@yPoAGX}`PqSud%=cK{}QJ zbxHVZp|uzEG`_H6Hv&aT%`8B8-ZR6t?so-TtpS zsfkd9D1+6B^RrQJhwD31af<&0v~#d}6`@&r9m)Yz&=rzCK(e>x-Ec%L=kTm2_60gd zs>uPumc&0k_$H}qI)+?ela8cLj-u6@uZ8G`=amNSX%$4R4B3^T@UuK626@1{2W#L3atK+vSpaY+G7?jc(PG-GnsG{@^a`BiLKy44` z0D=&ewGPI$uLnX;+dz?Kx_?_i}#x*oM|U7Kk86GD^Qz zHMb%+BT(PG|KOc+dv>TsS z`P_Md^1@M>Yf6Fw)g;iu{ouRc9sT47X6k1EJdXn#nhXwX4e`4dRJ$NO64d5)Am{=J z{d*f^JmJzuA``AjQh|ij9}G3dbJR`-$&=z(n2!{Bcp{Z8Cw?h$136Bd4XC&w#gye7 zi3c~ey7+&fGR~1y|e?s3UWsrpLk?5f27SWOj(IN5>yWtLeE#-hCbuM+9n@!B8iO$2rBFBZPP>H} zgD}ctmwOvkXO^b??Cj|z!@sBzebOg_{`FV`Tc`#goTykIdP^8F_7Enf)R@!dr#U1# z)XQF_KP?(0-9&6?)-3dDY{%9YM~EKBa1PYYhLS-rLCvft2Ul@*5^(~LFl^D2^(xm5 z)fNK3DKO}~oF-LWOdobNeZ(TyF+cL2S&jk_xQpaSLa}CzH`GVrRIb9|g0c6O?~Ih; 
zP}qvR4(U0DU1h$o-Y#;s-~-F9DG`FcoQ%kDF%*;!MPE{RVS-q`2v#|qtR5Q{HU|q8 zPDPRe6>Ej_7ozQD`8L202<==bv^nN&ys%tA1|24RA$T@|53f$Crh5Y!Eep&(608`; zQ@aMCc8Ci{Ls_~YP*AP`tKCaoI=0w#fV+hYE2crbgp_hIwxk4jfJlYrr7rE6E>>b9 ztF=-!Q0X0QRKm zVL3mQY>*d+0+eG+x6({pnLZkNO4$NF;VXL?aXH2i%G`|Hh{wk(bG0xn?JfSP6TxlCd&*zSonM4C69+U<+x<6Ixi#U3nj zbBhASXN=!Dkr?JGqKIWdNF=lwa6@d$f_H}#OFZ+`hHl#PvaM3IwPGWDMSuQla^HXs!Ig!p-0p-uNt7$Xn~!=3;;f1#}_+CM~7CqG0-#U)#qf zj1e>}D83w9#-yWRjhgPamO#RU`TikmW~jA^4{9-F?EkrHpi%V{%*0$(#~(v|>Ct@m z==@+SL*^siQ$@Gp84eCG`vvUxKu`aR`N?VZMzJX_S1 za8eG{D01By@gcBSyM-ks_NtXuWH4kYzM)t78RSVeUzyBoKGe zw$5H$5chpP`Gk%;0hJEkfsN;Itrr!MO1lR~)Kq=G^Myv0J9H9C&2Nk)MX;WC80~Ya z@V^HHAV|6$FIFvwkyChs%{O!ETyJ=})fBuCR!j$4WLrR6RHUU9>tz7L)x%q#i8D$c z-M|`W_XNUL6P9955#8#D(C{tudBh^ox$V$y#^ zawZvu(q;@JvE_qti}O@m>s?rcxj;Y*JnMe}2Q2kquB;AqPD>au{k*k? zY5xsv+no(VEZG{u4PTFfb}srls( zh!_pxLlz9YbiPfMpcfd)$W^In^+hjy-#Hnzi0g!fN5lKd4Kg%p60e($GerN5Ko?!% zponC|sd7(3wLH?}oWc>|B?WVA=MleRtZXI&4;Y}8MXTkxVSzXgPMjm_ zIlM1Mo_{Ee72bwy zEf|eYSkSGA5k!*YfmoWT7&8Yu-En&k)D$=*l6;w6>g3$@q|9XbzjTH;Os4WGr4%0~ z!bK61iXD0^g6?DbT)6n-voCDlv3+?MBwy+6kL92FyKml8^99#_*QgpsDtxTqvr(g& zqZ=6%+93Ftqss)kaZ{*1PSc4hzJBR~dqVgl?YoT=JNNT?Q%t|XulpjNlJeT}w(5Lw zBjW@ThM5XOfMXAD!&yN5uLU*tn{Sz5PMjE|*v&_*ZOU-HN4V~?2(MwUe*@aA3()rH z{)9Wukgb?|C{K|{t6@Xd^QvqO#nq}2;__Aq0_yAe)=0M#h0iEo7#zXJF7ecP8}3LI z*0>GHM*lX-xVivUj59FRE!HL~J|`z-fxQ(L1Bj)0-B=5;BT7)zi+gA{hB$Be99G82 zDVkoOe!Bj)`6acchoIk}qQ()tl{{mxREErTbUsX#k3ajV&pk$aza*hEoHwwDgIRfw zgu(_duGbJr??zIVCTl)qIx_WycYg;!VrCGOj8y*;l6RVPi)h1p)<~s60cP7_UuPWK z$ovUK;eNd7#5wc%V?MB#y$PC}`!%T)%x&L~Y|uM3=$6wuqp_rJ3G2*t6O|0wa5q3W z%EGjTu+*P@7C%Iy6zYGA-BY5*rK6#x4#%sD5iThLdilgLq0m8db`5>eEb?hkx8UQV5*ubAXviX$LJ zJOi$#045+VIaasemUmbGEzLa)yfdb(lOdroA;BHYO}ctY#q5$rzZkGUP4F!!iy4nc z$cUU3H-@3p#XBxaDgpAwW?!VV20W!WauTj;(?H<8WU%3mc#D)ib^=DQf_2ly$t_U< z=%pq;wdqw(+OTzW~Q|-DAtI@x*`J_7c~>0DOLq~d}LK*w?P%nDTu4Jibg%~H$3nS)7wZu1C0!x)gllqTq6 zYAI2H6Fs=Jr#j3-`c>lODc**iMdg4ea-7I)HV<5Pro(Qi7-~uIPhRel4jfh 
zqrDRKXF6&?UJS7UZQ2_@j(&p#5Gc?J6P7&y$m?%9Jnd${Jz&ux%`jct2G*K^CsKGR z?2}!)#=)+8b|~P@0+Ar$@x7Vd)YNHHg51H>!$(DNayhgZy(KvJQQ+jvk<$|8$Su4KuY z1h8^ZgRR`;Z#osQ#Hho%dY6V6hHuAr@ENS6#Sf(Ek&%jd`-7koj``2yj#&)d${b#1 z9=RDNgE3P=JgZ(`h+g0DEfnwO3>x(us4>xBun!)+ZhByq3T;i|<~AHb5D5rrmh4tZ z@S)OIh4|ANA5jZvYf?^M9br@}@yVBWlWk{2xPK^mJG6O*lY%Eu-)4y?OvpNxv1mGQ z+ROs|N2{MTwK%r`8$X!Cw))#Bz{sLC<+uqzKx6h=bg+DkF49~D>|c*aA>P|1y@QPv z3T^9DuA!dwpU%FKR%Ntla+48$s1LyI$-d5dQ6fRQgw$M-RLH+(DG5Ms%HEd`5I5zq z@GfU->DG<(cx+8HBwgt6KGf0a#%lzvI$^j2@W&L~MECd}$i zQ|=IbGg9vH&>6ID=ex=SnElYonN*tMpJAm`NRp&IiOVov3=U=4^70$04k#G3ID=GU zD&CUE^m7S2PdWY4Hv-eI^-qqCG>nvNF2ktGi!F7|FsVx6FEy^f?Bzm}AubdgGD8z3 zBPp@s)$bMV06<(J!WJGzqvMjqG5E*jqo9_-Sai1Ai7AXfw`y{Te97IwwFfOr{8Q&si}LI^WEGp754KDcasR&0)IMk((T?a9v~_ssJ$)Uj}1&JW~Aor(<1@w z3&~jV{h9)vJ%dzl$reVio^B~IXy18oUQ#)}Vh%zwRDe(bTYL#_cchznEE7Uh@olS0 z5b_Q%O1R?X=$KsqAb@oAM>7LD6mN0H3u`ht2oN|Bmqv_daJ=`-t=^fakkBq|=S;0I z!>Q|GK9DApAwx8uGSaKSkB(T^M@k3IS;0KU)m8w-7-FtK5{xc$1AQOkej~#fktUX5 z#VIl2Z#1d&guo#Qk1iY}*lTjF4=W+XYt3Q>qFlv2aq*1VruEv8H(>iEuIb@N zh^~ecMQ2h_9I;}Zc3HbG>4buuYQTrCpm7wu0pI`VpSq9jj_eUert<8s4I5J7;U2^# zxU2Hc%nYCnimI0I8M@5X&|=j+g$%Fl&wHAn28?}C9rbOp@oVS5?q+61UCu7n)U*pC z^h^cvkx95CT(1Eq_6XfDWZ$Xm)z2UVO@Sh3WhL)!8l-vthVnl`hI3*ex*C35FmX-4 z#{kVM*q*uxvo%%i*X2N{-bAmn+)e91)dP)8hvY`&D(GlEz5duPi`m|D5nD { + const header = try decodeZStandardHeader(src[4..], null); + return header.content_size; + }, + .skippable => return 0, + } +} + +pub fn frameType(src: []const u8) !frame.Kind { + const magic = readInt(u32, src[0..4]); + return if (magic == frame.ZStandard.magic_number) + .zstandard + else if (isSkippableMagic(magic)) + .skippable + else + error.BadMagic; +} + +const ReadWriteCount = struct { + read_count: usize, + write_count: usize, +}; + +pub fn decodeFrame(dest: []u8, src: []const u8, verify_checksum: bool) !ReadWriteCount { + return switch (try frameType(src)) { + .zstandard => decodeZStandardFrame(dest, src, verify_checksum), + .skippable => ReadWriteCount{ + .read_count = try 
skippableFrameSize(src[0..8]) + 8, + .write_count = 0, + }, + }; +} + +const DecodeState = struct { + repeat_offsets: [3]u32, + + offset: StateData(8), + match: StateData(9), + literal: StateData(9), + + offset_fse_buffer: []Table.Fse, + match_fse_buffer: []Table.Fse, + literal_fse_buffer: []Table.Fse, + + literal_written_count: usize, + + literal_stream_reader: ReverseBitReader(ReversedByteReader.Reader), + literal_stream_bytes: ReversedByteReader, + literal_stream_index: usize, + huffman_tree: Literals.HuffmanTree, + + fn StateData(comptime max_accuracy_log: comptime_int) type { + return struct { + state: State, + table: Table, + accuracy_log: u8, + + const State = std.meta.Int(.unsigned, max_accuracy_log); + }; + } + + fn readInitialState(self: *DecodeState, bit_reader: anytype) !void { + self.literal.state = try bit_reader.readBitsNoEof(u9, self.literal.accuracy_log); + self.offset.state = try bit_reader.readBitsNoEof(u8, self.offset.accuracy_log); + self.match.state = try bit_reader.readBitsNoEof(u9, self.match.accuracy_log); + log.debug("initial decoder state: literal = {d}, offset = {d} match = {d}", .{ + self.literal.state, + self.offset.state, + self.match.state, + }); + } + + fn updateRepeatOffset(self: *DecodeState, offset: u32) void { + std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[1]); + std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[2]); + self.repeat_offsets[0] = offset; + } + + fn useRepeatOffset(self: *DecodeState, index: usize) u32 { + if (index == 1) + std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[1]) + else if (index == 2) { + std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[2]); + std.mem.swap(u32, &self.repeat_offsets[1], &self.repeat_offsets[2]); + } + return self.repeat_offsets[0]; + } + + const DataType = enum { offset, match, literal }; + + fn updateState(self: *DecodeState, comptime choice: DataType, bit_reader: anytype) !void { + switch (@field(self, 
@tagName(choice)).table) { + .rle => {}, + .fse => |table| { + const data = table[@field(self, @tagName(choice)).state]; + const T = @TypeOf(@field(self, @tagName(choice))).State; + const bits_summand = try bit_reader.readBitsNoEof(T, data.bits); + const next_state = data.baseline + bits_summand; + @field(self, @tagName(choice)).state = @intCast(@TypeOf(@field(self, @tagName(choice))).State, next_state); + }, + } + } + + fn updateFseTable( + self: *DecodeState, + src: []const u8, + comptime choice: DataType, + mode: Sequences.Header.Mode, + first_compressed_block: bool, + ) !usize { + const field_name = @tagName(choice); + switch (mode) { + .predefined => { + @field(self, field_name).accuracy_log = @field(types.compressed_block.default_accuracy_log, field_name); + @field(self, field_name).table = @field(types.compressed_block, "predefined_" ++ field_name ++ "_fse_table"); + return 0; + }, + .rle => { + @field(self, field_name).accuracy_log = 0; + @field(self, field_name).table = .{ .rle = src[0] }; + return 1; + }, + .fse => { + var stream = std.io.fixedBufferStream(src); + var counting_reader = std.io.countingReader(stream.reader()); + var bit_reader = bitReader(counting_reader.reader()); + + const table_size = try decodeFseTable( + &bit_reader, + @field(types.compressed_block.table_symbol_count_max, field_name), + @field(types.compressed_block.table_accuracy_log_max, field_name), + @field(self, field_name ++ "_fse_buffer"), + ); + @field(self, field_name).table = .{ .fse = @field(self, field_name ++ "_fse_buffer")[0..table_size] }; + @field(self, field_name).accuracy_log = std.math.log2_int_ceil(usize, table_size); + log.debug("decoded fse " ++ field_name ++ " table '{}'", .{ + std.fmt.fmtSliceHexUpper(src[0..counting_reader.bytes_read]), + }); + dumpFseTable(field_name, @field(self, field_name).table.fse); + return counting_reader.bytes_read; + }, + .repeat => return if (first_compressed_block) error.RepeatModeFirst else 0, + } + } + + const Sequence = struct { 
+ literal_length: u32, + match_length: u32, + offset: u32, + }; + + fn nextSequence(self: *DecodeState, bit_reader: anytype) !Sequence { + const raw_code = self.getCode(.offset); + const offset_code = std.math.cast(u5, raw_code) orelse { + log.err("got offset code of {d}", .{raw_code}); + return error.OffsetCodeTooLarge; + }; + const offset_value = (@as(u32, 1) << offset_code) + try bit_reader.readBitsNoEof(u32, offset_code); + + const match_code = self.getCode(.match); + const match = types.compressed_block.match_length_code_table[match_code]; + const match_length = match[0] + try bit_reader.readBitsNoEof(u32, match[1]); + + const literal_code = self.getCode(.literal); + const literal = types.compressed_block.literals_length_code_table[literal_code]; + const literal_length = literal[0] + try bit_reader.readBitsNoEof(u32, literal[1]); + + const offset = if (offset_value > 3) offset: { + const offset = offset_value - 3; + self.updateRepeatOffset(offset); + break :offset offset; + } else offset: { + if (literal_length == 0) { + if (offset_value == 3) { + const offset = self.repeat_offsets[0] - 1; + self.updateRepeatOffset(offset); + break :offset offset; + } + break :offset self.useRepeatOffset(offset_value); + } + break :offset self.useRepeatOffset(offset_value - 1); + }; + + log.debug("sequence = ({d}, {d}, {d})", .{ literal_length, offset, match_length }); + return .{ + .literal_length = literal_length, + .match_length = match_length, + .offset = offset, + }; + } + + fn executeSequenceSlice(self: *DecodeState, dest: []u8, write_pos: usize, literals: Literals, sequence: Sequence) !void { + try self.decodeLiteralsInto(dest[write_pos..], literals, sequence.literal_length); + + // TODO: should we validate offset against max_window_size? 
+ // compressed input is untrusted: reject an offset that reaches before the start of dest instead of asserting + if (sequence.offset > write_pos + sequence.literal_length) return error.MalformedSequence; + const copy_start = write_pos + sequence.literal_length - sequence.offset; + const copy_end = copy_start + sequence.match_length; + // NOTE: we ignore the usage message for std.mem.copy and copy with dest.ptr >= src.ptr + // to allow repeats + std.mem.copy(u8, dest[write_pos + sequence.literal_length ..], dest[copy_start..copy_end]); + } + + fn decodeSequenceSlice( + self: *DecodeState, + dest: []u8, + write_pos: usize, + literals: Literals, + bit_reader: anytype, + last_sequence: bool, + ) !usize { + const sequence = try self.nextSequence(bit_reader); + try self.executeSequenceSlice(dest, write_pos, literals, sequence); + log.debug("sequence decompressed into '{x}'", .{ + std.fmt.fmtSliceHexUpper(dest[write_pos .. write_pos + sequence.literal_length + sequence.match_length]), + }); + if (!last_sequence) { + try self.updateState(.literal, bit_reader); + try self.updateState(.match, bit_reader); + try self.updateState(.offset, bit_reader); + } + return sequence.match_length + sequence.literal_length; + } + + fn nextLiteralMultiStream(self: *DecodeState, literals: Literals) !void { + self.literal_stream_index += 1; + try self.initLiteralStream(literals.streams.four[self.literal_stream_index]); + } + + fn initLiteralStream(self: *DecodeState, bytes: []const u8) !void { + log.debug("initing literal stream: {}", .{std.fmt.fmtSliceHexUpper(bytes)}); + self.literal_stream_bytes = reversedByteReader(bytes); + self.literal_stream_reader = reverseBitReader(self.literal_stream_bytes.reader()); + while (0 == try self.literal_stream_reader.readBitsNoEof(u1, 1)) {} + } + + fn decodeLiteralsInto(self: *DecodeState, dest: []u8, literals: Literals, len: usize) !void { + if (self.literal_written_count + len > literals.header.regenerated_size) return error.MalformedLiteralsLength; + switch (literals.header.block_type) { + .raw => { + const literal_data = literals.streams.one[self.literal_written_count .. 
self.literal_written_count + len]; + std.mem.copy(u8, dest, literal_data); + self.literal_written_count += len; + }, + .rle => { + var i: usize = 0; + while (i < len) : (i += 1) { + dest[i] = literals.streams.one[0]; + } + log.debug("rle: {}", .{std.fmt.fmtSliceHexUpper(dest[0..len])}); + self.literal_written_count += len; + }, + .compressed, .treeless => { + // const written_bytes_per_stream = (literals.header.regenerated_size + 3) / 4; + const huffman_tree = self.huffman_tree; + const max_bit_count = huffman_tree.max_bit_count; + const starting_bit_count = Literals.HuffmanTree.weightToBitCount( + huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight, + max_bit_count, + ); + var bits_read: u4 = 0; + var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one; + var bit_count_to_read: u4 = starting_bit_count; + var i: usize = 0; + while (i < len) : (i += 1) { + var prefix: u16 = 0; + while (true) { + const new_bits = self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch |err| + switch (err) { + error.EndOfStream => if (literals.streams == .four and self.literal_stream_index < 3) bits: { + try self.nextLiteralMultiStream(literals); + break :bits try self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read); + } else { + return error.UnexpectedEndOfLiteralStream; + }, + }; + prefix <<= bit_count_to_read; + prefix |= new_bits; + bits_read += bit_count_to_read; + const result = try huffman_tree.query(huffman_tree_index, prefix); + + switch (result) { + .symbol => |sym| { + dest[i] = sym; + bit_count_to_read = starting_bit_count; + bits_read = 0; + huffman_tree_index = huffman_tree.symbol_count_minus_one; + break; + }, + .index => |index| { + huffman_tree_index = index; + const bit_count = Literals.HuffmanTree.weightToBitCount( + huffman_tree.nodes[index].weight, + max_bit_count, + ); + bit_count_to_read = bit_count - bits_read; + }, + } + } + } + self.literal_written_count += len; + }, + } + } + + fn getCode(self: *DecodeState, 
comptime choice: DataType) u32 { + return switch (@field(self, @tagName(choice)).table) { + .rle => |value| value, + .fse => |table| table[@field(self, @tagName(choice)).state].symbol, + }; + } +}; + +// Capacity of each FSE table buffer: 1 << the maximum accuracy log of that table kind. +const literal_table_size_max = 1 << types.compressed_block.table_accuracy_log_max.literal; +const match_table_size_max = 1 << types.compressed_block.table_accuracy_log_max.match; +const offset_table_size_max = 1 << types.compressed_block.table_accuracy_log_max.offset; + +pub fn decodeZStandardFrame(dest: []u8, src: []const u8, verify_checksum: bool) !ReadWriteCount { + assert(readInt(u32, src[0..4]) == frame.ZStandard.magic_number); + var consumed_count: usize = 4; + + const frame_header = try decodeZStandardHeader(src[consumed_count..], &consumed_count); + + if (frame_header.descriptor.dictionary_id_flag != 0) return error.DictionaryIdFlagUnsupported; + + const content_size = frame_header.content_size orelse return error.UnknownContentSizeUnsupported; + // const window_size = frameWindowSize(header) orelse return error.WindowSizeUnknown; + if (dest.len < content_size) return error.ContentTooLarge; + + const should_compute_checksum = frame_header.descriptor.content_checksum_flag and verify_checksum; + var hash_state = if (should_compute_checksum) std.hash.XxHash64.init(0) else undefined; + + // TODO: block_maximum_size should be @min(1 << 17, window_size); + const written_count = try decodeFrameBlocks( + dest, + src[consumed_count..], + &consumed_count, + if (should_compute_checksum) &hash_state else null, + ); + + if (frame_header.descriptor.content_checksum_flag) { + const checksum = readIntSlice(u32, src[consumed_count .. 
consumed_count + 4]); + consumed_count += 4; + if (verify_checksum) { + const hash = hash_state.final(); + const hash_low_bytes = hash & 0xFFFFFFFF; + if (checksum != hash_low_bytes) { + std.log.err("expected checksum {x}, got {x} (full hash {x})", .{ checksum, hash_low_bytes, hash }); + return error.ChecksumFailure; + } + } + } + return ReadWriteCount{ .read_count = consumed_count, .write_count = written_count }; +} + +pub fn decodeFrameBlocks(dest: []u8, src: []const u8, consumed_count: *usize, hash: ?*std.hash.XxHash64) !usize { + // These tables take 7680 bytes + var literal_fse_data: [literal_table_size_max]Table.Fse = undefined; + var match_fse_data: [match_table_size_max]Table.Fse = undefined; + var offset_fse_data: [offset_table_size_max]Table.Fse = undefined; + + var block_header = decodeBlockHeader(src[0..3]); + var bytes_read: usize = 3; + var decode_state = DecodeState{ + .repeat_offsets = .{ + types.compressed_block.start_repeated_offset_1, + types.compressed_block.start_repeated_offset_2, + types.compressed_block.start_repeated_offset_3, + }, + + .offset = undefined, + .match = undefined, + .literal = undefined, + + .literal_fse_buffer = &literal_fse_data, + .match_fse_buffer = &match_fse_data, + .offset_fse_buffer = &offset_fse_data, + + .literal_written_count = 0, + .literal_stream_reader = undefined, + .literal_stream_bytes = undefined, + .literal_stream_index = undefined, + .huffman_tree = undefined, + }; + var first_compressed_block = true; + var first_compressed_literals = true; + var written_count: usize = 0; + while (true) : ({ + block_header = decodeBlockHeader(src[bytes_read..][0..3]); + bytes_read += 3; + }) { + const written_size = try decodeBlock( + dest, + src[bytes_read..], + block_header, + &decode_state, + &first_compressed_block, + &first_compressed_literals, + &bytes_read, + written_count, + ); + if (hash) |hash_state| hash_state.update(dest[written_count .. 
written_count + written_size]); + written_count += written_size; + if (block_header.last_block) break; + } + consumed_count.* += bytes_read; + return written_count; +} + +pub fn decodeBlock( + dest: []u8, + src: []const u8, + block_header: frame.ZStandard.Block.Header, + decode_state: *DecodeState, + first_compressed_block: *bool, + first_compressed_literals: *bool, + consumed_count: *usize, + written_count: usize, +) !usize { + const block_maximum_size = 1 << 17; // 128KiB + const block_size = block_header.block_size; + if (block_maximum_size < block_size) return error.BlockSizeOverMaximum; + // TODO: we probably want to enable safety for release-fast and release-small (or insert custom checks) + switch (block_header.block_type) { + .raw => { + log.debug("writing raw block - size {d}", .{block_size}); + const data = src[0..block_size]; + std.mem.copy(u8, dest[written_count..], data); + consumed_count.* += block_size; + return block_size; + }, + .rle => { + log.debug("writing rle block - '{x}'x{d}", .{ src[0], block_size }); + var write_pos: usize = written_count; + while (write_pos < block_size + written_count) : (write_pos += 1) { + dest[write_pos] = src[0]; + } + consumed_count.* += 1; + return block_size; + }, + .compressed => { + var bytes_read: usize = 0; + const literals = try decodeLiteralsSection(src, &bytes_read); + const sequences_header = try decodeSequencesHeader(src[bytes_read..], &bytes_read); + + if (first_compressed_literals.* and literals.header.block_type == .treeless) + return error.TreelessLiteralsFirst; + + if (literals.huffman_tree) |tree| { + decode_state.huffman_tree = tree; + first_compressed_literals.* = false; + } + + switch (literals.header.block_type) { + .raw, .rle => {}, + .compressed, .treeless => { + decode_state.literal_stream_index = 0; + switch (literals.streams) { + .one => |slice| try decode_state.initLiteralStream(slice), + .four => |streams| try decode_state.initLiteralStream(streams[0]), + } + }, + } + + if 
(sequences_header.sequence_count > 0) { + bytes_read += try decode_state.updateFseTable( + src[bytes_read..], + .literal, + sequences_header.literal_lengths, + first_compressed_block.*, + ); + + bytes_read += try decode_state.updateFseTable( + src[bytes_read..], + .offset, + sequences_header.offsets, + first_compressed_block.*, + ); + + bytes_read += try decode_state.updateFseTable( + src[bytes_read..], + .match, + sequences_header.match_lengths, + first_compressed_block.*, + ); + first_compressed_block.* = false; + } + + var bytes_written: usize = 0; + if (sequences_header.sequence_count > 0) { + const bit_stream_bytes = src[bytes_read..block_size]; + var reverse_byte_reader = reversedByteReader(bit_stream_bytes); + var bit_stream = reverseBitReader(reverse_byte_reader.reader()); + + while (0 == try bit_stream.readBitsNoEof(u1, 1)) {} + try decode_state.readInitialState(&bit_stream); + + var i: usize = 0; + while (i < sequences_header.sequence_count) : (i += 1) { + log.debug("decoding sequence {d}", .{i}); + const decompressed_size = try decode_state.decodeSequenceSlice( + dest, + written_count + bytes_written, + literals, + &bit_stream, + i == sequences_header.sequence_count - 1, + ); + bytes_written += decompressed_size; + } + + bytes_read += bit_stream_bytes.len; + } + + if (decode_state.literal_written_count < literals.header.regenerated_size) { + log.debug("decoding remaining literals", .{}); + const len = literals.header.regenerated_size - decode_state.literal_written_count; + try decode_state.decodeLiteralsInto(dest[written_count + bytes_written ..], literals, len); + log.debug("remaining decoded literals at {d}: {}", .{ + written_count, + std.fmt.fmtSliceHexUpper(dest[written_count .. 
written_count + len]), + }); + bytes_written += len; + } + + decode_state.literal_written_count = 0; + assert(bytes_read == block_header.block_size); + consumed_count.* += bytes_read; + return bytes_written; + }, + .reserved => return error.FrameContainsReservedBlock, + } +} + +pub fn decodeSkippableHeader(src: *const [8]u8) frame.Skippable.Header { + const magic = readInt(u32, src[0..4]); + assert(isSkippableMagic(magic)); + const frame_size = readInt(u32, src[4..8]); + return .{ + .magic_number = magic, + .frame_size = frame_size, + }; +} + +pub fn skippableFrameSize(src: *const [8]u8) !usize { + assert(isSkippableMagic(readInt(u32, src[0..4]))); + const frame_size = readInt(u32, src[4..8]); + return frame_size; +} + +pub fn frameWindowSize(header: frame.ZStandard.Header) ?u64 { + if (header.window_descriptor) |descriptor| { + const exponent = (descriptor & 0b11111000) >> 3; + const mantissa = descriptor & 0b00000111; + const window_log = 10 + exponent; + const window_base = @as(u64, 1) << @intCast(u6, window_log); + const window_add = (window_base / 8) * mantissa; + return window_base + window_add; + } else return header.content_size; +} + +pub fn decodeZStandardHeader(src: []const u8, consumed_count: ?*usize) !frame.ZStandard.Header { + const descriptor = @bitCast(frame.ZStandard.Header.Descriptor, src[0]); + + if (descriptor.unused) return error.UnusedBitSet; + if (descriptor.reserved) return error.ReservedBitSet; + + var bytes_read_count: usize = 1; + + var window_descriptor: ?u8 = null; + if (!descriptor.single_segment_flag) { + window_descriptor = src[bytes_read_count]; + bytes_read_count += 1; + } + + var dictionary_id: ?u32 = null; + if (descriptor.dictionary_id_flag > 0) { + // if flag is 3 we field_size = 4, else field_size = flag + const field_size = (@as(u3, 1) << descriptor.dictionary_id_flag) >> 1; + dictionary_id = readVarInt(u32, src[bytes_read_count .. 
bytes_read_count + field_size]); + bytes_read_count += field_size; + } + + var content_size: ?u64 = null; + if (descriptor.single_segment_flag or descriptor.content_size_flag > 0) { + const field_size = @as(u4, 1) << descriptor.content_size_flag; + content_size = readVarInt(u64, src[bytes_read_count .. bytes_read_count + field_size]); + if (field_size == 2) content_size.? += 256; + bytes_read_count += field_size; + } + + if (consumed_count) |p| p.* += bytes_read_count; + + const header = frame.ZStandard.Header{ + .descriptor = descriptor, + .window_descriptor = window_descriptor, + .dictionary_id = dictionary_id, + .content_size = content_size, + }; + log.debug( + "decoded ZStandard frame header {x}: " ++ + "desc = (d={d},c={},r={},u={},s={},cs={d}), win_desc = {?x}, dict_id = {?x}, content_size = {?d}", + .{ + std.fmt.fmtSliceHexUpper(src[0..bytes_read_count]), + header.descriptor.dictionary_id_flag, + header.descriptor.content_checksum_flag, + header.descriptor.reserved, + header.descriptor.unused, + header.descriptor.single_segment_flag, + header.descriptor.content_size_flag, + header.window_descriptor, + header.dictionary_id, + header.content_size, + }, + ); + return header; +} + +pub fn decodeBlockHeader(src: *const [3]u8) frame.ZStandard.Block.Header { + const last_block = src[0] & 1 == 1; + const block_type = @intToEnum(frame.ZStandard.Block.Type, (src[0] & 0b110) >> 1); + const block_size = ((src[0] & 0b11111000) >> 3) + (@as(u21, src[1]) << 5) + (@as(u21, src[2]) << 13); + log.debug("decoded block header {}: last = {}, type = {s}, size = {d}", .{ + std.fmt.fmtSliceHexUpper(src), + last_block, + @tagName(block_type), + block_size, + }); + return .{ + .last_block = last_block, + .block_type = block_type, + .block_size = block_size, + }; +} + +pub fn decodeLiteralsSection(src: []const u8, consumed_count: *usize) !Literals { + // TODO: we probably want to enable safety for release-fast and release-small (or insert custom checks) + var bytes_read: usize = 0; + 
const header = decodeLiteralsHeader(src, &bytes_read); + switch (header.block_type) { + .raw => { + const stream = src[bytes_read .. bytes_read + header.regenerated_size]; + consumed_count.* += header.regenerated_size + bytes_read; + return Literals{ + .header = header, + .huffman_tree = null, + .streams = .{ .one = stream }, + }; + }, + .rle => { + const stream = src[bytes_read .. bytes_read + 1]; + consumed_count.* += 1 + bytes_read; + return Literals{ + .header = header, + .huffman_tree = null, + .streams = .{ .one = stream }, + }; + }, + .compressed, .treeless => { + const huffman_tree_start = bytes_read; + const huffman_tree = if (header.block_type == .compressed) + try decodeHuffmanTree(src[bytes_read..], &bytes_read) + else + null; + const huffman_tree_size = bytes_read - huffman_tree_start; + const total_streams_size = @as(usize, header.compressed_size.?) - huffman_tree_size; + log.debug("huffman tree size = {}, total streams size = {}", .{ huffman_tree_size, total_streams_size }); + if (huffman_tree) |tree| dumpHuffmanTree(tree); + + if (header.size_format == 0) { + const stream = src[bytes_read .. bytes_read + total_streams_size]; + bytes_read += total_streams_size; + consumed_count.* += bytes_read; + return Literals{ + .header = header, + .huffman_tree = huffman_tree, + .streams = .{ .one = stream }, + }; + } + + const stream_data = src[bytes_read .. 
bytes_read + total_streams_size]; + + log.debug("jump table: {}", .{std.fmt.fmtSliceHexUpper(stream_data[0..6])}); + const stream_1_length = @as(usize, readInt(u16, stream_data[0..2])); + const stream_2_length = @as(usize, readInt(u16, stream_data[2..4])); + const stream_3_length = @as(usize, readInt(u16, stream_data[4..6])); + const stream_4_length = (total_streams_size - 6) - (stream_1_length + stream_2_length + stream_3_length); + + const stream_1_start = 6; + const stream_2_start = stream_1_start + stream_1_length; + const stream_3_start = stream_2_start + stream_2_length; + const stream_4_start = stream_3_start + stream_3_length; + + consumed_count.* += total_streams_size + bytes_read; + + return Literals{ + .header = header, + .huffman_tree = huffman_tree, + .streams = .{ .four = .{ + stream_data[stream_1_start .. stream_1_start + stream_1_length], + stream_data[stream_2_start .. stream_2_start + stream_2_length], + stream_data[stream_3_start .. stream_3_start + stream_3_length], + stream_data[stream_4_start .. stream_4_start + stream_4_length], + } }, + }; + }, + } +} + +fn decodeHuffmanTree(src: []const u8, consumed_count: *usize) !Literals.HuffmanTree { + var bytes_read: usize = 0; + bytes_read += 1; + const header = src[0]; + var symbol_count: usize = undefined; + var weights: [256]u4 = undefined; + var max_number_of_bits: u4 = undefined; + if (header < 128) { + // FSE compressed weigths + const compressed_size = header; + var stream = std.io.fixedBufferStream(src[1 .. compressed_size + 1]); + var counting_reader = std.io.countingReader(stream.reader()); + var bit_reader = bitReader(counting_reader.reader()); + + var entries: [1 << 6]Table.Fse = undefined; + const table_size = try decodeFseTable(&bit_reader, 256, 6, &entries); + const accuracy_log = std.math.log2_int_ceil(usize, table_size); + + var huff_data = src[1 + counting_reader.bytes_read .. 
compressed_size + 1]; + var huff_data_bytes = reversedByteReader(huff_data); + var huff_bits = reverseBitReader(huff_data_bytes.reader()); + while (0 == try huff_bits.readBitsNoEof(u1, 1)) {} + + dumpFseTable("huffman", entries[0..table_size]); + + var i: usize = 0; + var even_state: u32 = try huff_bits.readBitsNoEof(u32, accuracy_log); + var odd_state: u32 = try huff_bits.readBitsNoEof(u32, accuracy_log); + + while (i < 255) { + const even_data = entries[even_state]; + var read_bits: usize = 0; + const even_bits = try huff_bits.readBits(u32, even_data.bits, &read_bits); + weights[i] = @intCast(u4, even_data.symbol); + i += 1; + if (read_bits < even_data.bits) { + // a 256th explicit weight would push the implied last weight to weights[256], out of bounds + if (i == 255) return error.MalformedHuffmanTree; + weights[i] = @intCast(u4, entries[odd_state].symbol); + log.debug("overflow condition: setting weights[{d}] = {d}", .{ i, weights[i] }); + i += 1; + break; + } + even_state = even_data.baseline + even_bits; + + read_bits = 0; + const odd_data = entries[odd_state]; + const odd_bits = try huff_bits.readBits(u32, odd_data.bits, &read_bits); + weights[i] = @intCast(u4, odd_data.symbol); + i += 1; + if (read_bits < odd_data.bits) { + if (i == 256) return error.MalformedHuffmanTree; + weights[i] = @intCast(u4, entries[even_state].symbol); + log.debug("overflow condition: setting weights[{d}] = {d}", .{ i, weights[i] }); + i += 1; + break; + } + odd_state = odd_data.baseline + odd_bits; + } else return error.MalformedHuffmanTree; + + symbol_count = i + 1; // stream contains all but the last symbol + bytes_read += compressed_size; + } else { + const encoded_symbol_count = header - 127; + symbol_count = encoded_symbol_count + 1; + log.debug("huffman tree symbol count = {d}", .{symbol_count}); + const weights_byte_count = (encoded_symbol_count + 1) / 2; + // src[0] is the header byte, so the weights occupy src[1 .. weights_byte_count + 1]; check before slicing + if (src.len <= weights_byte_count) return error.MalformedHuffmanTree; + log.debug("decoding direct huffman tree: {}|{}", .{ + std.fmt.fmtSliceHexUpper(src[0..1]), + std.fmt.fmtSliceHexUpper(src[1 .. 
weights_byte_count + 1]), + }); + var i: usize = 0; + while (i < weights_byte_count) : (i += 1) { + weights[2 * i] = @intCast(u4, src[i + 1] >> 4); + weights[2 * i + 1] = @intCast(u4, src[i + 1] & 0xF); + log.debug("weights[{d}] = {d}", .{ 2 * i, weights[2 * i] }); + log.debug("weights[{d}] = {d}", .{ 2 * i + 1, weights[2 * i + 1] }); + } + bytes_read += weights_byte_count; + } + var weight_power_sum: u16 = 0; + for (weights[0 .. symbol_count - 1]) |value| { + if (value > 0) { + weight_power_sum += @as(u16, 1) << (value - 1); + } + } + log.debug("weight power sum = {d}", .{weight_power_sum}); + + // advance to next power of two (even if weight_power_sum is a power of 2) + max_number_of_bits = @intCast(u4, std.math.log2_int(u16, weight_power_sum) + 1); + const next_power_of_two = @as(u16, 1) << max_number_of_bits; + weights[symbol_count - 1] = @intCast(u4, std.math.log2_int(u16, next_power_of_two - weight_power_sum) + 1); + log.debug("weights[{d}] = {d}", .{ symbol_count - 1, weights[symbol_count - 1] }); + + var weight_sorted_prefixed_symbols: [256]Literals.HuffmanTree.PrefixedSymbol = undefined; + for (weight_sorted_prefixed_symbols[0..symbol_count]) |_, i| { + weight_sorted_prefixed_symbols[i] = .{ + .symbol = @intCast(u8, i), + .weight = undefined, + .prefix = undefined, + }; + } + + std.sort.sort( + Literals.HuffmanTree.PrefixedSymbol, + weight_sorted_prefixed_symbols[0..symbol_count], + weights, + lessThanByWeight, + ); + + var prefix: u16 = 0; + var prefixed_symbol_count: usize = 0; + var sorted_index: usize = 0; + while (sorted_index < symbol_count) { + var symbol = weight_sorted_prefixed_symbols[sorted_index].symbol; + const weight = weights[symbol]; + if (weight == 0) { + sorted_index += 1; + continue; + } + + while (sorted_index < symbol_count) : ({ + sorted_index += 1; + prefixed_symbol_count += 1; + prefix += 1; + }) { + symbol = weight_sorted_prefixed_symbols[sorted_index].symbol; + 
if (weights[symbol] != weight) { + prefix = ((prefix - 1) >> (weights[symbol] - weight)) + 1; + break; + } + weight_sorted_prefixed_symbols[prefixed_symbol_count].symbol = symbol; + weight_sorted_prefixed_symbols[prefixed_symbol_count].prefix = prefix; + weight_sorted_prefixed_symbols[prefixed_symbol_count].weight = weight; + } + } + consumed_count.* += bytes_read; + const tree = Literals.HuffmanTree{ + .max_bit_count = max_number_of_bits, + .symbol_count_minus_one = @intCast(u8, prefixed_symbol_count - 1), + .nodes = weight_sorted_prefixed_symbols, + }; + log.debug("decoded huffman tree {}:", .{std.fmt.fmtSliceHexUpper(src[0..bytes_read])}); + return tree; +} + +fn lessThanByWeight( + weights: [256]u4, + lhs: Literals.HuffmanTree.PrefixedSymbol, + rhs: Literals.HuffmanTree.PrefixedSymbol, +) bool { + // NOTE: this function relies on the use of a stable sorting algorithm, + // otherwise a special case of if (weights[lhs] == weights[rhs]) return lhs < rhs; + // should be added + return weights[lhs.symbol] < weights[rhs.symbol]; +} + +/// Decodes the literals section header found at the start of `src` and adds the +/// number of header bytes to `consumed_count.*`. +pub fn decodeLiteralsHeader(src: []const u8, consumed_count: *usize) Literals.Header { + // TODO: we probably want to enable safety for release-fast and release-small (or insert custom checks) + const start = consumed_count.*; + const byte0 = src[0]; + const block_type = @intToEnum(Literals.BlockType, byte0 & 0b11); + const size_format = @intCast(u2, (byte0 & 0b1100) >> 2); + var regenerated_size: u20 = undefined; + var compressed_size: ?u18 = null; + switch (block_type) { + .raw, .rle => { + // header bytes are indexed relative to src, not to the running value of consumed_count + switch (size_format) { + 0, 2 => { + regenerated_size = byte0 >> 3; + consumed_count.* += 1; + }, + 1 => { + regenerated_size = (byte0 >> 4) + + (@as(u20, src[1]) << 4); + consumed_count.* += 2; + }, + 3 => { + regenerated_size = (byte0 >> 4) + + (@as(u20, src[1]) << 4) + + (@as(u20, src[2]) << 12); + consumed_count.* += 3; + }, + } + }, + .compressed, .treeless => { + const byte1 = src[1]; + 
const byte2 = src[2]; + switch (size_format) { + 0, 1 => { + regenerated_size = (byte0 >> 4) + ((@as(u20, byte1) & 0b00111111) << 4); + compressed_size = ((byte1 & 0b11000000) >> 6) + (@as(u18, byte2) << 2); + consumed_count.* += 3; + }, + 2 => { + const byte3 = src[3]; + regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00000011) << 12); + compressed_size = ((byte2 & 0b11111100) >> 2) + (@as(u18, byte3) << 6); + consumed_count.* += 4; + }, + 3 => { + const byte3 = src[3]; + const byte4 = src[4]; + regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00111111) << 12); + compressed_size = ((byte2 & 0b11000000) >> 6) + (@as(u18, byte3) << 2) + (@as(u18, byte4) << 10); + consumed_count.* += 5; + }, + } + }, + } + log.debug( + "decoded literals section header '{}': type = {s}, size_format = {}, regen_size = {d}, compressed size = {?d}", + .{ + std.fmt.fmtSliceHexUpper(src[0 .. consumed_count.* - start]), + @tagName(block_type), + size_format, + regenerated_size, + compressed_size, + }, + ); + return Literals.Header{ + .block_type = block_type, + .size_format = size_format, + .regenerated_size = regenerated_size, + .compressed_size = compressed_size, + }; +} + +fn decodeSequencesHeader(src: []const u8, consumed_count: *usize) !Sequences.Header { + var sequence_count: u24 = undefined; + + var bytes_read: usize = 0; + const byte0 = src[0]; + if (byte0 == 0) { + bytes_read += 1; + log.debug("decoded sequences header '{}': sequence count = 0", .{std.fmt.fmtSliceHexUpper(src[0..bytes_read])}); + consumed_count.* += bytes_read; + return Sequences.Header{ + .sequence_count = 0, + .offsets = undefined, + .match_lengths = undefined, + .literal_lengths = undefined, + }; + } else if (byte0 < 128) { + sequence_count = byte0; + bytes_read += 1; + } else if (byte0 < 255) { + sequence_count = (@as(u24, (byte0 - 128)) << 8) + src[1]; + bytes_read += 2; + } else { + sequence_count = src[1] + (@as(u24, src[2]) << 8) + 0x7F00; + 
bytes_read += 3;
+    }
+
+    const compression_modes = src[bytes_read];
+    bytes_read += 1;
+
+    consumed_count.* += bytes_read;
+    const matches_mode = @intToEnum(Sequences.Header.Mode, (compression_modes & 0b00001100) >> 2);
+    const offsets_mode = @intToEnum(Sequences.Header.Mode, (compression_modes & 0b00110000) >> 4);
+    const literal_mode = @intToEnum(Sequences.Header.Mode, (compression_modes & 0b11000000) >> 6);
+    log.debug("decoded sequences header '{}': (sc={d},o={s},m={s},l={s})", .{
+        std.fmt.fmtSliceHexUpper(src[0..bytes_read]),
+        sequence_count,
+        @tagName(offsets_mode),
+        @tagName(matches_mode),
+        @tagName(literal_mode),
+    });
+    if (compression_modes & 0b11 != 0) return error.ReservedBitSet;
+
+    return Sequences.Header{
+        .sequence_count = sequence_count,
+        .offsets = offsets_mode,
+        .match_lengths = matches_mode,
+        .literal_lengths = literal_mode,
+    };
+}
+
+/// Fills `entries` with an FSE decoding table built from `values`.
+///
+/// `values[symbol]` is the symbol's probability plus one:
+///   * 0 marks a "less than one" probability; the symbol gets a single state
+///     at the end of the table that reads `accuracy_log` bits,
+///   * 1 marks probability zero; the symbol gets no state,
+///   * anything else gets `value - 1` states spread over the table.
+///
+/// `entries.len` is the table size. The code asserts it is at most `1 << 9`
+/// and masks positions with `entries.len - 1`, so it is assumed to be a power
+/// of two. Entries not assigned to any symbol are left untouched.
+///
+/// Errors come from `std.math.ceilPowerOfTwo` only.
+fn buildFseTable(values: []const u16, entries: []Table.Fse) !void {
+    const total_probability = @intCast(u16, entries.len);
+    const accuracy_log = std.math.log2_int(u16, total_probability);
+    assert(total_probability <= 1 << 9);
+
+    // "Less than one" symbols take the last table slots, in symbol order
+    // from the end of the table backwards.
+    var less_than_one_count: usize = 0;
+    for (values) |value, i| {
+        if (value == 0) {
+            entries[entries.len - 1 - less_than_one_count] = Table.Fse{
+                .symbol = @intCast(u8, i),
+                .baseline = 0,
+                .bits = accuracy_log,
+            };
+            less_than_one_count += 1;
+        }
+    }
+
+    var position: usize = 0;
+    var temp_states: [1 << 9]u16 = undefined;
+    for (values) |value, symbol| {
+        if (value == 0 or value == 1) continue;
+        const probability = value - 1;
+
+        const state_share_dividend = try std.math.ceilPowerOfTwo(u16, probability);
+        const share_size = @divExact(total_probability, state_share_dividend);
+        const double_state_count = state_share_dividend - probability;
+        const single_state_count = probability - double_state_count;
+        const share_size_log = std.math.log2_int(u16, share_size);
+
+        // Collect the table positions of this symbol's states, skipping the
+        // slots at the end that were reserved above.
+        var i: u16 = 0;
+        while (i < probability) : (i += 1) {
+            temp_states[i] = @intCast(u16, position);
+            position += (entries.len >> 1) + (entries.len >> 3) + 3;
+            position &= entries.len - 1;
+            while (position >= entries.len - less_than_one_count) {
+                position += (entries.len >> 1) + (entries.len >> 3) + 3;
+                position &= entries.len - 1;
+            }
+        }
+        // Baselines are assigned in ascending state order: the first
+        // `double_state_count` states read one extra bit.
+        std.sort.sort(u16, temp_states[0..probability], {}, std.sort.asc(u16));
+        i = 0;
+        while (i < probability) : (i += 1) {
+            entries[temp_states[i]] = if (i < double_state_count) Table.Fse{
+                .symbol = @intCast(u8, symbol),
+                .bits = share_size_log + 1,
+                .baseline = single_state_count * share_size + i * 2 * share_size,
+            } else Table.Fse{
+                .symbol = @intCast(u8, symbol),
+                .bits = share_size_log,
+                .baseline = (i - double_state_count) * share_size,
+            };
+        }
+    }
+}
+
+/// Reads an FSE table description from `bit_reader` and builds the decoding
+/// table into `entries`.
+///
+/// `bit_reader` must provide `readBitsNoEof(T, n)` and `alignToByte()`; the
+/// reader is aligned to the next byte once the description has been read.
+/// `entries` must have room for `1 << accuracy_log` items, which is at most
+/// `1 << max_accuracy_log`.
+///
+/// Returns the table size (`1 << accuracy_log`), i.e. the number of leading
+/// items of `entries` that make up the table.
+///
+/// Errors:
+///   * `error.MalformedAccuracyLog` if the encoded accuracy log exceeds
+///     `max_accuracy_log`,
+///   * `error.MalformedFseTable` if the probabilities do not sum to the table
+///     size, or more symbols are described than `expected_symbol_count` (or
+///     than fit in the 256-entry scratch array),
+///   * any error of `bit_reader` or `buildFseTable`.
+fn decodeFseTable(
+    bit_reader: anytype,
+    expected_symbol_count: usize,
+    max_accuracy_log: u4,
+    entries: []Table.Fse,
+) !usize {
+    log.debug("decoding fse table {d} {d}", .{ max_accuracy_log, expected_symbol_count });
+
+    const accuracy_log_biased = try bit_reader.readBitsNoEof(u4, 4);
+    log.debug("accuracy_log_biased = {d}", .{accuracy_log_biased});
+    if (accuracy_log_biased > max_accuracy_log -| 5) return error.MalformedAccuracyLog;
+    const accuracy_log = accuracy_log_biased + 5;
+
+    var values: [256]u16 = undefined;
+    var value_count: usize = 0;
+
+    const total_probability = @as(u16, 1) << accuracy_log;
+    log.debug("total probability = {d}", .{total_probability});
+    var accumulated_probability: u16 = 0;
+
+    while (accumulated_probability < total_probability) {
+        // WARNING: The RFC is poorly worded, and would suggest std.math.log2_int_ceil is correct here,
+        //          but power of two (remaining probabilities + 1) need max bits set to 1 more.
+        const max_bits = @intCast(u4, std.math.log2_int(u16, total_probability - accumulated_probability + 1)) + 1;
+        const small = try bit_reader.readBitsNoEof(u16, max_bits - 1);
+
+        const cutoff = (@as(u16, 1) << max_bits) - 1 - (total_probability - accumulated_probability + 1);
+
+        const value = if (small < cutoff)
+            small
+        else value: {
+            const value_read = small + (try bit_reader.readBitsNoEof(u16, 1) << (max_bits - 1));
+            break :value if (value_read < @as(u16, 1) << (max_bits - 1))
+                value_read
+            else
+                value_read - cutoff;
+        };
+
+        accumulated_probability += if (value != 0) value - 1 else 1;
+
+        // Zero-probability symbols (value == 1) do not advance
+        // `accumulated_probability`, so malformed input could otherwise
+        // describe more symbols than `values` can hold.
+        if (value_count == values.len) return error.MalformedFseTable;
+        values[value_count] = value;
+        value_count += 1;
+
+        if (value == 1) {
+            // A zero probability is followed by 2-bit repeat flags giving the
+            // number of further zero-probability symbols; a flag of 3 means
+            // another flag follows.
+            while (true) {
+                const repeat_flag = try bit_reader.readBitsNoEof(u2, 2);
+                if (value_count + repeat_flag > values.len) return error.MalformedFseTable;
+                var i: usize = 0;
+                while (i < repeat_flag) : (i += 1) {
+                    values[value_count] = 1;
+                    value_count += 1;
+                }
+                if (repeat_flag < 3) break;
+            }
+        }
+    }
+    bit_reader.alignToByte();
+
+    // TODO: check there are at least 2 non-zero probabilities
+
+    if (accumulated_probability != total_probability) return error.MalformedFseTable;
+    if (value_count > expected_symbol_count) return error.MalformedFseTable;
+
+    const table_size = total_probability;
+
+    try buildFseTable(values[0..value_count], entries[0..table_size]);
+    return table_size;
+}
+
+/// Byte source that yields the bytes of a slice from last to first.
+///
+/// `remaining_bytes` is the number of bytes not yet handed out; the next byte
+/// returned is `bytes[remaining_bytes - 1]`.
+const ReversedByteReader = struct {
+    remaining_bytes: usize,
+    bytes: []const u8,
+
+    const Reader = std.io.Reader(*ReversedByteReader, error{}, readFn);
+
+    /// Returns a `std.io.Reader` that reads through `self`.
+    fn reader(self: *ReversedByteReader) Reader {
+        return .{ .context = self };
+    }
+};
+
+/// Read callback for `ReversedByteReader.Reader`.
+///
+/// Stores at most one byte per call in `buffer[0]` and returns the number of
+/// bytes stored: 0 once all bytes have been consumed (or when `buffer` is
+/// empty), 1 otherwise.
+fn readFn(ctx: *ReversedByteReader, buffer: []u8) !usize {
+    if (ctx.remaining_bytes == 0 or buffer.len == 0) return 0;
+    const byte_index = ctx.remaining_bytes - 1;
+    buffer[0] = ctx.bytes[byte_index];
+    ctx.remaining_bytes = byte_index;
+    return 1;
+}
+
+/// Creates a `ReversedByteReader` positioned at the last byte of `bytes`.
+/// The returned value borrows `bytes`; it does not copy it.
+fn reversedByteReader(bytes: []const u8) ReversedByteReader {
+    return ReversedByteReader{
+        .remaining_bytes = bytes.len,
+        .bytes = bytes,
+    };
+}
+
+/// Returns a bit reader type that wraps `std.io.BitReader(.Big, Reader)` and
+/// forwards `readBitsNoEof`, `readBits` and `alignToByte` to it.
+fn
ReverseBitReader(comptime Reader: type) type {
+    // Thin forwarding wrapper around the big-endian `std.io.BitReader`.
+    return struct {
+        underlying: std.io.BitReader(.Big, Reader),
+
+        fn readBitsNoEof(self: *@This(), comptime U: type, num_bits: usize) !U {
+            return self.underlying.readBitsNoEof(U, num_bits);
+        }
+
+        fn readBits(self: *@This(), comptime U: type, num_bits: usize, out_bits: *usize) !U {
+            return self.underlying.readBits(U, num_bits, out_bits);
+        }
+
+        fn alignToByte(self: *@This()) void {
+            self.underlying.alignToByte();
+        }
+    };
+}
+
+/// Wraps `reader` in a `ReverseBitReader` (big-endian `std.io.bitReader`).
+fn reverseBitReader(reader: anytype) ReverseBitReader(@TypeOf(reader)) {
+    return .{ .underlying = std.io.bitReader(.Big, reader) };
+}
+
+/// Returns a bit reader type that wraps `std.io.BitReader(.Little, Reader)`
+/// and forwards `readBitsNoEof`, `readBits` and `alignToByte` to it.
+fn BitReader(comptime Reader: type) type {
+    return struct {
+        underlying: std.io.BitReader(.Little, Reader),
+
+        fn readBitsNoEof(self: *@This(), comptime U: type, num_bits: usize) !U {
+            return self.underlying.readBitsNoEof(U, num_bits);
+        }
+
+        fn readBits(self: *@This(), comptime U: type, num_bits: usize, out_bits: *usize) !U {
+            return self.underlying.readBits(U, num_bits, out_bits);
+        }
+
+        fn alignToByte(self: *@This()) void {
+            self.underlying.alignToByte();
+        }
+    };
+}
+
+/// Wraps `reader` in a `BitReader` (little-endian `std.io.bitReader`).
+fn bitReader(reader: anytype) BitReader(@TypeOf(reader)) {
+    return .{ .underlying = std.io.bitReader(.Little, reader) };
+}
+
+test {
+    std.testing.refAllDecls(@This());
+}
+
+// Checks that `buildFseTable` reproduces the three predefined tables stored
+// in `types.compressed_block`. The inputs use the `buildFseTable` encoding
+// (probability + 1, so 0 stands for a "less than one" probability).
+test buildFseTable {
+    const literals_length_default_values = [36]u16{
+        5, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 2, 2, 2, 2, 2,
+        0, 0, 0, 0,
+    };
+
+    const match_lengths_default_values = [53]u16{
+        2, 5, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0,
+        0, 0, 0, 0, 0,
+    };
+
+    const offset_codes_default_values = [29]u16{
+        2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0,
+    };
+
+    var entries: [64]Table.Fse = undefined;
+    try buildFseTable(&literals_length_default_values, &entries);
+    try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_literal_fse_table.fse, &entries);
+
+    try buildFseTable(&match_lengths_default_values, &entries);
+    try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_match_fse_table.fse, &entries);
+
+    // The offset table only has 32 states, so only part of `entries` is used.
+    try buildFseTable(&offset_codes_default_values, entries[0..32]);
+    try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_offset_fse_table.fse, entries[0..32]);
+}
+
+/// Logs every entry of `table` at debug level, preceded by a heading line
+/// that starts with `prefix`.
+fn dumpFseTable(prefix: []const u8, table: []const Table.Fse) void {
+    log.debug("{s} fse table:", .{prefix});
+    for (table) |entry, i| {
+        log.debug("state = {d} symbol = {d} bl = {d}, bits = {d}", .{ i, entry.symbol, entry.baseline, entry.bits });
+    }
+}
+
+/// Logs the used nodes of `tree` (the first `symbol_count_minus_one + 1`
+/// entries of `tree.nodes`) at debug level.
+fn dumpHuffmanTree(tree: Literals.HuffmanTree) void {
+    // Widen before adding one: `symbol_count_minus_one` is a `u8`, so a tree
+    // with 256 symbols (stored as 255) would overflow in `u8` arithmetic.
+    const symbol_count = @as(usize, tree.symbol_count_minus_one) + 1;
+    log.debug("Huffman tree: max bit count = {}, symbol count = {}", .{ tree.max_bit_count, symbol_count });
+    for (tree.nodes[0..symbol_count]) |node| {
+        log.debug("symbol = {[symbol]d}, prefix = {[prefix]d}, weight = {[weight]d}", node);
+    }
+}
diff --git a/lib/std/compress/zstandard/types.zig b/lib/std/compress/zstandard/types.zig
new file mode 100644
index 0000000000..edac66f686
--- /dev/null
+++ b/lib/std/compress/zstandard/types.zig
@@ -0,0 +1,394 @@
+/// Types describing frames.
+pub const frame = struct {
+    /// The two kinds of frame distinguished by this module.
+    pub const Kind = enum { zstandard, skippable };
+
+    /// A Zstandard frame: header, data blocks and an optional checksum.
+    pub const ZStandard = struct {
+        pub const magic_number = 0xFD2FB528;
+
+        header: Header,
+        data_blocks: []Block,
+        checksum: ?u32,
+
+        /// Frame header; the optional fields are `null` when absent.
+        pub const Header = struct {
+            descriptor: Descriptor,
+            window_descriptor: ?u8,
+            dictionary_id: ?u32,
+            content_size: ?u64,
+
+            /// Bit fields of the frame header descriptor (8 bits in total).
+            pub const Descriptor = packed struct {
+                dictionary_id_flag: u2,
+                content_checksum_flag: bool,
+                reserved: bool,
+                unused: bool,
+                single_segment_flag: bool,
+                content_size_flag: u2,
+            };
+        };
+
+        pub const Block = struct {
+            /// Decoded block header.
+            pub const Header = struct {
+                last_block: bool,
+                block_type: Block.Type,
+                block_size: u21,
+            };
+
+            /// Block type as a 2-bit value.
+            pub const Type = enum(u2) {
+                raw,
+                rle,
+                compressed,
+                reserved,
+            };
+        };
+    };
+
+    /// A skippable frame.
+    pub const Skippable = struct {
+        // Inclusive range of magic numbers (16 values, low nibble 0x0-0xF).
+        pub const magic_number_min = 0x184D2A50;
+        pub const magic_number_max = 0x184D2A5F;
+
+        pub const Header = struct {
+            magic_number: u32,
+            frame_size: u32,
+        };
+    };
+};
+
+/// Types and constant tables used when decoding compressed blocks.
+pub const compressed_block = struct {
+    /// A literals section: header, optional Huffman tree and the stream(s)
+    /// holding the literal data.
+    pub const Literals = struct {
+        header: Header,
+        huffman_tree: ?HuffmanTree,
+        streams: Streams,
+
+        /// Literal data as either a single stream or four streams.
+        pub const Streams = union(enum) {
+            one: []const u8,
+            four: [4][]const u8,
+        };
+
+        /// Decoded literals section header. `compressed_size` is optional;
+        /// `decodeLiteralsHeader` in decompress.zig only sets it for
+        /// `.compressed` and `.treeless` blocks.
+        pub const Header = struct {
+            block_type: BlockType,
+            size_format: u2,
+            regenerated_size: u20,
+            compressed_size: ?u18,
+        };
+
+        /// Literals block type as a 2-bit value.
+        pub const BlockType = enum(u2) {
+            raw,
+            rle,
+            compressed,
+            treeless,
+        };
+
+        /// Huffman decoding table stored as a flat array of prefixed symbols.
+        /// Only the first `symbol_count_minus_one + 1` entries of `nodes` are
+        /// meaningful.
+        pub const HuffmanTree = struct {
+            max_bit_count: u4,
+            symbol_count_minus_one: u8,
+            nodes: [256]PrefixedSymbol,
+
+            pub const PrefixedSymbol = struct {
+                symbol: u8,
+                prefix: u16,
+                weight: u4,
+            };
+
+            /// Outcome of `query`: either the decoded symbol, or the index
+            /// of the node at which the search stopped.
+            pub const Result = union(enum) {
+                symbol: u8,
+                index: usize,
+            };
+
+            /// Searches downwards from `nodes[index]` through the run of
+            /// nodes sharing that node's weight for one whose prefix equals
+            /// `prefix`.
+            ///
+            /// Returns `.symbol` on a match. If a node of a different weight
+            /// is reached first, returns `.index` of that node. Returns
+            /// `error.PrefixNotFound` if index 0 is reached without a match.
+            pub fn query(self: HuffmanTree, index: usize, prefix: u16) !Result {
+                var node = self.nodes[index];
+                const weight = node.weight;
+                var i: usize = index;
+                while (node.weight == weight) {
+                    if (node.prefix == prefix) return Result{ .symbol = node.symbol };
+                    if (i == 0) return error.PrefixNotFound;
+                    i -= 1;
+                    node = self.nodes[i];
+                }
+                return Result{ .index = i };
+            }
+
+            /// Converts a weight to a bit count: 0 for weight 0, otherwise
+            /// `max_bit_count + 1 - weight`.
+            // NOTE(review): `max_bit_count + 1` is computed in u4, so this
+            // assumes `max_bit_count < 15` — confirm against callers.
+            pub fn weightToBitCount(weight: u4, max_bit_count: u4) u4 {
+                return if (weight == 0) 0 else ((max_bit_count + 1) - weight);
+            }
+        };
+
+        pub const StreamCount = enum { one, four };
+        /// Number of streams for a literals section: always one for
+        /// `.raw`/`.rle`; for `.compressed`/`.treeless` one when
+        /// `size_format == 0`, four otherwise.
+        pub fn streamCount(size_format: u2, block_type: BlockType) StreamCount {
+            return switch (block_type) {
+                .raw, .rle => .one,
+                .compressed, .treeless => if (size_format == 0) .one else .four,
+            };
+        }
+    };
+
+    /// A sequences section: header plus one table per sequence field.
+    pub const Sequences = struct {
+        header: Sequences.Header,
+        literals_length_table: Table,
+        offset_table: Table,
+        match_length_table: Table,
+
+        /// Decoded sequences section header: the sequence count and one
+        /// 2-bit compression mode per field.
+        pub const Header = struct {
+            sequence_count: u24,
+            match_lengths: Mode,
+            offsets: Mode,
+            literal_lengths: Mode,
+
+            pub const Mode = enum(u2) {
+                predefined,
+                rle,
+                fse,
+                repeat,
+            };
+        };
+    };
+
+    /// Decoding table for one sequence field: either a slice of FSE table
+    /// entries or a single byte value (`rle`).
+    pub const Table = union(enum) {
+        fse: []const Fse,
+        rle: u8,
+
+        /// One FSE table entry (one state).
+        pub const Fse = struct {
+            symbol: u8,
+            baseline: u16,
+            bits: u8,
+        };
+    };
+
+    // NOTE(review): presumably indexed by literals length code, each entry
+    // being `{ baseline, number of extra bits }` — confirm against the decoder.
+    pub const literals_length_code_table = [36]struct { u32, u5 }{
+        .{ 0, 0 }, .{ 1, 0 }, .{ 2, 0 }, .{ 3, 0 },
+        .{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 },
+        .{ 8, 0 }, .{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 },
+        .{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 }, .{ 15, 0 },
+        .{ 16, 1 }, .{ 18, 1 }, .{ 20, 1 }, .{ 22, 1 },
+        .{ 24, 2 }, .{ 28, 2 }, .{ 32, 3 }, .{ 40, 3 },
+        .{ 48, 4 }, .{ 64, 6 }, .{ 128, 7 }, .{ 256, 8 },
+        .{ 512, 9 }, .{ 1024, 10 }, .{ 2048, 11 }, .{ 4096, 12 },
+        .{ 8192, 13 }, .{ 16384, 14 }, .{ 32768, 15 }, .{ 65536, 16 },
+    };
+
+    // NOTE(review): presumably indexed by match length code, each entry
+    // being `{ baseline, number of extra bits }` — confirm against the decoder.
+    pub const match_length_code_table = [53]struct { u32, u5 }{
+        .{ 3, 0 }, .{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 }, .{ 8, 0 }, .{ 9, 0 }, .{ 10, 0 },
+        .{ 11, 0 }, .{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 }, .{ 15, 0 }, .{ 16, 0 }, .{ 17, 0 }, .{ 18, 0 },
+        .{ 19, 0 }, .{ 20, 0 }, .{ 21, 0 }, .{ 22, 0 }, .{ 23, 0 }, .{ 24, 0 }, .{ 25, 0 }, .{ 26, 0 },
+        .{ 27, 0 }, .{ 28, 0 }, .{ 29, 0 }, .{ 30, 0 }, .{ 31, 0 }, .{ 32, 0 }, .{ 33, 0 }, .{ 34, 0 },
+        .{ 35, 1 }, .{ 37, 1 }, .{ 39, 1 }, .{ 41, 1 }, .{ 43, 2 }, .{ 47, 2 }, .{ 51, 3 }, .{ 59, 3 },
+        .{ 67, 4 }, .{ 83, 4 }, .{ 99, 5 }, .{ 131, 7 }, .{ 259, 8 }, .{ 515, 9 }, .{ 1027, 10 }, .{ 2051, 11 },
+        .{ 4099, 12 }, .{ 8195, 13 }, .{ 16387, 14 }, .{ 32771, 15 }, .{ 65539, 16 },
+    };
+
+    // The three default distributions below hold one value per symbol; each
+    // value is one less than the matching input of `test buildFseTable` in
+    // decompress.zig (so -1 here corresponds to 0 there).
+    pub const literals_length_default_distribution = [36]i16{
+        4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
+        -1, -1, -1, -1,
+    };
+
+    pub const match_lengths_default_distribution = [53]i16{
+        1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1,
+        -1, -1, -1, -1, -1,
+    };
+
+    pub const offset_codes_default_distribution = [29]i16{
+        1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
+    };
+
+    // Predefined 64-state FSE table for literals lengths. `test buildFseTable`
+    // in decompress.zig checks that `buildFseTable` reproduces it.
+    pub const predefined_literal_fse_table = Table{
+        .fse = &[64]Table.Fse{
+            .{ .symbol = 0, .bits = 4, .baseline = 0 },
+            .{ .symbol = 0, .bits = 4, .baseline = 16 },
+            .{ .symbol = 1, .bits = 5, .baseline = 32 },
+            .{ .symbol = 3, .bits = 5, .baseline = 0 },
+            .{ .symbol = 4, .bits = 5, .baseline = 0 },
+            .{ .symbol = 6, .bits = 5, .baseline = 0 },
+            .{ .symbol = 7, .bits = 5, .baseline = 0 },
+            .{ .symbol = 9, .bits = 5, .baseline = 0 },
+            .{ .symbol = 10, .bits = 5, .baseline = 0 },
+            .{ .symbol = 12, .bits = 5, .baseline = 0 },
+            .{ .symbol = 14, .bits = 6, .baseline = 0 },
+            .{ .symbol = 16, .bits = 5, .baseline = 0 },
+            .{ .symbol = 18, .bits = 5, .baseline = 0 },
+            .{ .symbol = 19, .bits = 5, .baseline = 0 },
+            .{ .symbol = 21, .bits = 5, .baseline = 0 },
+            .{ .symbol = 22, .bits = 5, .baseline = 0 },
+            .{ .symbol = 24, .bits = 5, .baseline = 0 },
+            .{ .symbol = 25, .bits = 5, .baseline = 32 },
+            .{ .symbol = 26, .bits = 5, .baseline = 0 },
+            .{ .symbol = 27, .bits = 6, .baseline = 0 },
+            .{ .symbol = 29, .bits = 6, .baseline = 0 },
+            .{ .symbol = 31, .bits = 6, .baseline = 0 },
+            .{ .symbol = 0, .bits = 4, .baseline = 32 },
+            .{ .symbol = 1, .bits = 4, .baseline = 0 },
+            .{ .symbol = 2, .bits = 5, .baseline = 0 },
+            .{ .symbol = 4, .bits = 5, .baseline = 32 },
+            .{ .symbol = 5, .bits = 5, .baseline = 0 },
+            .{ .symbol = 7, .bits = 5, .baseline = 32 },
+            .{ .symbol = 8, .bits = 5, .baseline = 0 },
+            .{ .symbol = 10, .bits = 5, .baseline = 32 },
+            .{ .symbol = 11, .bits = 5, .baseline = 0 },
+            .{ .symbol = 13, .bits = 6, .baseline = 0 },
+            .{ .symbol = 16, .bits = 5, .baseline = 32 },
+            .{ .symbol = 17, .bits = 5, .baseline = 0 },
+            .{ .symbol = 19, .bits = 5, .baseline = 32 },
+            .{ .symbol = 20, .bits = 5, .baseline = 0 },
+            .{ .symbol = 22, .bits = 5, .baseline = 32 },
+            .{ .symbol = 23, .bits = 5, .baseline = 0 },
+            .{ .symbol = 25, .bits = 4, .baseline = 0 },
+            .{ .symbol = 25, .bits = 4, .baseline = 16 },
+            .{ .symbol = 26, .bits = 5, .baseline = 32 },
+            .{ .symbol = 28, .bits = 6, .baseline = 0 },
+            .{ .symbol = 30, .bits = 6, .baseline = 0 },
+            .{ .symbol = 0, .bits = 4, .baseline = 48 },
+            .{ .symbol = 1, .bits = 4, .baseline = 16 },
+            .{ .symbol = 2, .bits = 5, .baseline = 32 },
+            .{ .symbol = 3, .bits = 5, .baseline = 32 },
+            .{ .symbol = 5, .bits = 5, .baseline = 32 },
+            .{ .symbol = 6, .bits = 5, .baseline = 32 },
+            .{ .symbol = 8, .bits = 5, .baseline = 32 },
+            .{ .symbol = 9, .bits = 5, .baseline = 32 },
+            .{ .symbol = 11, .bits = 5, .baseline = 32 },
+            .{ .symbol = 12, .bits = 5, .baseline = 32 },
+            .{ .symbol = 15, .bits = 6, .baseline = 0 },
+            .{ .symbol = 17, .bits = 5, .baseline = 32 },
+            .{ .symbol = 18, .bits = 5, .baseline = 32 },
+            .{ .symbol = 20, .bits = 5, .baseline = 32 },
+            .{ .symbol = 21, .bits = 5, .baseline = 32 },
+            .{ .symbol = 23, .bits = 5, .baseline = 32 },
+            .{ .symbol = 24, .bits = 5, .baseline = 32 },
+            .{ .symbol = 35, .bits = 6, .baseline = 0 },
+            .{ .symbol = 34, .bits = 6, .baseline = 0 },
+            .{ .symbol = 33, .bits = 6, .baseline = 0 },
+            .{ .symbol = 32, .bits = 6, .baseline = 0 },
+        },
+    };
+
+    // Predefined 64-state FSE table for match lengths. `test buildFseTable`
+    // in decompress.zig checks that `buildFseTable` reproduces it.
+    pub const predefined_match_fse_table = Table{
+        .fse = &[64]Table.Fse{
+            .{ .symbol = 0, .bits = 6, .baseline = 0 },
+            .{ .symbol = 1, .bits = 4, .baseline = 0 },
+            .{ .symbol = 2, .bits = 5, .baseline = 32 },
+            .{ .symbol = 3, .bits = 5, .baseline = 0 },
+            .{ .symbol = 5, .bits = 5, .baseline = 0 },
+            .{ .symbol = 6, .bits = 5, .baseline = 0 },
+            .{ .symbol = 8, .bits = 5, .baseline = 0 },
+            .{ .symbol = 10, .bits = 6, .baseline = 0 },
+            .{ .symbol = 13, .bits = 6, .baseline = 0 },
+            .{ .symbol = 16, .bits = 6, .baseline = 0 },
+            .{ .symbol = 19, .bits = 6, .baseline = 0 },
+            .{ .symbol = 22, .bits = 6, .baseline = 0 },
+            .{ .symbol = 25, .bits = 6, .baseline = 0 },
+            .{ .symbol = 28, .bits = 6, .baseline = 0 },
+            .{ .symbol = 31, .bits = 6, .baseline = 0 },
+            .{ .symbol = 33, .bits = 6, .baseline = 0 },
+            .{ .symbol = 35, .bits = 6, .baseline = 0 },
+            .{ .symbol = 37, .bits = 6, .baseline = 0 },
+            .{ .symbol = 39, .bits = 6, .baseline = 0 },
+            .{ .symbol = 41, .bits = 6, .baseline = 0 },
+            .{ .symbol = 43, .bits = 6, .baseline = 0 },
+            .{ .symbol = 45, .bits = 6, .baseline = 0 },
+            .{ .symbol = 1, .bits = 4, .baseline = 16 },
+            .{ .symbol = 2, .bits = 4, .baseline = 0 },
+            .{ .symbol = 3, .bits = 5, .baseline = 32 },
+            .{ .symbol = 4, .bits = 5, .baseline = 0 },
+            .{ .symbol = 6, .bits = 5, .baseline = 32 },
+            .{ .symbol = 7, .bits = 5, .baseline = 0 },
+            .{ .symbol = 9, .bits = 6, .baseline = 0 },
+            .{ .symbol = 12, .bits = 6, .baseline = 0 },
+            .{ .symbol = 15, .bits = 6, .baseline = 0 },
+            .{ .symbol = 18, .bits = 6, .baseline = 0 },
+            .{ .symbol = 21, .bits = 6, .baseline = 0 },
+            .{ .symbol = 24, .bits = 6, .baseline = 0 },
+            .{ .symbol = 27, .bits = 6, .baseline = 0 },
+            .{ .symbol = 30, .bits = 6, .baseline = 0 },
+            .{ .symbol = 32, .bits = 6, .baseline = 0 },
+            .{ .symbol = 34, .bits = 6, .baseline = 0 },
+            .{ .symbol = 36, .bits = 6, .baseline = 0 },
+            .{ .symbol = 38, .bits = 6, .baseline = 0 },
+            .{ .symbol = 40, .bits = 6, .baseline = 0 },
+            .{ .symbol = 42, .bits = 6, .baseline = 0 },
+            .{ .symbol = 44, .bits = 6, .baseline = 0 },
+            .{ .symbol = 1, .bits = 4, .baseline = 32 },
+            .{ .symbol = 1, .bits = 4, .baseline = 48 },
+            .{ .symbol = 2, .bits = 4, .baseline = 16 },
+            .{ .symbol = 4, .bits = 5, .baseline = 32 },
+            .{ .symbol = 5, .bits = 5, .baseline = 32 },
+            .{ .symbol = 7, .bits = 5, .baseline = 32 },
+            .{ .symbol = 8, .bits = 5, .baseline = 32 },
+            .{ .symbol = 11, .bits = 6, .baseline = 0 },
+            .{ .symbol = 14, .bits = 6, .baseline = 0 },
+            .{ .symbol = 17, .bits = 6, .baseline = 0 },
+            .{ .symbol = 20, .bits = 6, .baseline = 0 },
+            .{ .symbol = 23, .bits = 6, .baseline = 0 },
+            .{ .symbol = 26, .bits = 6, .baseline = 0 },
+            .{ .symbol = 29, .bits = 6, .baseline = 0 },
+            .{ .symbol = 52, .bits = 6, .baseline = 0 },
+            .{ .symbol = 51, .bits = 6, .baseline = 0 },
+            .{ .symbol = 50, .bits = 6, .baseline = 0 },
+            .{ .symbol = 49, .bits = 6, .baseline = 0 },
+            .{ .symbol = 48, .bits = 6, .baseline = 0 },
+            .{ .symbol = 47, .bits = 6, .baseline = 0 },
+            .{ .symbol = 46, .bits = 6, .baseline = 0 },
+        },
+    };
+
+    // Predefined 32-state FSE table for offset codes. `test buildFseTable`
+    // in decompress.zig checks that `buildFseTable` reproduces it.
+    pub const predefined_offset_fse_table = Table{
+        .fse = &[32]Table.Fse{
+            .{ .symbol = 0, .bits = 5, .baseline = 0 },
+            .{ .symbol = 6, .bits = 4, .baseline = 0 },
+            .{ .symbol = 9, .bits = 5, .baseline = 0 },
+            .{ .symbol = 15, .bits = 5, .baseline = 0 },
+            .{ .symbol = 21, .bits = 5, .baseline = 0 },
+            .{ .symbol = 3, .bits = 5, .baseline = 0 },
+            .{ .symbol = 7, .bits = 4, .baseline = 0 },
+            .{ .symbol = 12, .bits = 5, .baseline = 0 },
+            .{ .symbol = 18, .bits = 5, .baseline = 0 },
+            .{ .symbol = 23, .bits = 5, .baseline = 0 },
+            .{ .symbol = 5, .bits = 5, .baseline = 0 },
+            .{ .symbol = 8, .bits = 4, .baseline = 0 },
+            .{ .symbol = 14, .bits = 5, .baseline = 0 },
+            .{ .symbol = 20, .bits = 5, .baseline = 0 },
+            .{ .symbol = 2, .bits = 5, .baseline = 0 },
+            .{ .symbol = 7, .bits = 4, .baseline = 16 },
+            .{ .symbol = 11, .bits = 5, .baseline = 0 },
+            .{ .symbol = 17, .bits = 5, .baseline = 0 },
+            .{ .symbol = 22, .bits = 5, .baseline = 0 },
+            .{ .symbol = 4, .bits = 5, .baseline = 0 },
+            .{ .symbol = 8, .bits = 4, .baseline = 16 },
+            .{ .symbol = 13, .bits = 5, .baseline = 0 },
+            .{ .symbol = 19, .bits = 5, .baseline = 0 },
+            .{ .symbol = 1, .bits = 5, .baseline = 0 },
+            .{ .symbol = 6, .bits = 4, .baseline = 16 },
+            .{ .symbol = 10, .bits = 5, .baseline = 0 },
+            .{ .symbol = 16, .bits = 5, .baseline = 0 },
+            .{ .symbol = 28, .bits = 5, .baseline = 0 },
+            .{ .symbol = 27, .bits = 5, .baseline = 0 },
+            .{ .symbol = 26, .bits = 5, .baseline = 0 },
+            .{ .symbol = 25, .bits = 5, .baseline = 0 },
+            .{ .symbol = 24, .bits = 5, .baseline = 0 },
+        },
+    };
+    // NOTE(review): presumably the initial values of the three repeat
+    // offsets at the start of a frame — confirm against the decoder.
+    pub const start_repeated_offset_1 = 1;
+    pub const start_repeated_offset_2 = 4;
+    pub const start_repeated_offset_3 = 8;
+
+    // Largest accuracy log accepted per table kind.
+    // NOTE(review): presumably passed as `max_accuracy_log` to
+    // `decodeFseTable` — confirm against callers.
+    pub const table_accuracy_log_max = struct {
+        pub const literal = 9;
+        pub const match = 9;
+        pub const offset = 8;
+    };
+
+    // Largest number of symbols per table kind.
+    pub const table_symbol_count_max = struct {
+        pub const literal = 36;
+        pub const match = 53;
+        pub const offset = 32;
+    };
+
+    // Accuracy logs of the predefined tables above (64 = 1 << 6 states for
+    // literal and match, 32 = 1 << 5 states for offset).
+    pub const default_accuracy_log = struct {
+        pub const literal = 6;
+        pub const match = 6;
+        pub const offset = 5;
+    };
+};
+
+// Reference every declaration in this file recursively so they are all
+// semantically analysed when the tests are built.
+test {
+    const testing = @import("std").testing;
+    testing.refAllDeclsRecursive(@This());
+}