tar: improve diagnostic reporting

Using the Python testtar file (mentioned in #14310) to test diagnostic reporting. Added checksum computation using both the unsigned and the signed interpretation of the header bytes. Added skipping of GNU extended sparse headers when reporting an unsupported header in diagnostics. Note on testing: wget https://github.com/python/cpython/raw/3.11/Lib/test/testtar.tar -O /tmp/testtar.tar ``` test "Python testtar.tar file" { const file_name = "testtar.tar"; var file = try std.fs.cwd().openFile("/tmp/" ++ file_name, .{}); defer file.close(); var diag = Options.Diagnostics{ .allocator = std.testing.allocator }; defer diag.deinit(); var iter = iterator(file.reader(), &diag); while (try iter.next()) |f| { std.debug.print("supported: {} {s} {d}\n", .{ f.kind, f.name, f.size }); try f.skip(); } for (diag.errors.items) |e| { switch (e) { .unsupported_file_type => |u| { std.debug.print("unsupported: {} {s}\n", .{ u.file_type, u.file_name }); }, else => unreachable, } } } ```
2025-12-06 13:54:21 +00:00 · 2023-12-18 21:39:07 +01:00 · 2023-12-18 21:39:07 +01:00 · 7d3a31872e
commit 7d3a31872e
parent 7923a53996
1 changed files with 31 additions and 9 deletions
--- a/lib/std/tar.zig
+++ b/lib/std/tar.zig
@ -105,6 +105,8 @@ pub const Header = struct {
        // used to store the path or link name for the next file.
        gnu_long_name = 'L',
        gnu_long_link = 'K',
+        gnu_sparse = 'S',
+        solaris_extended_header = 'X',
        _,
    };

@ -194,16 +196,21 @@ pub const Header = struct {
        return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader;
    }

+    const Chksums = struct {
+        unsigned: u64,
+        signed: i64,
+    };
+
    // Sum of all bytes in the header block. The chksum field is treated as if
    // it were filled with spaces (ASCII 32).
-    fn computeChksum(header: Header) u64 {
-        var sum: u64 = 0;
-        for (header.bytes, 0..) |b, i| {
-            if (148 <= i and i < 156) continue; // skip chksum field bytes
-            sum += b;
+    fn computeChksum(header: Header) Chksums {
+        var cs: Chksums = .{ .signed = 0, .unsigned = 0 };
+        for (header.bytes, 0..) |v, i| {
+            const b = if (148 <= i and i < 156) 32 else v; // Treating chksum bytes as spaces.
+            cs.unsigned += b;
+            cs.signed += @as(i8, @bitCast(b));
        }
-        // Treating chksum bytes as spaces. 256 = 8 * 32, 8 spaces.
-        return if (sum > 0) sum + 256 else 0;
+        return cs;
    }

    // Checks calculated chksum with value of chksum field.
@ -211,8 +218,9 @@ pub const Header = struct {
    // Zero value indicates empty block.
    pub fn checkChksum(header: Header) !u64 {
        const field = try header.chksum();
-        const computed = header.computeChksum();
-        if (field != computed) return error.TarHeaderChksum;
+        const cs = header.computeChksum();
+        if (field == 0 and cs.unsigned == 256) return 0;
+        if (field != cs.unsigned and field != cs.signed) return error.TarHeaderChksum;
        return field;
    }
 };
@ -387,11 +395,25 @@ fn Iterator(comptime ReaderType: type) type {
                            .file_name = try d.allocator.dupe(u8, header.name()),
                            .file_type = kind,
                        } });
+                        if (kind == .gnu_sparse) {
+                            try self.skipGnuSparseExtendedHeaders(header);
+                        }
+                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
                    },
                }
            }
            return null;
        }
+
+        fn skipGnuSparseExtendedHeaders(self: *Self, header: Header) !void {
+            var is_extended = header.bytes[482] > 0;
+            while (is_extended) {
+                var buf: [Header.SIZE]u8 = undefined;
+                const n = try self.reader.readAll(&buf);
+                if (n < Header.SIZE) return error.UnexpectedEndOfStream;
+                is_extended = buf[504] > 0;
+            }
+        }
    };
 }