Merge pull request #22087 from ziglang/std.ArrayHashMap

std.ArrayHashMap: add `reinit` method and other housekeeping, including the move towards "unmanaged" containers
Andrew Kelley 2024-11-28 14:07:30 -05:00 committed by GitHub
commit 182cdf74bf


@@ -53,6 +53,14 @@ pub fn hashString(s: []const u8) u32 {
     return @as(u32, @truncate(std.hash.Wyhash.hash(0, s)));
 }
 
+/// Deprecated in favor of `ArrayHashMapWithAllocator` (no code changes needed)
+/// or `ArrayHashMapUnmanaged` (will need to update callsites to pass an
+/// allocator). After Zig 0.14.0 is released, `ArrayHashMapWithAllocator` will
+/// be removed and `ArrayHashMapUnmanaged` will be a deprecated alias. After
+/// Zig 0.15.0 is released, the deprecated alias `ArrayHashMapUnmanaged` will
+/// be removed.
+pub const ArrayHashMap = ArrayHashMapWithAllocator;
+
 /// A hash table of keys and values, each stored sequentially.
 ///
 /// Insertion order is preserved. In general, this data structure supports the same
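A minimal migration sketch for the deprecation above (illustrative only, not part of the diff; it uses the `Auto*` wrappers and `std.testing.allocator`). Existing managed code keeps working, now backed by `ArrayHashMapWithAllocator`; the unmanaged form takes the allocator at each callsite instead:

```zig
const std = @import("std");

test "managed to unmanaged migration" {
    const gpa = std.testing.allocator;

    // Managed: the allocator is stored at init time. No code changes
    // needed; the generic behind this wrapper is now spelled
    // `ArrayHashMapWithAllocator`.
    var managed = std.AutoArrayHashMap(u32, []const u8).init(gpa);
    defer managed.deinit();
    try managed.put(1, "one");

    // Unmanaged: the allocator is passed at each callsite instead.
    var unmanaged: std.AutoArrayHashMapUnmanaged(u32, []const u8) = .{};
    defer unmanaged.deinit(gpa);
    try unmanaged.put(gpa, 1, "one");
}
```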
@@ -67,7 +75,7 @@ pub fn hashString(s: []const u8) u32 {
 ///
 /// See `ArrayHashMapUnmanaged` for a variant of this data structure that accepts an
 /// `Allocator` as a parameter when needed rather than storing it.
-pub fn ArrayHashMap(
+pub fn ArrayHashMapWithAllocator(
     comptime K: type,
     comptime V: type,
     /// A namespace that provides these two functions:
@@ -604,42 +612,48 @@ pub fn ArrayHashMapUnmanaged(
             ordered,
         };
 
+        const Oom = Allocator.Error;
+
         /// Convert from an unmanaged map to a managed map. After calling this,
         /// the promoted map should no longer be used.
-        pub fn promote(self: Self, allocator: Allocator) Managed {
+        pub fn promote(self: Self, gpa: Allocator) Managed {
             if (@sizeOf(Context) != 0)
                 @compileError("Cannot infer context " ++ @typeName(Context) ++ ", call promoteContext instead.");
-            return self.promoteContext(allocator, undefined);
+            return self.promoteContext(gpa, undefined);
         }
 
-        pub fn promoteContext(self: Self, allocator: Allocator, ctx: Context) Managed {
+        pub fn promoteContext(self: Self, gpa: Allocator, ctx: Context) Managed {
             return .{
                 .unmanaged = self,
-                .allocator = allocator,
+                .allocator = gpa,
                 .ctx = ctx,
             };
         }
 
-        pub fn init(allocator: Allocator, key_list: []const K, value_list: []const V) !Self {
+        pub fn init(gpa: Allocator, key_list: []const K, value_list: []const V) Oom!Self {
             var self: Self = .{};
-            try self.entries.resize(allocator, key_list.len);
-            errdefer self.entries.deinit(allocator);
+            errdefer self.deinit(gpa);
+            try self.reinit(gpa, key_list, value_list);
+            return self;
+        }
+
+        pub fn reinit(self: *Self, gpa: Allocator, key_list: []const K, value_list: []const V) Oom!void {
+            try self.entries.resize(gpa, key_list.len);
             @memcpy(self.keys(), key_list);
             if (@sizeOf(V) != 0) {
                 assert(key_list.len == value_list.len);
                 @memcpy(self.values(), value_list);
             }
-            try self.reIndex(allocator);
-            return self;
+            try self.reIndex(gpa);
         }
 
         /// Frees the backing allocation and leaves the map in an undefined state.
         /// Note that this does not free keys or values. You must take care of that
         /// before calling this function, if it is needed.
-        pub fn deinit(self: *Self, allocator: Allocator) void {
+        pub fn deinit(self: *Self, gpa: Allocator) void {
             self.pointer_stability.assertUnlocked();
-            self.entries.deinit(allocator);
+            self.entries.deinit(gpa);
             if (self.index_header) |header| {
-                header.free(allocator);
+                header.free(gpa);
             }
             self.* = undefined;
         }
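A usage sketch for the new `init`/`reinit` pair (not part of the diff; it assumes the `std.AutoArrayHashMapUnmanaged` wrapper and `std.testing.allocator`). `init` builds a map from parallel key/value slices, and `reinit` repopulates an existing map the same way, reusing its entries storage when it is already large enough:

```zig
const std = @import("std");

test "init and reinit from parallel slices" {
    const gpa = std.testing.allocator;

    var map = try std.AutoArrayHashMapUnmanaged(u32, u32).init(
        gpa,
        &.{ 1, 2, 3 },
        &.{ 10, 20, 30 },
    );
    defer map.deinit(gpa);
    try std.testing.expectEqual(@as(?u32, 20), map.get(2));

    // Replace the contents wholesale; no fresh map needed.
    try map.reinit(gpa, &.{ 4, 5 }, &.{ 40, 50 });
    try std.testing.expectEqual(@as(usize, 2), map.count());
    try std.testing.expectEqual(@as(?u32, 40), map.get(4));
}
```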
@@ -677,13 +691,13 @@ pub fn ArrayHashMapUnmanaged(
         }
 
         /// Clears the map and releases the backing allocation
-        pub fn clearAndFree(self: *Self, allocator: Allocator) void {
+        pub fn clearAndFree(self: *Self, gpa: Allocator) void {
             self.pointer_stability.lock();
             defer self.pointer_stability.unlock();
 
-            self.entries.shrinkAndFree(allocator, 0);
+            self.entries.shrinkAndFree(gpa, 0);
             if (self.index_header) |header| {
-                header.free(allocator);
+                header.free(gpa);
                 self.index_header = null;
             }
         }
@@ -746,25 +760,25 @@ pub fn ArrayHashMapUnmanaged(
         /// Otherwise, puts a new item with undefined value, and
         /// the `Entry` pointer points to it. Caller should then initialize
         /// the value (but not the key).
-        pub fn getOrPut(self: *Self, allocator: Allocator, key: K) !GetOrPutResult {
+        pub fn getOrPut(self: *Self, gpa: Allocator, key: K) Oom!GetOrPutResult {
             if (@sizeOf(Context) != 0)
                 @compileError("Cannot infer context " ++ @typeName(Context) ++ ", call getOrPutContext instead.");
-            return self.getOrPutContext(allocator, key, undefined);
+            return self.getOrPutContext(gpa, key, undefined);
         }
 
-        pub fn getOrPutContext(self: *Self, allocator: Allocator, key: K, ctx: Context) !GetOrPutResult {
-            const gop = try self.getOrPutContextAdapted(allocator, key, ctx, ctx);
+        pub fn getOrPutContext(self: *Self, gpa: Allocator, key: K, ctx: Context) Oom!GetOrPutResult {
+            const gop = try self.getOrPutContextAdapted(gpa, key, ctx, ctx);
             if (!gop.found_existing) {
                 gop.key_ptr.* = key;
             }
             return gop;
         }
 
-        pub fn getOrPutAdapted(self: *Self, allocator: Allocator, key: anytype, key_ctx: anytype) !GetOrPutResult {
+        pub fn getOrPutAdapted(self: *Self, gpa: Allocator, key: anytype, key_ctx: anytype) Oom!GetOrPutResult {
             if (@sizeOf(Context) != 0)
                 @compileError("Cannot infer context " ++ @typeName(Context) ++ ", call getOrPutContextAdapted instead.");
-            return self.getOrPutContextAdapted(allocator, key, key_ctx, undefined);
+            return self.getOrPutContextAdapted(gpa, key, key_ctx, undefined);
         }
 
-        pub fn getOrPutContextAdapted(self: *Self, allocator: Allocator, key: anytype, key_ctx: anytype, ctx: Context) !GetOrPutResult {
-            self.ensureTotalCapacityContext(allocator, self.entries.len + 1, ctx) catch |err| {
+        pub fn getOrPutContextAdapted(self: *Self, gpa: Allocator, key: anytype, key_ctx: anytype, ctx: Context) Oom!GetOrPutResult {
+            self.ensureTotalCapacityContext(gpa, self.entries.len + 1, ctx) catch |err| {
                 // "If key exists this function cannot fail."
                 const index = self.getIndexAdapted(key, key_ctx) orelse return err;
                 const slice = self.entries.slice();
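For reference, the classic insert-or-update pattern with `getOrPut`, now passing the allocator per call (a sketch, not part of the diff; assumes `std.StringArrayHashMapUnmanaged` and `std.testing.allocator`):

```zig
const std = @import("std");

test "getOrPut counts occurrences in one lookup" {
    const gpa = std.testing.allocator;

    var counts: std.StringArrayHashMapUnmanaged(u32) = .{};
    defer counts.deinit(gpa);

    for ([_][]const u8{ "a", "b", "a" }) |word| {
        const gop = try counts.getOrPut(gpa, word);
        // For a fresh entry the value is undefined until we set it.
        if (!gop.found_existing) gop.value_ptr.* = 0;
        gop.value_ptr.* += 1;
    }
    try std.testing.expectEqual(@as(?u32, 2), counts.get("a"));
}
```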
@@ -844,13 +858,13 @@ pub fn ArrayHashMapUnmanaged(
             }
         }
 
-        pub fn getOrPutValue(self: *Self, allocator: Allocator, key: K, value: V) !GetOrPutResult {
+        pub fn getOrPutValue(self: *Self, gpa: Allocator, key: K, value: V) Oom!GetOrPutResult {
             if (@sizeOf(Context) != 0)
                 @compileError("Cannot infer context " ++ @typeName(Context) ++ ", call getOrPutValueContext instead.");
-            return self.getOrPutValueContext(allocator, key, value, undefined);
+            return self.getOrPutValueContext(gpa, key, value, undefined);
        }
 
-        pub fn getOrPutValueContext(self: *Self, allocator: Allocator, key: K, value: V, ctx: Context) !GetOrPutResult {
-            const res = try self.getOrPutContextAdapted(allocator, key, ctx, ctx);
+        pub fn getOrPutValueContext(self: *Self, gpa: Allocator, key: K, value: V, ctx: Context) Oom!GetOrPutResult {
+            const res = try self.getOrPutContextAdapted(gpa, key, ctx, ctx);
             if (!res.found_existing) {
                 res.key_ptr.* = key;
                 res.value_ptr.* = value;
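A sketch of the `getOrPutValue` semantics visible above: the supplied value is written only when the key was absent (illustrative only; assumes the `Auto*` wrapper and `std.testing.allocator`):

```zig
const std = @import("std");

test "getOrPutValue only writes the value for new keys" {
    const gpa = std.testing.allocator;

    var map: std.AutoArrayHashMapUnmanaged(u32, u32) = .{};
    defer map.deinit(gpa);

    _ = try map.getOrPutValue(gpa, 1, 100); // inserts 100
    _ = try map.getOrPutValue(gpa, 1, 999); // key exists; 100 is kept
    try std.testing.expectEqual(@as(?u32, 100), map.get(1));
}
```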
@@ -860,32 +874,32 @@ pub fn ArrayHashMapUnmanaged(
 
         /// Increases capacity, guaranteeing that insertions up until the
         /// `expected_count` will not cause an allocation, and therefore cannot fail.
-        pub fn ensureTotalCapacity(self: *Self, allocator: Allocator, new_capacity: usize) !void {
+        pub fn ensureTotalCapacity(self: *Self, gpa: Allocator, new_capacity: usize) Oom!void {
             if (@sizeOf(ByIndexContext) != 0)
                 @compileError("Cannot infer context " ++ @typeName(Context) ++ ", call ensureTotalCapacityContext instead.");
-            return self.ensureTotalCapacityContext(allocator, new_capacity, undefined);
+            return self.ensureTotalCapacityContext(gpa, new_capacity, undefined);
         }
 
-        pub fn ensureTotalCapacityContext(self: *Self, allocator: Allocator, new_capacity: usize, ctx: Context) !void {
+        pub fn ensureTotalCapacityContext(self: *Self, gpa: Allocator, new_capacity: usize, ctx: Context) Oom!void {
             self.pointer_stability.lock();
             defer self.pointer_stability.unlock();
 
             if (new_capacity <= linear_scan_max) {
-                try self.entries.ensureTotalCapacity(allocator, new_capacity);
+                try self.entries.ensureTotalCapacity(gpa, new_capacity);
                 return;
             }
 
             if (self.index_header) |header| {
                 if (new_capacity <= header.capacity()) {
-                    try self.entries.ensureTotalCapacity(allocator, new_capacity);
+                    try self.entries.ensureTotalCapacity(gpa, new_capacity);
                     return;
                 }
             }
 
-            try self.entries.ensureTotalCapacity(allocator, new_capacity);
+            try self.entries.ensureTotalCapacity(gpa, new_capacity);
             const new_bit_index = try IndexHeader.findBitIndex(new_capacity);
-            const new_header = try IndexHeader.alloc(allocator, new_bit_index);
-            if (self.index_header) |old_header| old_header.free(allocator);
+            const new_header = try IndexHeader.alloc(gpa, new_bit_index);
+            if (self.index_header) |old_header| old_header.free(gpa);
             self.insertAllEntriesIntoNewHeader(if (store_hash) {} else ctx, new_header);
             self.index_header = new_header;
         }
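The capacity guarantee above is what makes the `*AssumeCapacity` variants safe; a sketch (illustrative, not part of the diff; `putAssumeCapacity` is the existing infallible counterpart of `put`):

```zig
const std = @import("std");

test "preallocation makes subsequent inserts infallible" {
    const gpa = std.testing.allocator;

    var map: std.AutoArrayHashMapUnmanaged(u32, u32) = .{};
    defer map.deinit(gpa);

    // One fallible allocation up front...
    try map.ensureTotalCapacity(gpa, 8);
    // ...then inserts within that capacity cannot fail.
    for (0..8) |i| {
        map.putAssumeCapacity(@intCast(i), @intCast(i * i));
    }
    try std.testing.expectEqual(@as(usize, 8), map.count());
}
```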
@@ -895,20 +909,20 @@ pub fn ArrayHashMapUnmanaged(
         /// therefore cannot fail.
         pub fn ensureUnusedCapacity(
             self: *Self,
-            allocator: Allocator,
+            gpa: Allocator,
             additional_capacity: usize,
-        ) !void {
+        ) Oom!void {
             if (@sizeOf(Context) != 0)
                 @compileError("Cannot infer context " ++ @typeName(Context) ++ ", call ensureTotalCapacityContext instead.");
-            return self.ensureUnusedCapacityContext(allocator, additional_capacity, undefined);
+            return self.ensureUnusedCapacityContext(gpa, additional_capacity, undefined);
         }
 
         pub fn ensureUnusedCapacityContext(
             self: *Self,
-            allocator: Allocator,
+            gpa: Allocator,
             additional_capacity: usize,
             ctx: Context,
-        ) !void {
-            return self.ensureTotalCapacityContext(allocator, self.count() + additional_capacity, ctx);
+        ) Oom!void {
+            return self.ensureTotalCapacityContext(gpa, self.count() + additional_capacity, ctx);
         }
 
         /// Returns the number of total elements which may be present before it is
@@ -922,25 +936,25 @@ pub fn ArrayHashMapUnmanaged(
         /// Clobbers any existing data. To detect if a put would clobber
         /// existing data, see `getOrPut`.
-        pub fn put(self: *Self, allocator: Allocator, key: K, value: V) !void {
+        pub fn put(self: *Self, gpa: Allocator, key: K, value: V) Oom!void {
             if (@sizeOf(Context) != 0)
                 @compileError("Cannot infer context " ++ @typeName(Context) ++ ", call putContext instead.");
-            return self.putContext(allocator, key, value, undefined);
+            return self.putContext(gpa, key, value, undefined);
         }
 
-        pub fn putContext(self: *Self, allocator: Allocator, key: K, value: V, ctx: Context) !void {
-            const result = try self.getOrPutContext(allocator, key, ctx);
+        pub fn putContext(self: *Self, gpa: Allocator, key: K, value: V, ctx: Context) Oom!void {
+            const result = try self.getOrPutContext(gpa, key, ctx);
             result.value_ptr.* = value;
         }
 
         /// Inserts a key-value pair into the hash map, asserting that no previous
         /// entry with the same key is already present
-        pub fn putNoClobber(self: *Self, allocator: Allocator, key: K, value: V) !void {
+        pub fn putNoClobber(self: *Self, gpa: Allocator, key: K, value: V) Oom!void {
             if (@sizeOf(Context) != 0)
                 @compileError("Cannot infer context " ++ @typeName(Context) ++ ", call putNoClobberContext instead.");
-            return self.putNoClobberContext(allocator, key, value, undefined);
+            return self.putNoClobberContext(gpa, key, value, undefined);
         }
 
-        pub fn putNoClobberContext(self: *Self, allocator: Allocator, key: K, value: V, ctx: Context) !void {
-            const result = try self.getOrPutContext(allocator, key, ctx);
+        pub fn putNoClobberContext(self: *Self, gpa: Allocator, key: K, value: V, ctx: Context) Oom!void {
+            const result = try self.getOrPutContext(gpa, key, ctx);
             assert(!result.found_existing);
             result.value_ptr.* = value;
         }
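The clobbering distinction in a nutshell (a sketch, not part of the diff; assumes the `Auto*` wrapper and `std.testing.allocator`):

```zig
const std = @import("std");

test "put clobbers, putNoClobber asserts uniqueness" {
    const gpa = std.testing.allocator;

    var map: std.AutoArrayHashMapUnmanaged(u32, u32) = .{};
    defer map.deinit(gpa);

    try map.put(gpa, 1, 10);
    try map.put(gpa, 1, 11); // silently overwrites the old value
    try std.testing.expectEqual(@as(?u32, 11), map.get(1));

    // Asserts (in safe builds) that the key is not already present.
    try map.putNoClobber(gpa, 2, 20);
}
```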
@@ -973,13 +987,13 @@ pub fn ArrayHashMapUnmanaged(
         }
 
         /// Inserts a new `Entry` into the hash map, returning the previous one, if any.
-        pub fn fetchPut(self: *Self, allocator: Allocator, key: K, value: V) !?KV {
+        pub fn fetchPut(self: *Self, gpa: Allocator, key: K, value: V) Oom!?KV {
             if (@sizeOf(Context) != 0)
                 @compileError("Cannot infer context " ++ @typeName(Context) ++ ", call fetchPutContext instead.");
-            return self.fetchPutContext(allocator, key, value, undefined);
+            return self.fetchPutContext(gpa, key, value, undefined);
         }
 
-        pub fn fetchPutContext(self: *Self, allocator: Allocator, key: K, value: V, ctx: Context) !?KV {
-            const gop = try self.getOrPutContext(allocator, key, ctx);
+        pub fn fetchPutContext(self: *Self, gpa: Allocator, key: K, value: V, ctx: Context) Oom!?KV {
+            const gop = try self.getOrPutContext(gpa, key, ctx);
             var result: ?KV = null;
             if (gop.found_existing) {
                 result = KV{
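`fetchPut` in action, handing back the displaced entry so the caller can release it (a sketch, not part of the diff; assumes the `Auto*` wrapper and `std.testing.allocator`):

```zig
const std = @import("std");

test "fetchPut returns the replaced entry" {
    const gpa = std.testing.allocator;

    var map: std.AutoArrayHashMapUnmanaged(u32, []const u8) = .{};
    defer map.deinit(gpa);

    const first = try map.fetchPut(gpa, 1, "one");
    try std.testing.expect(first == null); // nothing was replaced

    const second = try map.fetchPut(gpa, 1, "uno");
    try std.testing.expectEqualStrings("one", second.?.value);
}
```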
@@ -1265,20 +1279,20 @@ pub fn ArrayHashMapUnmanaged(
         /// Create a copy of the hash map which can be modified separately.
         /// The copy uses the same context as this instance, but is allocated
         /// with the provided allocator.
-        pub fn clone(self: Self, allocator: Allocator) !Self {
+        pub fn clone(self: Self, gpa: Allocator) Oom!Self {
             if (@sizeOf(ByIndexContext) != 0)
                 @compileError("Cannot infer context " ++ @typeName(Context) ++ ", call cloneContext instead.");
-            return self.cloneContext(allocator, undefined);
+            return self.cloneContext(gpa, undefined);
         }
 
-        pub fn cloneContext(self: Self, allocator: Allocator, ctx: Context) !Self {
+        pub fn cloneContext(self: Self, gpa: Allocator, ctx: Context) Oom!Self {
             var other: Self = .{};
-            other.entries = try self.entries.clone(allocator);
-            errdefer other.entries.deinit(allocator);
+            other.entries = try self.entries.clone(gpa);
+            errdefer other.entries.deinit(gpa);
 
             if (self.index_header) |header| {
                 // TODO: I'm pretty sure this could be memcpy'd instead of
                 // doing all this work.
-                const new_header = try IndexHeader.alloc(allocator, header.bit_index);
+                const new_header = try IndexHeader.alloc(gpa, header.bit_index);
                 other.insertAllEntriesIntoNewHeader(if (store_hash) {} else ctx, new_header);
                 other.index_header = new_header;
             }
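As the doc comment says, the clone is independent of the original; a sketch (illustrative only; assumes the `Auto*` wrapper and `std.testing.allocator`):

```zig
const std = @import("std");

test "clone produces an independent copy" {
    const gpa = std.testing.allocator;

    var original: std.AutoArrayHashMapUnmanaged(u32, u32) = .{};
    defer original.deinit(gpa);
    try original.put(gpa, 1, 10);

    var copy = try original.clone(gpa);
    defer copy.deinit(gpa);
    try copy.put(gpa, 2, 20);

    // Mutating the copy does not affect the original.
    try std.testing.expectEqual(@as(usize, 1), original.count());
    try std.testing.expectEqual(@as(usize, 2), copy.count());
}
```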
@@ -1304,13 +1318,13 @@ pub fn ArrayHashMapUnmanaged(
         /// directly without going through the methods of this map.
         ///
         /// The time complexity of this operation is O(n).
-        pub fn reIndex(self: *Self, allocator: Allocator) !void {
+        pub fn reIndex(self: *Self, gpa: Allocator) Oom!void {
             if (@sizeOf(ByIndexContext) != 0)
                 @compileError("Cannot infer context " ++ @typeName(Context) ++ ", call reIndexContext instead.");
-            return self.reIndexContext(allocator, undefined);
+            return self.reIndexContext(gpa, undefined);
         }
 
-        pub fn reIndexContext(self: *Self, allocator: Allocator, ctx: Context) !void {
+        pub fn reIndexContext(self: *Self, gpa: Allocator, ctx: Context) Oom!void {
             // Recompute all hashes.
             if (store_hash) {
                 for (self.keys(), self.entries.items(.hash)) |key, *hash| {
@@ -1323,8 +1337,8 @@ pub fn ArrayHashMapUnmanaged(
             // We're going to rebuild the index header and replace the existing one (if any). The
             // indexes should be sized such that they will be at most 60% full.
             const bit_index = try IndexHeader.findBitIndex(self.entries.capacity);
-            const new_header = try IndexHeader.alloc(allocator, bit_index);
-            if (self.index_header) |header| header.free(allocator);
+            const new_header = try IndexHeader.alloc(gpa, bit_index);
+            if (self.index_header) |header| header.free(gpa);
             self.insertAllEntriesIntoNewHeader(if (store_hash) {} else ctx, new_header);
             self.index_header = new_header;
         }
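A sketch of the workflow `reIndex` exists for, per its doc comment: mutate keys in backing storage directly, then rebuild the index before relying on lookups again (illustrative only; assumes the `Auto*` wrapper and `std.testing.allocator`):

```zig
const std = @import("std");

test "reIndex revalidates lookups after in-place key edits" {
    const gpa = std.testing.allocator;

    var map: std.AutoArrayHashMapUnmanaged(u32, u32) = .{};
    defer map.deinit(gpa);
    try map.put(gpa, 1, 10);

    // Writing through the keys() slice bypasses the hash index...
    map.keys()[0] = 2;
    // ...so the index must be rebuilt before lookups are reliable.
    try map.reIndex(gpa);
    try std.testing.expectEqual(@as(?u32, 10), map.get(2));
}
```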
@@ -1416,10 +1430,10 @@ pub fn ArrayHashMapUnmanaged(
         /// performing hash and equality checks. It is a bug to call this
         /// function if the discarded entries require deinitialization. For
         /// that use case, `shrinkRetainingCapacity` can be used instead.
-        pub fn shrinkAndFree(self: *Self, allocator: Allocator, new_len: usize) void {
+        pub fn shrinkAndFree(self: *Self, gpa: Allocator, new_len: usize) void {
             if (@sizeOf(ByIndexContext) != 0)
                 @compileError("Cannot infer context " ++ @typeName(Context) ++ ", call shrinkAndFreeContext instead.");
-            return self.shrinkAndFreeContext(allocator, new_len, undefined);
+            return self.shrinkAndFreeContext(gpa, new_len, undefined);
         }
 
         /// Shrinks the underlying `Entry` array to `new_len` elements and
/// Shrinks the underlying `Entry` array to `new_len` elements and
@ -1430,7 +1444,7 @@ pub fn ArrayHashMapUnmanaged(
/// function if the discarded entries require deinitialization. For
/// that use case, `shrinkRetainingCapacityContext` can be used
/// instead.
pub fn shrinkAndFreeContext(self: *Self, allocator: Allocator, new_len: usize, ctx: Context) void {
pub fn shrinkAndFreeContext(self: *Self, gpa: Allocator, new_len: usize, ctx: Context) void {
self.pointer_stability.lock();
defer self.pointer_stability.unlock();
@@ -1442,7 +1456,7 @@ pub fn ArrayHashMapUnmanaged(
                 while (i < self.entries.len) : (i += 1)
                     self.removeFromIndexByIndex(i, if (store_hash) {} else ctx, header);
             }
-            self.entries.shrinkAndFree(allocator, new_len);
+            self.entries.shrinkAndFree(gpa, new_len);
         }
 
         /// Removes the last inserted `Entry` in the hash map and returns it.
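Since insertion order is preserved, `shrinkAndFree` truncates from the most recently inserted end; a sketch (illustrative only; assumes the `Auto*` wrapper and `std.testing.allocator`):

```zig
const std = @import("std");

test "shrinkAndFree keeps the oldest entries" {
    const gpa = std.testing.allocator;

    var map: std.AutoArrayHashMapUnmanaged(u32, u32) = .{};
    defer map.deinit(gpa);
    try map.put(gpa, 1, 10);
    try map.put(gpa, 2, 20);
    try map.put(gpa, 3, 30);

    // Keeps the first two entries in insertion order, drops the rest.
    map.shrinkAndFree(gpa, 2);
    try std.testing.expectEqual(@as(usize, 2), map.count());
    try std.testing.expect(map.get(3) == null);
}
```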
@@ -2086,7 +2100,7 @@ const IndexHeader = struct {
         return @as(u32, @intCast(self.length() - 1));
     }
 
-    fn findBitIndex(desired_capacity: usize) !u8 {
+    fn findBitIndex(desired_capacity: usize) Allocator.Error!u8 {
         if (desired_capacity > max_capacity) return error.OutOfMemory;
         var new_bit_index = @as(u8, @intCast(std.math.log2_int_ceil(usize, desired_capacity)));
         if (desired_capacity > index_capacities[new_bit_index]) new_bit_index += 1;
@@ -2097,11 +2111,11 @@ const IndexHeader = struct {
 
     /// Allocates an index header, and fills the entryIndexes array with empty.
    /// The distance array contents are undefined.
-    fn alloc(allocator: Allocator, new_bit_index: u8) !*IndexHeader {
+    fn alloc(gpa: Allocator, new_bit_index: u8) Allocator.Error!*IndexHeader {
         const len = @as(usize, 1) << @as(math.Log2Int(usize), @intCast(new_bit_index));
         const index_size = hash_map.capacityIndexSize(new_bit_index);
         const nbytes = @sizeOf(IndexHeader) + index_size * len;
-        const bytes = try allocator.alignedAlloc(u8, @alignOf(IndexHeader), nbytes);
+        const bytes = try gpa.alignedAlloc(u8, @alignOf(IndexHeader), nbytes);
         @memset(bytes[@sizeOf(IndexHeader)..], 0xff);
         const result: *IndexHeader = @alignCast(@ptrCast(bytes.ptr));
         result.* = .{
@@ -2111,11 +2125,11 @@ const IndexHeader = struct {
     }
 
     /// Releases the memory for a header and its associated arrays.
-    fn free(header: *IndexHeader, allocator: Allocator) void {
+    fn free(header: *IndexHeader, gpa: Allocator) void {
         const index_size = hash_map.capacityIndexSize(header.bit_index);
         const ptr: [*]align(@alignOf(IndexHeader)) u8 = @ptrCast(header);
         const slice = ptr[0 .. @sizeOf(IndexHeader) + header.length() * index_size];
-        allocator.free(slice);
+        gpa.free(slice);
     }
 
     /// Puts an IndexHeader into the state that it would be in after being freshly allocated.