From bdacf00278c43c2db75feb0a2d9db8e41572e39d Mon Sep 17 00:00:00 2001 From: p-rosit Date: Mon, 8 Sep 2025 17:35:31 +0200 Subject: [PATCH 01/24] remove cache (incompatible with general context) --- lib/std/sort/block.zig | 924 ++++++++++++++++------------------------- 1 file changed, 359 insertions(+), 565 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index 4c94fb78ad..ce34cd3bc0 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -113,7 +113,6 @@ pub fn block( }.lessThan else lessThanFn; // Implementation ported from https://github.com/BonzaiThePenguin/WikiSort/blob/master/WikiSort.c - var cache: [512]T = undefined; if (items.len < 4) { if (items.len == 3) { @@ -217,535 +216,407 @@ pub fn block( // then merge sort the higher levels, which can be 8-15, 16-31, 32-63, 64-127, etc. while (true) { - // if every A and B block will fit into the cache, use a special branch - // specifically for merging with the cache - // (we use < rather than <= since the block size might be one more than - // iterator.length()) - if (iterator.length() < cache.len) { - // if four subarrays fit into the cache, it's faster to merge both - // pairs of subarrays into the cache, - // then merge the two merged subarrays from the cache back into the original array - if ((iterator.length() + 1) * 4 <= cache.len and iterator.length() * 4 <= items.len) { - iterator.begin(); - while (!iterator.finished()) { - // merge A1 and B1 into the cache - var A1 = iterator.nextRange(); - var B1 = iterator.nextRange(); - var A2 = iterator.nextRange(); - var B2 = iterator.nextRange(); + // this is where the in-place merge logic starts! + // 1. pull out two internal buffers each containing √A unique values + // 1a. adjust block_size and buffer_size if we couldn't find enough unique values + // 2. loop over the A and B subarrays within this level of the merge sort + // 3. break A and B into blocks of size 'block_size' + // 4. "tag" each of the A blocks with values from the first internal buffer + // 5. roll the A blocks through the B blocks and drop/rotate them where they belong + // 6. merge each A block with any B values that follow, using the second internal buffer + // 7. sort the second internal buffer if it exists + // 8. redistribute the two internal buffers back into the items + var block_size: usize = math.sqrt(iterator.length()); + var buffer_size = iterator.length() / block_size + 1; - if (lessThan(context, items[B1.end - 1], items[A1.start])) { - // the two ranges are in reverse order, so copy them in reverse order into the cache - const a1_items = items[A1.start..A1.end]; - @memcpy(cache[B1.length()..][0..a1_items.len], a1_items); - const b1_items = items[B1.start..B1.end]; - @memcpy(cache[0..b1_items.len], b1_items); - } else if (lessThan(context, items[B1.start], items[A1.end - 1])) { - // these two ranges weren't already in order, so merge them into the cache - mergeInto(T, items, A1, B1, cache[0..], context, lessThan); - } else { - // if A1, B1, A2, and B2 are all in order, skip doing anything else - if (!lessThan(context, items[B2.start], items[A2.end - 1]) and !lessThan(context, items[A2.start], items[B1.end - 1])) continue; + // as an optimization, we really only need to pull out the internal buffers once for each level of merges + // after that we can reuse the same buffers over and over, then redistribute it when we're finished with this level + var A: Range = undefined; + var B: Range = undefined; + var index: usize = 0; + var last: usize = 0; + var count: usize = 0; + var find: usize = 0; + var start: usize = 0; + var pull_index: usize = 0; + var pull = [_]Pull{ + Pull{ + .from = 0, + .to = 0, + .count = 0, + .range = Range.init(0, 0), + }, + Pull{ + .from = 0, + .to = 0, + .count = 0, + .range = Range.init(0, 0), + }, + }; - // copy A1 and B1 into the cache in the same order - const a1_items = items[A1.start..A1.end]; - @memcpy(cache[0..a1_items.len], a1_items); - const b1_items = items[B1.start..B1.end]; - @memcpy(cache[A1.length()..][0..b1_items.len], b1_items); - } - A1 = Range.init(A1.start, B1.end); + var buffer1 = Range.init(0, 0); + var buffer2 = Range.init(0, 0); - // merge A2 and B2 into the cache - if (lessThan(context, items[B2.end - 1], items[A2.start])) { - // the two ranges are in reverse order, so copy them in reverse order into the cache - const a2_items = items[A2.start..A2.end]; - @memcpy(cache[A1.length() + B2.length() ..][0..a2_items.len], a2_items); - const b2_items = items[B2.start..B2.end]; - @memcpy(cache[A1.length()..][0..b2_items.len], b2_items); - } else if (lessThan(context, items[B2.start], items[A2.end - 1])) { - // these two ranges weren't already in order, so merge them into the cache - mergeInto(T, items, A2, B2, cache[A1.length()..], context, lessThan); - } else { - // copy A2 and B2 into the cache in the same order - const a2_items = items[A2.start..A2.end]; - @memcpy(cache[A1.length()..][0..a2_items.len], a2_items); - const b2_items = items[B2.start..B2.end]; - @memcpy(cache[A1.length() + A2.length() ..][0..b2_items.len], b2_items); - } - A2 = Range.init(A2.start, B2.end); + // find two internal buffers of size 'buffer_size' each + find = buffer_size + buffer_size; + var find_separately = false; - // merge A1 and A2 from the cache into the items - const A3 = Range.init(0, A1.length()); - const B3 = Range.init(A1.length(), A1.length() + A2.length()); + if (find > iterator.length()) { + // we can't fit both buffers into the same A or B subarray, so find two buffers separately + find = buffer_size; + find_separately = true; + } - if (lessThan(context, cache[B3.end - 1], cache[A3.start])) { - // the two ranges are in reverse order, so copy them in reverse order into the items - const a3_items = cache[A3.start..A3.end]; - @memcpy(items[A1.start + A2.length() ..][0..a3_items.len], a3_items); - const b3_items = cache[B3.start..B3.end]; - @memcpy(items[A1.start..][0..b3_items.len], b3_items); - } else if (lessThan(context, cache[B3.start], cache[A3.end - 1])) { - // these two ranges weren't already in order, so merge them back into the items - mergeInto(T, cache[0..], A3, B3, items[A1.start..], context, lessThan); - } else { - // copy A3 and B3 into the items in the same order - const a3_items = cache[A3.start..A3.end]; - @memcpy(items[A1.start..][0..a3_items.len], a3_items); - const b3_items = cache[B3.start..B3.end]; - @memcpy(items[A1.start + A1.length() ..][0..b3_items.len], b3_items); - } - } + // we need to find either a single contiguous space containing 2√A unique values (which will be split up into two buffers of size √A each), + // or we need to find one buffer of < 2√A unique values, and a second buffer of √A unique values, + // OR if we couldn't find that many unique values, we need the largest possible buffer we can get - // we merged two levels at the same time, so we're done with this level already - // (iterator.nextLevel() is called again at the bottom of this outer merge loop) - _ = iterator.nextLevel(); - } else { - iterator.begin(); - while (!iterator.finished()) { - const A = iterator.nextRange(); - const B = iterator.nextRange(); + // in the case where it couldn't find a single buffer of at least √A unique values, + // all of the Merge steps must be replaced by a different merge algorithm (MergeInPlace) + iterator.begin(); + while (!iterator.finished()) { + A = iterator.nextRange(); + B = iterator.nextRange(); - if (lessThan(context, items[B.end - 1], items[A.start])) { - // the two ranges are in reverse order, so a simple rotation should fix it - mem.rotate(T, items[A.start..B.end], A.length()); - } else if (lessThan(context, items[B.start], items[A.end - 1])) { - // these two ranges weren't already in order, so we'll need to merge them! - const a_items = items[A.start..A.end]; - @memcpy(cache[0..a_items.len], a_items); - mergeExternal(T, items, A, B, cache[0..], context, lessThan); - } - } + // just store information about where the values will be pulled from and to, + // as well as how many values there are, to create the two internal buffers + + // check A for the number of unique values we need to fill an internal buffer + // these values will be pulled out to the start of A + last = A.start; + count = 1; + while (count < find) : ({ + last = index; + count += 1; + }) { + index = findLastForward(T, items, items[last], Range.init(last + 1, A.end), find - count, context, lessThan); + if (index == A.end) break; } - } else { - // this is where the in-place merge logic starts! - // 1. pull out two internal buffers each containing √A unique values - // 1a. adjust block_size and buffer_size if we couldn't find enough unique values - // 2. loop over the A and B subarrays within this level of the merge sort - // 3. break A and B into blocks of size 'block_size' - // 4. "tag" each of the A blocks with values from the first internal buffer - // 5. roll the A blocks through the B blocks and drop/rotate them where they belong - // 6. merge each A block with any B values that follow, using the cache or the second internal buffer - // 7. sort the second internal buffer if it exists - // 8. redistribute the two internal buffers back into the items - var block_size: usize = math.sqrt(iterator.length()); - var buffer_size = iterator.length() / block_size + 1; + index = last; - // as an optimization, we really only need to pull out the internal buffers once for each level of merges - // after that we can reuse the same buffers over and over, then redistribute it when we're finished with this level - var A: Range = undefined; - var B: Range = undefined; - var index: usize = 0; - var last: usize = 0; - var count: usize = 0; - var find: usize = 0; - var start: usize = 0; - var pull_index: usize = 0; - var pull = [_]Pull{ - Pull{ - .from = 0, - .to = 0, - .count = 0, - .range = Range.init(0, 0), - }, - Pull{ - .from = 0, - .to = 0, - .count = 0, - .range = Range.init(0, 0), - }, - }; + if (count >= buffer_size) { + // keep track of the range within the items where we'll need to "pull out" these values to create the internal buffer + pull[pull_index] = Pull{ + .range = Range.init(A.start, B.end), + .count = count, + .from = index, + .to = A.start, + }; + pull_index = 1; - var buffer1 = Range.init(0, 0); - var buffer2 = Range.init(0, 0); - - // find two internal buffers of size 'buffer_size' each - find = buffer_size + buffer_size; - var find_separately = false; - - if (block_size <= cache.len) { - // if every A block fits into the cache then we won't need the second internal buffer, - // so we really only need to find 'buffer_size' unique values - find = buffer_size; - } else if (find > iterator.length()) { - // we can't fit both buffers into the same A or B subarray, so find two buffers separately - find = buffer_size; - find_separately = true; - } - - // we need to find either a single contiguous space containing 2√A unique values (which will be split up into two buffers of size √A each), - // or we need to find one buffer of < 2√A unique values, and a second buffer of √A unique values, - // OR if we couldn't find that many unique values, we need the largest possible buffer we can get - - // in the case where it couldn't find a single buffer of at least √A unique values, - // all of the Merge steps must be replaced by a different merge algorithm (MergeInPlace) - iterator.begin(); - while (!iterator.finished()) { - A = iterator.nextRange(); - B = iterator.nextRange(); - - // just store information about where the values will be pulled from and to, - // as well as how many values there are, to create the two internal buffers - - // check A for the number of unique values we need to fill an internal buffer - // these values will be pulled out to the start of A - last = A.start; - count = 1; - while (count < find) : ({ - last = index; - count += 1; - }) { - index = findLastForward(T, items, items[last], Range.init(last + 1, A.end), find - count, context, lessThan); - if (index == A.end) break; - } - index = last; - - if (count >= buffer_size) { - // keep track of the range within the items where we'll need to "pull out" these values to create the internal buffer - pull[pull_index] = Pull{ - .range = Range.init(A.start, B.end), - .count = count, - .from = index, - .to = A.start, - }; - pull_index = 1; - - if (count == buffer_size + buffer_size) { - // we were able to find a single contiguous section containing 2√A unique values, - // so this section can be used to contain both of the internal buffers we'll need - buffer1 = Range.init(A.start, A.start + buffer_size); - buffer2 = Range.init(A.start + buffer_size, A.start + count); - break; - } else if (find == buffer_size + buffer_size) { - // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values, - // so we still need to find a second separate buffer of at least √A unique values - buffer1 = Range.init(A.start, A.start + count); - find = buffer_size; - } else if (block_size <= cache.len) { - // we found the first and only internal buffer that we need, so we're done! - buffer1 = Range.init(A.start, A.start + count); - break; - } else if (find_separately) { - // found one buffer, but now find the other one - buffer1 = Range.init(A.start, A.start + count); - find_separately = false; - } else { - // we found a second buffer in an 'A' subarray containing √A unique values, so we're done! - buffer2 = Range.init(A.start, A.start + count); - break; - } - } else if (pull_index == 0 and count > buffer1.length()) { - // keep track of the largest buffer we were able to find + if (count == buffer_size + buffer_size) { + // we were able to find a single contiguous section containing 2√A unique values, + // so this section can be used to contain both of the internal buffers we'll need + buffer1 = Range.init(A.start, A.start + buffer_size); + buffer2 = Range.init(A.start + buffer_size, A.start + count); + break; + } else if (find == buffer_size + buffer_size) { + // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values, + // so we still need to find a second separate buffer of at least √A unique values buffer1 = Range.init(A.start, A.start + count); - pull[pull_index] = Pull{ - .range = Range.init(A.start, B.end), - .count = count, - .from = index, - .to = A.start, - }; + find = buffer_size; + } else if (find_separately) { + // found one buffer, but now find the other one + buffer1 = Range.init(A.start, A.start + count); + find_separately = false; + } else { + // we found a second buffer in an 'A' subarray containing √A unique values, so we're done! + buffer2 = Range.init(A.start, A.start + count); + break; } + } else if (pull_index == 0 and count > buffer1.length()) { + // keep track of the largest buffer we were able to find + buffer1 = Range.init(A.start, A.start + count); + pull[pull_index] = Pull{ + .range = Range.init(A.start, B.end), + .count = count, + .from = index, + .to = A.start, + }; + } - // check B for the number of unique values we need to fill an internal buffer - // these values will be pulled out to the end of B - last = B.end - 1; - count = 1; - while (count < find) : ({ - last = index - 1; - count += 1; - }) { - index = findFirstBackward(T, items, items[last], Range.init(B.start, last), find - count, context, lessThan); - if (index == B.start) break; - } - index = last; + // check B for the number of unique values we need to fill an internal buffer + // these values will be pulled out to the end of B + last = B.end - 1; + count = 1; + while (count < find) : ({ + last = index - 1; + count += 1; + }) { + index = findFirstBackward(T, items, items[last], Range.init(B.start, last), find - count, context, lessThan); + if (index == B.start) break; + } + index = last; - if (count >= buffer_size) { - // keep track of the range within the items where we'll need to "pull out" these values to create the internal buffe - pull[pull_index] = Pull{ - .range = Range.init(A.start, B.end), - .count = count, - .from = index, - .to = B.end, - }; - pull_index = 1; + if (count >= buffer_size) { + // keep track of the range within the items where we'll need to "pull out" these values to create the internal buffe + pull[pull_index] = Pull{ + .range = Range.init(A.start, B.end), + .count = count, + .from = index, + .to = B.end, + }; + pull_index = 1; - if (count == buffer_size + buffer_size) { - // we were able to find a single contiguous section containing 2√A unique values, - // so this section can be used to contain both of the internal buffers we'll need - buffer1 = Range.init(B.end - count, B.end - buffer_size); - buffer2 = Range.init(B.end - buffer_size, B.end); - break; - } else if (find == buffer_size + buffer_size) { - // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values, - // so we still need to find a second separate buffer of at least √A unique values - buffer1 = Range.init(B.end - count, B.end); - find = buffer_size; - } else if (block_size <= cache.len) { - // we found the first and only internal buffer that we need, so we're done! - buffer1 = Range.init(B.end - count, B.end); - break; - } else if (find_separately) { - // found one buffer, but now find the other one - buffer1 = Range.init(B.end - count, B.end); - find_separately = false; - } else { - // buffer2 will be pulled out from a 'B' subarray, so if the first buffer was pulled out from the corresponding 'A' subarray, - // we need to adjust the end point for that A subarray so it knows to stop redistributing its values before reaching buffer2 - if (pull[0].range.start == A.start) pull[0].range.end -= pull[1].count; - - // we found a second buffer in an 'B' subarray containing √A unique values, so we're done! - buffer2 = Range.init(B.end - count, B.end); - break; - } - } else if (pull_index == 0 and count > buffer1.length()) { - // keep track of the largest buffer we were able to find + if (count == buffer_size + buffer_size) { + // we were able to find a single contiguous section containing 2√A unique values, + // so this section can be used to contain both of the internal buffers we'll need + buffer1 = Range.init(B.end - count, B.end - buffer_size); + buffer2 = Range.init(B.end - buffer_size, B.end); + break; + } else if (find == buffer_size + buffer_size) { + // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values, + // so we still need to find a second separate buffer of at least √A unique values buffer1 = Range.init(B.end - count, B.end); - pull[pull_index] = Pull{ - .range = Range.init(A.start, B.end), - .count = count, - .from = index, - .to = B.end, - }; + find = buffer_size; + } else if (find_separately) { + // found one buffer, but now find the other one + buffer1 = Range.init(B.end - count, B.end); + find_separately = false; + } else { + // buffer2 will be pulled out from a 'B' subarray, so if the first buffer was pulled out from the corresponding 'A' subarray, + // we need to adjust the end point for that A subarray so it knows to stop redistributing its values before reaching buffer2 + if (pull[0].range.start == A.start) pull[0].range.end -= pull[1].count; + + // we found a second buffer in an 'B' subarray containing √A unique values, so we're done! + buffer2 = Range.init(B.end - count, B.end); + break; + } + } else if (pull_index == 0 and count > buffer1.length()) { + // keep track of the largest buffer we were able to find + buffer1 = Range.init(B.end - count, B.end); + pull[pull_index] = Pull{ + .range = Range.init(A.start, B.end), + .count = count, + .from = index, + .to = B.end, + }; + } + } + + // pull out the two ranges so we can use them as internal buffers + pull_index = 0; + while (pull_index < 2) : (pull_index += 1) { + const length = pull[pull_index].count; + + if (pull[pull_index].to < pull[pull_index].from) { + // we're pulling the values out to the left, which means the start of an A subarray + index = pull[pull_index].from; + count = 1; + while (count < length) : (count += 1) { + index = findFirstBackward(T, items, items[index - 1], Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, context, lessThan); + const range = Range.init(index + 1, pull[pull_index].from + 1); + mem.rotate(T, items[range.start..range.end], range.length() - count); + pull[pull_index].from = index + count; + } + } else if (pull[pull_index].to > pull[pull_index].from) { + // we're pulling values out to the right, which means the end of a B subarray + index = pull[pull_index].from + 1; + count = 1; + while (count < length) : (count += 1) { + index = findLastForward(T, items, items[index], Range.init(index, pull[pull_index].to), length - count, context, lessThan); + const range = Range.init(pull[pull_index].from, index - 1); + mem.rotate(T, items[range.start..range.end], count); + pull[pull_index].from = index - 1 - count; + } + } + } + + // adjust block_size and buffer_size based on the values we were able to pull out + buffer_size = buffer1.length(); + block_size = iterator.length() / buffer_size + 1; + + // the first buffer NEEDS to be large enough to tag each of the evenly sized A blocks, + // so this was originally here to test the math for adjusting block_size above + // assert((iterator.length() + 1)/block_size <= buffer_size); + + // now that the two internal buffers have been created, it's time to merge each A+B combination at this level of the merge sort! + iterator.begin(); + while (!iterator.finished()) { + A = iterator.nextRange(); + B = iterator.nextRange(); + + // remove any parts of A or B that are being used by the internal buffers + start = A.start; + if (start == pull[0].range.start) { + if (pull[0].from > pull[0].to) { + A.start += pull[0].count; + + // if the internal buffer takes up the entire A or B subarray, then there's nothing to merge + // this only happens for very small subarrays, like √4 = 2, 2 * (2 internal buffers) = 4 + if (A.length() == 0) continue; + } else if (pull[0].from < pull[0].to) { + B.end -= pull[0].count; + if (B.length() == 0) continue; + } + } + if (start == pull[1].range.start) { + if (pull[1].from > pull[1].to) { + A.start += pull[1].count; + if (A.length() == 0) continue; + } else if (pull[1].from < pull[1].to) { + B.end -= pull[1].count; + if (B.length() == 0) continue; } } - // pull out the two ranges so we can use them as internal buffers - pull_index = 0; - while (pull_index < 2) : (pull_index += 1) { - const length = pull[pull_index].count; + if (lessThan(context, items[B.end - 1], items[A.start])) { + // the two ranges are in reverse order, so a simple rotation should fix it + mem.rotate(T, items[A.start..B.end], A.length()); + } else if (lessThan(context, items[A.end], items[A.end - 1])) { + // these two ranges weren't already in order, so we'll need to merge them! + var findA: usize = undefined; - if (pull[pull_index].to < pull[pull_index].from) { - // we're pulling the values out to the left, which means the start of an A subarray - index = pull[pull_index].from; - count = 1; - while (count < length) : (count += 1) { - index = findFirstBackward(T, items, items[index - 1], Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, context, lessThan); - const range = Range.init(index + 1, pull[pull_index].from + 1); - mem.rotate(T, items[range.start..range.end], range.length() - count); - pull[pull_index].from = index + count; - } - } else if (pull[pull_index].to > pull[pull_index].from) { - // we're pulling values out to the right, which means the end of a B subarray - index = pull[pull_index].from + 1; - count = 1; - while (count < length) : (count += 1) { - index = findLastForward(T, items, items[index], Range.init(index, pull[pull_index].to), length - count, context, lessThan); - const range = Range.init(pull[pull_index].from, index - 1); - mem.rotate(T, items[range.start..range.end], count); - pull[pull_index].from = index - 1 - count; - } - } - } + // break the remainder of A into blocks. firstA is the uneven-sized first A block + var blockA = Range.init(A.start, A.end); + var firstA = Range.init(A.start, A.start + blockA.length() % block_size); - // adjust block_size and buffer_size based on the values we were able to pull out - buffer_size = buffer1.length(); - block_size = iterator.length() / buffer_size + 1; - - // the first buffer NEEDS to be large enough to tag each of the evenly sized A blocks, - // so this was originally here to test the math for adjusting block_size above - // assert((iterator.length() + 1)/block_size <= buffer_size); - - // now that the two internal buffers have been created, it's time to merge each A+B combination at this level of the merge sort! - iterator.begin(); - while (!iterator.finished()) { - A = iterator.nextRange(); - B = iterator.nextRange(); - - // remove any parts of A or B that are being used by the internal buffers - start = A.start; - if (start == pull[0].range.start) { - if (pull[0].from > pull[0].to) { - A.start += pull[0].count; - - // if the internal buffer takes up the entire A or B subarray, then there's nothing to merge - // this only happens for very small subarrays, like √4 = 2, 2 * (2 internal buffers) = 4, - // which also only happens when cache.len is small or 0 since it'd otherwise use MergeExternal - if (A.length() == 0) continue; - } else if (pull[0].from < pull[0].to) { - B.end -= pull[0].count; - if (B.length() == 0) continue; - } - } - if (start == pull[1].range.start) { - if (pull[1].from > pull[1].to) { - A.start += pull[1].count; - if (A.length() == 0) continue; - } else if (pull[1].from < pull[1].to) { - B.end -= pull[1].count; - if (B.length() == 0) continue; - } + // swap the first value of each A block with the value in buffer1 + var indexA = buffer1.start; + index = firstA.end; + while (index < blockA.end) : ({ + indexA += 1; + index += block_size; + }) { + mem.swap(T, &items[indexA], &items[index]); } - if (lessThan(context, items[B.end - 1], items[A.start])) { - // the two ranges are in reverse order, so a simple rotation should fix it - mem.rotate(T, items[A.start..B.end], A.length()); - } else if (lessThan(context, items[A.end], items[A.end - 1])) { - // these two ranges weren't already in order, so we'll need to merge them! - var findA: usize = undefined; + // start rolling the A blocks through the B blocks! + // whenever we leave an A block behind, we'll need to merge the previous A block with any B blocks that follow it, so track that information as well + var lastA = firstA; + var lastB = Range.init(0, 0); + var blockB = Range.init(B.start, B.start + @min(block_size, B.length())); + blockA.start += firstA.length(); + indexA = buffer1.start; - // break the remainder of A into blocks. firstA is the uneven-sized first A block - var blockA = Range.init(A.start, A.end); - var firstA = Range.init(A.start, A.start + blockA.length() % block_size); + // if the second buffer is available, block swap the contents into that + if (buffer2.length() > 0) { + blockSwap(T, items, lastA.start, buffer2.start, lastA.length()); + } - // swap the first value of each A block with the value in buffer1 - var indexA = buffer1.start; - index = firstA.end; - while (index < blockA.end) : ({ - indexA += 1; - index += block_size; - }) { - mem.swap(T, &items[indexA], &items[index]); - } + if (blockA.length() > 0) { + while (true) { + // if there's a previous B block and the first value of the minimum A block is <= the last value of the previous B block, + // then drop that minimum A block behind. or if there are no B blocks left then keep dropping the remaining A blocks. + if ((lastB.length() > 0 and !lessThan(context, items[lastB.end - 1], items[indexA])) or blockB.length() == 0) { + // figure out where to split the previous B block, and rotate it at the split + const B_split = binaryFirst(T, items, items[indexA], lastB, context, lessThan); + const B_remaining = lastB.end - B_split; - // start rolling the A blocks through the B blocks! - // whenever we leave an A block behind, we'll need to merge the previous A block with any B blocks that follow it, so track that information as well - var lastA = firstA; - var lastB = Range.init(0, 0); - var blockB = Range.init(B.start, B.start + @min(block_size, B.length())); - blockA.start += firstA.length(); - indexA = buffer1.start; - - // if the first unevenly sized A block fits into the cache, copy it there for when we go to Merge it - // otherwise, if the second buffer is available, block swap the contents into that - if (lastA.length() <= cache.len) { - const last_a_items = items[lastA.start..lastA.end]; - @memcpy(cache[0..last_a_items.len], last_a_items); - } else if (buffer2.length() > 0) { - blockSwap(T, items, lastA.start, buffer2.start, lastA.length()); - } - - if (blockA.length() > 0) { - while (true) { - // if there's a previous B block and the first value of the minimum A block is <= the last value of the previous B block, - // then drop that minimum A block behind. or if there are no B blocks left then keep dropping the remaining A blocks. - if ((lastB.length() > 0 and !lessThan(context, items[lastB.end - 1], items[indexA])) or blockB.length() == 0) { - // figure out where to split the previous B block, and rotate it at the split - const B_split = binaryFirst(T, items, items[indexA], lastB, context, lessThan); - const B_remaining = lastB.end - B_split; - - // swap the minimum A block to the beginning of the rolling A blocks - var minA = blockA.start; - findA = minA + block_size; - while (findA < blockA.end) : (findA += block_size) { - if (lessThan(context, items[findA], items[minA])) { - minA = findA; - } + // swap the minimum A block to the beginning of the rolling A blocks + var minA = blockA.start; + findA = minA + block_size; + while (findA < blockA.end) : (findA += block_size) { + if (lessThan(context, items[findA], items[minA])) { + minA = findA; } - blockSwap(T, items, blockA.start, minA, block_size); + } + blockSwap(T, items, blockA.start, minA, block_size); - // swap the first item of the previous A block back with its original value, which is stored in buffer1 - mem.swap(T, &items[blockA.start], &items[indexA]); - indexA += 1; + // swap the first item of the previous A block back with its original value, which is stored in buffer1 + mem.swap(T, &items[blockA.start], &items[indexA]); + indexA += 1; - // locally merge the previous A block with the B values that follow it - // if lastA fits into the external cache we'll use that (with MergeExternal), - // or if the second internal buffer exists we'll use that (with MergeInternal), - // or failing that we'll use a strictly in-place merge algorithm (MergeInPlace) + // locally merge the previous A block with the B values that follow it + // if lastA fits into the second internal buffer exists we'll use that (with MergeInternal), + // or failing that we'll use a strictly in-place merge algorithm (MergeInPlace) - if (lastA.length() <= cache.len) { - mergeExternal(T, items, lastA, Range.init(lastA.end, B_split), cache[0..], context, lessThan); - } else if (buffer2.length() > 0) { - mergeInternal(T, items, lastA, Range.init(lastA.end, B_split), buffer2, context, lessThan); - } else { - mergeInPlace(T, items, lastA, Range.init(lastA.end, B_split), context, lessThan); - } - - if (buffer2.length() > 0 or block_size <= cache.len) { - // copy the previous A block into the cache or buffer2, since that's where we need it to be when we go to merge it anyway - if (block_size <= cache.len) { - @memcpy(cache[0..block_size], items[blockA.start..][0..block_size]); - } else { - blockSwap(T, items, blockA.start, buffer2.start, block_size); - } - - // this is equivalent to rotating, but faster - // the area normally taken up by the A block is either the contents of buffer2, or data we don't need anymore since we memcopied it - // either way, we don't need to retain the order of those items, so instead of rotating we can just block swap B to where it belongs - blockSwap(T, items, B_split, blockA.start + block_size - B_remaining, B_remaining); - } else { - // we are unable to use the 'buffer2' trick to speed up the rotation operation since buffer2 doesn't exist, so perform a normal rotation - mem.rotate(T, items[B_split .. blockA.start + block_size], blockA.start - B_split); - } - - // update the range for the remaining A blocks, and the range remaining from the B block after it was split - lastA = Range.init(blockA.start - B_remaining, blockA.start - B_remaining + block_size); - lastB = Range.init(lastA.end, lastA.end + B_remaining); - - // if there are no more A blocks remaining, this step is finished! - blockA.start += block_size; - if (blockA.length() == 0) break; - } else if (blockB.length() < block_size) { - // move the last B block, which is unevenly sized, to before the remaining A blocks, by using a rotation - // the cache is disabled here since it might contain the contents of the previous A block - mem.rotate(T, items[blockA.start..blockB.end], blockB.start - blockA.start); - - lastB = Range.init(blockA.start, blockA.start + blockB.length()); - blockA.start += blockB.length(); - blockA.end += blockB.length(); - blockB.end = blockB.start; + if (buffer2.length() > 0) { + mergeInternal(T, items, lastA, Range.init(lastA.end, B_split), buffer2, context, lessThan); } else { - // roll the leftmost A block to the end by swapping it with the next B block - blockSwap(T, items, blockA.start, blockB.start, block_size); - lastB = Range.init(blockA.start, blockA.start + block_size); + mergeInPlace(T, items, lastA, Range.init(lastA.end, B_split), context, lessThan); + } - blockA.start += block_size; - blockA.end += block_size; - blockB.start += block_size; + if (buffer2.length() > 0) { + // copy the previous A block into the buffer2, since that's where we need it to be when we go to merge it anyway + blockSwap(T, items, blockA.start, buffer2.start, block_size); - if (blockB.end > B.end - block_size) { - blockB.end = B.end; - } else { - blockB.end += block_size; - } + // this is equivalent to rotating, but faster + // the area normally taken up by the A block is either the contents of buffer2, or data we don't need anymore since we memcopied it + // either way, we don't need to retain the order of those items, so instead of rotating we can just block swap B to where it belongs + blockSwap(T, items, B_split, blockA.start + block_size - B_remaining, B_remaining); + } else { + // we are unable to use the 'buffer2' trick to speed up the rotation operation since buffer2 doesn't exist, so perform a normal rotation + mem.rotate(T, items[B_split .. blockA.start + block_size], blockA.start - B_split); + } + + // update the range for the remaining A blocks, and the range remaining from the B block after it was split + lastA = Range.init(blockA.start - B_remaining, blockA.start - B_remaining + block_size); + lastB = Range.init(lastA.end, lastA.end + B_remaining); + + // if there are no more A blocks remaining, this step is finished! + blockA.start += block_size; + if (blockA.length() == 0) break; + } else if (blockB.length() < block_size) { + // move the last B block, which is unevenly sized, to before the remaining A blocks, by using a rotation + mem.rotate(T, items[blockA.start..blockB.end], blockB.start - blockA.start); + + lastB = Range.init(blockA.start, blockA.start + blockB.length()); + blockA.start += blockB.length(); + blockA.end += blockB.length(); + blockB.end = blockB.start; + } else { + // roll the leftmost A block to the end by swapping it with the next B block + blockSwap(T, items, blockA.start, blockB.start, block_size); + lastB = Range.init(blockA.start, blockA.start + block_size); + + blockA.start += block_size; + blockA.end += block_size; + blockB.start += block_size; + + if (blockB.end > B.end - block_size) { + blockB.end = B.end; + } else { + blockB.end += block_size; } } } + } - // merge the last A block with the remaining B values - if (lastA.length() <= cache.len) { - mergeExternal(T, items, lastA, Range.init(lastA.end, B.end), cache[0..], context, lessThan); - } else if (buffer2.length() > 0) { - mergeInternal(T, items, lastA, Range.init(lastA.end, B.end), buffer2, context, lessThan); - } else { - mergeInPlace(T, items, lastA, Range.init(lastA.end, B.end), context, lessThan); - } + // merge the last A block with the remaining B values + if (buffer2.length() > 0) { + mergeInternal(T, items, lastA, Range.init(lastA.end, B.end), buffer2, context, lessThan); + } else { + mergeInPlace(T, items, lastA, Range.init(lastA.end, B.end), context, lessThan); } } + } - // when we're finished with this merge step we should have the one - // or two internal buffers left over, where the second buffer is all jumbled up - // insertion sort the second buffer, then redistribute the buffers - // back into the items using the opposite process used for creating the buffer + // when we're finished with this merge step we should have the one + // or two internal buffers left over, where the second buffer is all jumbled up + // insertion sort the second buffer, then redistribute the buffers + // back into the items using the opposite process used for creating the buffer - // while an unstable sort like quicksort could be applied here, in benchmarks - // it was consistently slightly slower than a simple insertion sort, - // even for tens of millions of items. this may be because insertion - // sort is quite fast when the data is already somewhat sorted, like it is here - sort.insertion(T, items[buffer2.start..buffer2.end], context, lessThan); + // while an unstable sort like quicksort could be applied here, in benchmarks + // it was consistently slightly slower than a simple insertion sort, + // even for tens of millions of items. this may be because insertion + // sort is quite fast when the data is already somewhat sorted, like it is here + sort.insertion(T, items[buffer2.start..buffer2.end], context, lessThan); - pull_index = 0; - while (pull_index < 2) : (pull_index += 1) { - var unique = pull[pull_index].count * 2; - if (pull[pull_index].from > pull[pull_index].to) { - // the values were pulled out to the left, so redistribute them back to the right - var buffer = Range.init(pull[pull_index].range.start, pull[pull_index].range.start + pull[pull_index].count); - while (buffer.length() > 0) { - index = findFirstForward(T, items, items[buffer.start], Range.init(buffer.end, pull[pull_index].range.end), unique, context, lessThan); - const amount = index - buffer.end; - mem.rotate(T, items[buffer.start..index], buffer.length()); - buffer.start += (amount + 1); - buffer.end += amount; - unique -= 2; - } - } else if (pull[pull_index].from < pull[pull_index].to) { - // the values were pulled out to the right, so redistribute them back to the left - var buffer = Range.init(pull[pull_index].range.end - pull[pull_index].count, pull[pull_index].range.end); - while (buffer.length() > 0) { - index = findLastBackward(T, items, items[buffer.end - 1], Range.init(pull[pull_index].range.start, buffer.start), unique, context, lessThan); - const amount = buffer.start - index; - mem.rotate(T, items[index..buffer.end], amount); - buffer.start -= amount; - buffer.end -= (amount + 1); - unique -= 2; - } + pull_index = 0; + while (pull_index < 2) : (pull_index += 1) { + var unique = pull[pull_index].count * 2; + if (pull[pull_index].from > pull[pull_index].to) { + // the values were pulled out to the left, so redistribute them back to the right + var buffer = Range.init(pull[pull_index].range.start, pull[pull_index].range.start + pull[pull_index].count); + while (buffer.length() > 0) { + index = findFirstForward(T, items, items[buffer.start], Range.init(buffer.end, pull[pull_index].range.end), unique, context, lessThan); + const amount = index - buffer.end; + mem.rotate(T, items[buffer.start..index], buffer.length()); + buffer.start += (amount + 1); + buffer.end += amount; + unique -= 2; + } + } else if (pull[pull_index].from < pull[pull_index].to) { + // the values were pulled out to the right, so redistribute them back to the left + var buffer = Range.init(pull[pull_index].range.end - pull[pull_index].count, pull[pull_index].range.end); + while (buffer.length() > 0) { + index = findLastBackward(T, items, items[buffer.end - 1], Range.init(pull[pull_index].range.start, buffer.start), unique, context, lessThan); + const amount = buffer.start - index; + mem.rotate(T, items[index..buffer.end], amount); + buffer.start -= amount; + buffer.end -= (amount + 1); + unique -= 2; } } } @@ -982,83 +853,6 @@ fn binaryLast( return curr; } -fn mergeInto( - comptime T: type, - from: []T, - A: Range, - B: Range, - into: []T, - context: anytype, - comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, -) void { - var A_index: usize = A.start; - var B_index: usize = B.start; - const A_last = A.end; - const B_last = B.end; - var insert_index: usize = 0; - - while (true) { - if (!lessThan(context, from[B_index], from[A_index])) { - into[insert_index] = from[A_index]; - A_index += 1; - insert_index += 1; - if (A_index == A_last) { - // copy the remainder of B into the final array - const from_b = from[B_index..B_last]; - @memcpy(into[insert_index..][0..from_b.len], from_b); - break; - } - } else { - into[insert_index] = from[B_index]; - B_index += 1; - insert_index += 1; - if (B_index == B_last) { - // copy the remainder of A into the final array - const from_a = from[A_index..A_last]; - @memcpy(into[insert_index..][0..from_a.len], from_a); - break; - } - } - } -} - -fn mergeExternal( - comptime T: type, - items: []T, - A: Range, - B: Range, - cache: []T, - context: anytype, - comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, -) void { - // A fits into the cache, so use that instead of the internal buffer - var A_index: usize = 0; - var B_index: usize = B.start; - var insert_index: usize = A.start; - const A_last = A.length(); - const B_last = B.end; - - if (B.length() > 0 and A.length() > 0) { - while (true) { - if (!lessThan(context, items[B_index], cache[A_index])) { - items[insert_index] = cache[A_index]; - A_index += 1; - insert_index += 1; - if (A_index == A_last) break; - } else { - items[insert_index] = items[B_index]; - B_index += 1; - insert_index += 1; - if (B_index == B_last) break; - } - } - } - - // copy the remainder of A into the final array - const cache_a = cache[A_index..A_last]; - @memcpy(items[insert_index..][0..cache_a.len], cache_a); -} - fn swap( comptime T: type, items: []T, From f6f2e1ee077e70541c7fe52138ae363a50b4f42e Mon Sep 17 00:00:00 2001 From: p-rosit Date: Mon, 8 Sep 2025 17:03:59 +0200 Subject: [PATCH 02/24] introduce `blockContext` with incorrect api --- lib/std/sort/block.zig | 197 ++++++++++++++++++++++++----------------- 1 file changed, 115 insertions(+), 82 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index ce34cd3bc0..948a263fc5 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -103,8 +103,41 @@ pub fn block( context: anytype, comptime lessThanFn: fn (@TypeOf(context), lhs: T, rhs: T) bool, ) void { + const Context = struct { + items: []T, + sub_ctx: @TypeOf(context), + + pub fn lessThan(ctx: @This(), a: usize, b: usize) bool { + return lessThanFn(ctx.sub_ctx, ctx.items[a], ctx.items[b]); + } + + pub fn swap(ctx: @This(), a: usize, b: usize) void { + return mem.swap(T, &ctx.items[a], &ctx.items[b]); + } + }; + return blockContext(T, items, context, lessThanFn, 0, items.len, Context{ .items = items, .sub_ctx = context }); +} + +/// Stable in-place sort. O(n) best case, O(n*log(n)) worst case and average case. +/// O(1) memory (no allocator required). +/// Sorts in ascending order with respect to the given `lessThan` function. +/// `context` must have methods `swap` and `lessThan`, +/// which each take 2 `usize` parameters indicating the index of an item. +/// +/// NOTE: The algorithm only works when the comparison is less-than or greater-than. +/// (See https://github.com/ziglang/zig/issues/8289) +pub fn blockContext( + comptime T: type, + items: []T, + inner_context: anytype, + comptime lessThanFn: fn (@TypeOf(inner_context), lhs: T, rhs: T) bool, + a: usize, + b: usize, + context: anytype, +) void { + _ = .{ a, b, context }; const lessThan = if (builtin.mode == .Debug) struct { - fn lessThan(ctx: @TypeOf(context), lhs: T, rhs: T) bool { + fn lessThan(ctx: @TypeOf(inner_context), lhs: T, rhs: T) bool { const lt = lessThanFn(ctx, lhs, rhs); const gt = lessThanFn(ctx, rhs, lhs); std.debug.assert(!(lt and gt)); @@ -117,13 +150,13 @@ pub fn block( if (items.len < 4) { if (items.len == 3) { // hard coded insertion sort - if (lessThan(context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); - if (lessThan(context, items[2], items[1])) { + if (lessThan(inner_context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); + if (lessThan(inner_context, items[2], items[1])) { mem.swap(T, &items[1], &items[2]); - if (lessThan(context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); + if (lessThan(inner_context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); } } else if (items.len == 2) { - if (lessThan(context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); + if (lessThan(inner_context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); } return; } @@ -139,75 +172,75 @@ pub fn block( const sliced_items = items[range.start..]; switch (range.length()) { 8 => { - swap(T, sliced_items, &order, 0, 1, context, lessThan); - swap(T, sliced_items, &order, 2, 3, context, lessThan); - swap(T, sliced_items, &order, 4, 5, context, lessThan); - swap(T, sliced_items, &order, 6, 7, context, lessThan); - swap(T, sliced_items, &order, 0, 2, context, lessThan); - swap(T, sliced_items, &order, 1, 3, context, lessThan); - swap(T, sliced_items, &order, 4, 6, context, lessThan); - swap(T, sliced_items, &order, 5, 7, context, lessThan); - swap(T, sliced_items, &order, 1, 2, context, lessThan); - swap(T, sliced_items, &order, 5, 6, context, lessThan); - swap(T, sliced_items, &order, 0, 4, context, lessThan); - swap(T, sliced_items, &order, 3, 7, context, lessThan); - swap(T, sliced_items, &order, 1, 5, context, lessThan); - swap(T, sliced_items, &order, 2, 6, context, lessThan); - swap(T, sliced_items, &order, 1, 4, context, lessThan); - swap(T, sliced_items, &order, 3, 6, context, lessThan); - swap(T, sliced_items, &order, 2, 4, context, lessThan); - swap(T, sliced_items, &order, 3, 5, context, lessThan); - swap(T, sliced_items, &order, 3, 4, context, lessThan); + swap(T, sliced_items, &order, 0, 1, inner_context, lessThan); + swap(T, sliced_items, &order, 2, 3, inner_context, lessThan); + swap(T, sliced_items, &order, 4, 5, inner_context, lessThan); + swap(T, sliced_items, &order, 6, 7, inner_context, lessThan); + swap(T, sliced_items, &order, 0, 2, inner_context, lessThan); + swap(T, sliced_items, &order, 1, 3, inner_context, lessThan); + swap(T, sliced_items, &order, 4, 6, inner_context, lessThan); + swap(T, sliced_items, &order, 5, 7, inner_context, lessThan); + swap(T, sliced_items, &order, 1, 2, inner_context, lessThan); + swap(T, sliced_items, &order, 5, 6, inner_context, lessThan); + swap(T, sliced_items, &order, 0, 4, inner_context, lessThan); + swap(T, sliced_items, &order, 3, 7, inner_context, lessThan); + swap(T, sliced_items, &order, 1, 5, inner_context, lessThan); + swap(T, sliced_items, &order, 2, 6, inner_context, lessThan); + swap(T, sliced_items, &order, 1, 4, inner_context, lessThan); + swap(T, sliced_items, &order, 3, 6, inner_context, lessThan); + swap(T, sliced_items, &order, 2, 4, inner_context, lessThan); + swap(T, sliced_items, &order, 3, 5, inner_context, lessThan); + swap(T, sliced_items, &order, 3, 4, inner_context, lessThan); }, 7 => { - swap(T, sliced_items, &order, 1, 2, context, lessThan); - swap(T, sliced_items, &order, 3, 4, context, lessThan); - swap(T, sliced_items, &order, 5, 6, context, lessThan); - swap(T, sliced_items, &order, 0, 2, context, lessThan); - swap(T, sliced_items, &order, 3, 5, context, lessThan); - swap(T, sliced_items, &order, 4, 6, context, lessThan); - swap(T, sliced_items, &order, 0, 1, context, lessThan); - swap(T, sliced_items, &order, 4, 5, context, lessThan); - swap(T, sliced_items, &order, 2, 6, context, lessThan); - swap(T, sliced_items, &order, 0, 4, context, lessThan); - swap(T, sliced_items, &order, 1, 5, context, lessThan); - swap(T, sliced_items, &order, 0, 3, context, lessThan); - swap(T, sliced_items, &order, 2, 5, context, lessThan); - swap(T, sliced_items, &order, 1, 3, context, lessThan); - swap(T, sliced_items, &order, 2, 4, context, lessThan); - swap(T, sliced_items, &order, 2, 3, context, lessThan); + swap(T, sliced_items, &order, 1, 2, inner_context, lessThan); + swap(T, sliced_items, &order, 3, 4, inner_context, lessThan); + swap(T, sliced_items, &order, 5, 6, inner_context, lessThan); + swap(T, sliced_items, &order, 0, 2, inner_context, lessThan); + swap(T, sliced_items, &order, 3, 5, inner_context, lessThan); + swap(T, sliced_items, &order, 4, 6, inner_context, lessThan); + swap(T, sliced_items, &order, 0, 1, inner_context, lessThan); + swap(T, sliced_items, &order, 4, 5, inner_context, lessThan); + swap(T, sliced_items, &order, 2, 6, inner_context, lessThan); + swap(T, sliced_items, &order, 0, 4, inner_context, lessThan); + swap(T, sliced_items, &order, 1, 5, inner_context, lessThan); + swap(T, sliced_items, &order, 0, 3, inner_context, lessThan); + swap(T, sliced_items, &order, 2, 5, inner_context, lessThan); + swap(T, sliced_items, &order, 1, 3, inner_context, lessThan); + swap(T, sliced_items, &order, 2, 4, inner_context, lessThan); + swap(T, sliced_items, &order, 2, 3, inner_context, lessThan); }, 6 => { - swap(T, sliced_items, &order, 1, 2, context, lessThan); - swap(T, sliced_items, &order, 4, 5, context, lessThan); - swap(T, sliced_items, &order, 0, 2, context, lessThan); - swap(T, sliced_items, &order, 3, 5, context, lessThan); - swap(T, sliced_items, &order, 0, 1, context, lessThan); - swap(T, sliced_items, &order, 3, 4, context, lessThan); - swap(T, sliced_items, &order, 2, 5, context, lessThan); - swap(T, sliced_items, &order, 0, 3, context, lessThan); - swap(T, sliced_items, &order, 1, 4, context, lessThan); - swap(T, sliced_items, &order, 2, 4, context, lessThan); - swap(T, sliced_items, &order, 1, 3, context, lessThan); - swap(T, sliced_items, &order, 2, 3, context, lessThan); + swap(T, sliced_items, &order, 1, 2, inner_context, lessThan); + swap(T, sliced_items, &order, 4, 5, inner_context, lessThan); + swap(T, sliced_items, &order, 0, 2, inner_context, lessThan); + swap(T, sliced_items, &order, 3, 5, inner_context, lessThan); + swap(T, sliced_items, &order, 0, 1, inner_context, lessThan); + swap(T, sliced_items, &order, 3, 4, inner_context, lessThan); + swap(T, sliced_items, &order, 2, 5, inner_context, lessThan); + swap(T, sliced_items, &order, 0, 3, inner_context, lessThan); + swap(T, sliced_items, &order, 1, 4, inner_context, lessThan); + swap(T, sliced_items, &order, 2, 4, inner_context, lessThan); + swap(T, sliced_items, &order, 1, 3, inner_context, lessThan); + swap(T, sliced_items, &order, 2, 3, inner_context, lessThan); }, 5 => { - swap(T, sliced_items, &order, 0, 1, context, lessThan); - swap(T, sliced_items, &order, 3, 4, context, lessThan); - swap(T, sliced_items, &order, 2, 4, context, lessThan); - swap(T, sliced_items, &order, 2, 3, context, lessThan); - swap(T, sliced_items, &order, 1, 4, context, lessThan); - swap(T, sliced_items, &order, 0, 3, context, lessThan); - swap(T, sliced_items, &order, 0, 2, context, lessThan); - swap(T, sliced_items, &order, 1, 3, context, lessThan); - swap(T, sliced_items, &order, 1, 2, context, lessThan); + swap(T, sliced_items, &order, 0, 1, inner_context, lessThan); + swap(T, sliced_items, &order, 3, 4, inner_context, lessThan); + swap(T, sliced_items, &order, 2, 4, inner_context, lessThan); + swap(T, sliced_items, &order, 2, 3, inner_context, lessThan); + swap(T, sliced_items, &order, 1, 4, inner_context, lessThan); + swap(T, sliced_items, &order, 0, 3, inner_context, lessThan); + swap(T, sliced_items, &order, 0, 2, inner_context, lessThan); + swap(T, sliced_items, &order, 1, 3, inner_context, lessThan); + swap(T, sliced_items, &order, 1, 2, inner_context, lessThan); }, 4 => { - swap(T, sliced_items, &order, 0, 1, context, lessThan); - swap(T, sliced_items, &order, 2, 3, context, lessThan); - swap(T, sliced_items, &order, 0, 2, context, lessThan); - swap(T, sliced_items, &order, 1, 3, context, lessThan); - swap(T, sliced_items, &order, 1, 2, context, lessThan); + swap(T, sliced_items, &order, 0, 1, inner_context, lessThan); + swap(T, sliced_items, &order, 2, 3, inner_context, lessThan); + swap(T, sliced_items, &order, 0, 2, inner_context, lessThan); + swap(T, sliced_items, &order, 1, 3, inner_context, lessThan); + swap(T, sliced_items, &order, 1, 2, inner_context, lessThan); }, else => {}, } @@ -289,7 +322,7 @@ pub fn block( last = index; count += 1; }) { - index = findLastForward(T, items, items[last], Range.init(last + 1, A.end), find - count, context, lessThan); + index = findLastForward(T, items, items[last], Range.init(last + 1, A.end), find - count, inner_context, lessThan); if (index == A.end) break; } index = last; @@ -343,7 +376,7 @@ pub fn block( last = index - 1; count += 1; }) { - index = findFirstBackward(T, items, items[last], Range.init(B.start, last), find - count, context, lessThan); + index = findFirstBackward(T, items, items[last], Range.init(B.start, last), find - count, inner_context, lessThan); if (index == B.start) break; } index = last; @@ -404,7 +437,7 @@ pub fn block( index = pull[pull_index].from; count = 1; while (count < length) : (count += 1) { - index = findFirstBackward(T, items, items[index - 1], Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, context, lessThan); + index = findFirstBackward(T, items, items[index - 1], Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, inner_context, lessThan); const range = Range.init(index + 1, pull[pull_index].from + 1); mem.rotate(T, items[range.start..range.end], range.length() - count); pull[pull_index].from = index + count; @@ -414,7 +447,7 @@ pub fn block( index = pull[pull_index].from + 1; count = 1; while (count < length) : (count += 1) { - index = findLastForward(T, items, items[index], Range.init(index, pull[pull_index].to), length - count, context, lessThan); + index = findLastForward(T, items, items[index], Range.init(index, pull[pull_index].to), length - count, inner_context, lessThan); const range = Range.init(pull[pull_index].from, index - 1); mem.rotate(T, items[range.start..range.end], count); pull[pull_index].from = index - 1 - count; @@ -460,10 +493,10 @@ pub fn block( } } - if (lessThan(context, items[B.end - 1], items[A.start])) { + if (lessThan(inner_context, items[B.end - 1], items[A.start])) { // the two ranges are in reverse order, so a simple rotation should fix it mem.rotate(T, items[A.start..B.end], A.length()); - } else if (lessThan(context, items[A.end], items[A.end - 1])) { + } else if (lessThan(inner_context, items[A.end], items[A.end - 1])) { // these two ranges weren't already in order, so we'll need to merge them! var findA: usize = undefined; @@ -498,16 +531,16 @@ pub fn block( while (true) { // if there's a previous B block and the first value of the minimum A block is <= the last value of the previous B block, // then drop that minimum A block behind. or if there are no B blocks left then keep dropping the remaining A blocks. - if ((lastB.length() > 0 and !lessThan(context, items[lastB.end - 1], items[indexA])) or blockB.length() == 0) { + if ((lastB.length() > 0 and !lessThan(inner_context, items[lastB.end - 1], items[indexA])) or blockB.length() == 0) { // figure out where to split the previous B block, and rotate it at the split - const B_split = binaryFirst(T, items, items[indexA], lastB, context, lessThan); + const B_split = binaryFirst(T, items, items[indexA], lastB, inner_context, lessThan); const B_remaining = lastB.end - B_split; // swap the minimum A block to the beginning of the rolling A blocks var minA = blockA.start; findA = minA + block_size; while (findA < blockA.end) : (findA += block_size) { - if (lessThan(context, items[findA], items[minA])) { + if (lessThan(inner_context, items[findA], items[minA])) { minA = findA; } } @@ -522,9 +555,9 @@ pub fn block( // or failing that we'll use a strictly in-place merge algorithm (MergeInPlace) if (buffer2.length() > 0) { - mergeInternal(T, items, lastA, Range.init(lastA.end, B_split), buffer2, context, lessThan); + mergeInternal(T, items, lastA, Range.init(lastA.end, B_split), buffer2, inner_context, lessThan); } else { - mergeInPlace(T, items, lastA, Range.init(lastA.end, B_split), context, lessThan); + mergeInPlace(T, items, lastA, Range.init(lastA.end, B_split), inner_context, lessThan); } if (buffer2.length() > 0) { @@ -575,9 +608,9 @@ pub fn block( // merge the last A block with the remaining B values if (buffer2.length() > 0) { - mergeInternal(T, items, lastA, Range.init(lastA.end, B.end), buffer2, context, lessThan); + mergeInternal(T, items, lastA, Range.init(lastA.end, B.end), buffer2, inner_context, lessThan); } else { - mergeInPlace(T, items, lastA, Range.init(lastA.end, B.end), context, lessThan); + mergeInPlace(T, items, lastA, Range.init(lastA.end, B.end), inner_context, lessThan); } } } @@ -591,7 +624,7 @@ pub fn block( // it was consistently slightly slower than a simple insertion sort, // even for tens of millions of items. this may be because insertion // sort is quite fast when the data is already somewhat sorted, like it is here - sort.insertion(T, items[buffer2.start..buffer2.end], context, lessThan); + sort.insertion(T, items[buffer2.start..buffer2.end], inner_context, lessThan); pull_index = 0; while (pull_index < 2) : (pull_index += 1) { @@ -600,7 +633,7 @@ pub fn block( // the values were pulled out to the left, so redistribute them back to the right var buffer = Range.init(pull[pull_index].range.start, pull[pull_index].range.start + pull[pull_index].count); while (buffer.length() > 0) { - index = findFirstForward(T, items, items[buffer.start], Range.init(buffer.end, pull[pull_index].range.end), unique, context, lessThan); + index = findFirstForward(T, items, items[buffer.start], Range.init(buffer.end, pull[pull_index].range.end), unique, inner_context, lessThan); const amount = index - buffer.end; mem.rotate(T, items[buffer.start..index], buffer.length()); buffer.start += (amount + 1); @@ -611,7 +644,7 @@ pub fn block( // the values were pulled out to the right, so redistribute them back to the left var buffer = Range.init(pull[pull_index].range.end - pull[pull_index].count, pull[pull_index].range.end); while (buffer.length() > 0) { - index = findLastBackward(T, items, items[buffer.end - 1], Range.init(pull[pull_index].range.start, buffer.start), unique, context, lessThan); + index = findLastBackward(T, items, items[buffer.end - 1], Range.init(pull[pull_index].range.start, buffer.start), unique, inner_context, lessThan); const amount = buffer.start - index; mem.rotate(T, items[index..buffer.end], amount); buffer.start -= amount; From 31f5f0c647864ad0ee64a7b0c7f11402acdbb5bf Mon Sep 17 00:00:00 2001 From: p-rosit Date: Mon, 8 Sep 2025 17:18:23 +0200 Subject: [PATCH 03/24] produce a wrapped context --- lib/std/sort/block.zig | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index 948a263fc5..33cb0cde0c 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -135,7 +135,29 @@ pub fn blockContext( b: usize, context: anytype, ) void { - _ = .{ a, b, context }; + _ = .{ a, b }; + const Context = struct { + sub_ctx: @TypeOf(context), + + pub const lessThan = if (builtin.mode == .Debug) lessThanChecked else lessThanUnchecked; + + fn lessThanChecked(ctx: @This(), i: usize, j: usize) bool { + const lt = ctx.sub_ctx.lessThan(i, j); + const gt = ctx.sub_ctx.lessThan(j, i); + std.debug.assert(!(lt and gt)); + return lt; + } + + fn lessThanUnchecked(ctx: @This(), i: usize, j: usize) bool { + return ctx.sub_ctx.lessThan(i, j); + } + + pub fn swap(ctx: @This(), i: usize, j: usize) void { + return ctx.sub_ctx.swap(i, j); + } + }; + const wrapped_context = Context{ .sub_ctx = context }; + _ = wrapped_context; const lessThan = if (builtin.mode == .Debug) struct { fn lessThan(ctx: @TypeOf(inner_context), lhs: T, rhs: T) bool { const lt = lessThanFn(ctx, lhs, rhs); From 2de51681b77329b8e95e0a8452849fec5457553a Mon Sep 17 00:00:00 2001 From: p-rosit Date: Mon, 8 Sep 2025 17:19:36 +0200 Subject: [PATCH 04/24] place comment at top --- lib/std/sort/block.zig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index 33cb0cde0c..b929cc8c64 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -135,6 +135,7 @@ pub fn blockContext( b: usize, context: anytype, ) void { + // Implementation ported from https://github.com/BonzaiThePenguin/WikiSort/blob/master/WikiSort.c _ = .{ a, b }; const Context = struct { sub_ctx: @TypeOf(context), @@ -167,8 +168,6 @@ pub fn blockContext( } }.lessThan else lessThanFn; - // Implementation ported from https://github.com/BonzaiThePenguin/WikiSort/blob/master/WikiSort.c - if (items.len < 4) { if (items.len == 3) { // hard coded insertion sort From 18f21a172bca9473e7e2da3a0f045b79912590cd Mon Sep 17 00:00:00 2001 From: p-rosit Date: Mon, 8 Sep 2025 17:24:18 +0200 Subject: [PATCH 05/24] check array length from start and end index --- lib/std/sort/block.zig | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index b929cc8c64..84d0330bf9 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -136,7 +136,6 @@ pub fn blockContext( context: anytype, ) void { // Implementation ported from https://github.com/BonzaiThePenguin/WikiSort/blob/master/WikiSort.c - _ = .{ a, b }; const Context = struct { sub_ctx: @TypeOf(context), @@ -168,15 +167,17 @@ pub fn blockContext( } }.lessThan else lessThanFn; - if (items.len < 4) { - if (items.len == 3) { + const range_length = b - a; + + if (range_length < 4) { + if (range_length == 3) { // hard coded insertion sort if (lessThan(inner_context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); if (lessThan(inner_context, items[2], items[1])) { mem.swap(T, &items[1], &items[2]); if (lessThan(inner_context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); } - } else if (items.len == 2) { + } else if (range_length == 2) { if (lessThan(inner_context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); } return; @@ -185,7 +186,7 @@ pub fn blockContext( // sort groups of 4-8 items at a time using an unstable sorting network, // but keep track of the original item orders to force it to be stable // http://pages.ripco.net/~jgamble/nw.html - var iterator = Iterator.init(items.len, 4); + var iterator = Iterator.init(range_length, 4); while (!iterator.finished()) { var order = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7 }; const range = iterator.nextRange(); @@ -266,7 +267,7 @@ pub fn blockContext( else => {}, } } - if (items.len < 8) return; + if (range_length < 8) return; // then merge sort the higher levels, which can be 8-15, 16-31, 32-63, 64-127, etc. while (true) { From fd28d7e475f1a22ba778162acff335066ccd97b6 Mon Sep 17 00:00:00 2001 From: p-rosit Date: Mon, 8 Sep 2025 17:27:50 +0200 Subject: [PATCH 06/24] use wrapped context to sort small slices --- lib/std/sort/block.zig | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index 84d0330bf9..4e8f9cc926 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -157,7 +157,6 @@ pub fn blockContext( } }; const wrapped_context = Context{ .sub_ctx = context }; - _ = wrapped_context; const lessThan = if (builtin.mode == .Debug) struct { fn lessThan(ctx: @TypeOf(inner_context), lhs: T, rhs: T) bool { const lt = lessThanFn(ctx, lhs, rhs); @@ -172,13 +171,13 @@ pub fn blockContext( if (range_length < 4) { if (range_length == 3) { // hard coded insertion sort - if (lessThan(inner_context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); - if (lessThan(inner_context, items[2], items[1])) { - mem.swap(T, &items[1], &items[2]); - if (lessThan(inner_context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); + if (wrapped_context.lessThan(a + 1, a + 0)) wrapped_context.swap(a + 0, a + 1); + if (wrapped_context.lessThan(a + 2, a + 1)) { + wrapped_context.swap(a + 1, a + 2); + if (wrapped_context.lessThan(a + 1, a + 0)) wrapped_context.swap(a + 0, a + 1); } } else if (range_length == 2) { - if (lessThan(inner_context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); + if (wrapped_context.lessThan(a + 1, a + 0)) wrapped_context.swap(a + 0, a + 1); } return; } From 2be55b3880a384203ce077bdfc8528a89e6614a1 Mon Sep 17 00:00:00 2001 From: p-rosit Date: Mon, 8 Sep 2025 17:32:19 +0200 Subject: [PATCH 07/24] use wrapped context for initial sort --- lib/std/sort/block.zig | 131 ++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 67 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index 4e8f9cc926..99c8f5aed3 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -190,78 +190,77 @@ pub fn blockContext( var order = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7 }; const range = iterator.nextRange(); - const sliced_items = items[range.start..]; switch (range.length()) { 8 => { - swap(T, sliced_items, &order, 0, 1, inner_context, lessThan); - swap(T, sliced_items, &order, 2, 3, inner_context, lessThan); - swap(T, sliced_items, &order, 4, 5, inner_context, lessThan); - swap(T, sliced_items, &order, 6, 7, inner_context, lessThan); - swap(T, sliced_items, &order, 0, 2, inner_context, lessThan); - swap(T, sliced_items, &order, 1, 3, inner_context, lessThan); - swap(T, sliced_items, &order, 4, 6, inner_context, lessThan); - swap(T, sliced_items, &order, 5, 7, inner_context, lessThan); - swap(T, sliced_items, &order, 1, 2, inner_context, lessThan); - swap(T, sliced_items, &order, 5, 6, inner_context, lessThan); - swap(T, sliced_items, &order, 0, 4, inner_context, lessThan); - swap(T, sliced_items, &order, 3, 7, inner_context, lessThan); - swap(T, sliced_items, &order, 1, 5, inner_context, lessThan); - swap(T, sliced_items, &order, 2, 6, inner_context, lessThan); - swap(T, sliced_items, &order, 1, 4, inner_context, lessThan); - swap(T, sliced_items, &order, 3, 6, inner_context, lessThan); - swap(T, sliced_items, &order, 2, 4, inner_context, lessThan); - swap(T, sliced_items, &order, 3, 5, inner_context, lessThan); - swap(T, sliced_items, &order, 3, 4, inner_context, lessThan); + swap(&order, a + range.start, 0, 1, wrapped_context); + swap(&order, a + range.start, 2, 3, wrapped_context); + swap(&order, a + range.start, 4, 5, wrapped_context); + swap(&order, a + range.start, 6, 7, wrapped_context); + swap(&order, a + range.start, 0, 2, wrapped_context); + swap(&order, a + range.start, 1, 3, wrapped_context); + swap(&order, a + range.start, 4, 6, wrapped_context); + swap(&order, a + range.start, 5, 7, wrapped_context); + swap(&order, a + range.start, 1, 2, wrapped_context); + swap(&order, a + range.start, 5, 6, wrapped_context); + swap(&order, a + range.start, 0, 4, wrapped_context); + swap(&order, a + range.start, 3, 7, wrapped_context); + swap(&order, a + range.start, 1, 5, wrapped_context); + swap(&order, a + range.start, 2, 6, wrapped_context); + swap(&order, a + range.start, 1, 4, wrapped_context); + swap(&order, a + range.start, 3, 6, wrapped_context); + swap(&order, a + range.start, 2, 4, wrapped_context); + swap(&order, a + range.start, 3, 5, wrapped_context); + swap(&order, a + range.start, 3, 4, wrapped_context); }, 7 => { - swap(T, sliced_items, &order, 1, 2, inner_context, lessThan); - swap(T, sliced_items, &order, 3, 4, inner_context, lessThan); - swap(T, sliced_items, &order, 5, 6, inner_context, lessThan); - swap(T, sliced_items, &order, 0, 2, inner_context, lessThan); - swap(T, sliced_items, &order, 3, 5, inner_context, lessThan); - swap(T, sliced_items, &order, 4, 6, inner_context, lessThan); - swap(T, sliced_items, &order, 0, 1, inner_context, lessThan); - swap(T, sliced_items, &order, 4, 5, inner_context, lessThan); - swap(T, sliced_items, &order, 2, 6, inner_context, lessThan); - swap(T, sliced_items, &order, 0, 4, inner_context, lessThan); - swap(T, sliced_items, &order, 1, 5, inner_context, lessThan); - swap(T, sliced_items, &order, 0, 3, inner_context, lessThan); - swap(T, sliced_items, &order, 2, 5, inner_context, lessThan); - swap(T, sliced_items, &order, 1, 3, inner_context, lessThan); - swap(T, sliced_items, &order, 2, 4, inner_context, lessThan); - swap(T, sliced_items, &order, 2, 3, inner_context, lessThan); + swap(&order, a + range.start, 1, 2, wrapped_context); + swap(&order, a + range.start, 3, 4, wrapped_context); + swap(&order, a + range.start, 5, 6, wrapped_context); + swap(&order, a + range.start, 0, 2, wrapped_context); + swap(&order, a + range.start, 3, 5, wrapped_context); + swap(&order, a + range.start, 4, 6, wrapped_context); + swap(&order, a + range.start, 0, 1, wrapped_context); + swap(&order, a + range.start, 4, 5, wrapped_context); + swap(&order, a + range.start, 2, 6, wrapped_context); + swap(&order, a + range.start, 0, 4, wrapped_context); + swap(&order, a + range.start, 1, 5, wrapped_context); + swap(&order, a + range.start, 0, 3, wrapped_context); + swap(&order, a + range.start, 2, 5, wrapped_context); + swap(&order, a + range.start, 1, 3, wrapped_context); + swap(&order, a + range.start, 2, 4, wrapped_context); + swap(&order, a + range.start, 2, 3, wrapped_context); }, 6 => { - swap(T, sliced_items, &order, 1, 2, inner_context, lessThan); - swap(T, sliced_items, &order, 4, 5, inner_context, lessThan); - swap(T, sliced_items, &order, 0, 2, inner_context, lessThan); - swap(T, sliced_items, &order, 3, 5, inner_context, lessThan); - swap(T, sliced_items, &order, 0, 1, inner_context, lessThan); - swap(T, sliced_items, &order, 3, 4, inner_context, lessThan); - swap(T, sliced_items, &order, 2, 5, inner_context, lessThan); - swap(T, sliced_items, &order, 0, 3, inner_context, lessThan); - swap(T, sliced_items, &order, 1, 4, inner_context, lessThan); - swap(T, sliced_items, &order, 2, 4, inner_context, lessThan); - swap(T, sliced_items, &order, 1, 3, inner_context, lessThan); - swap(T, sliced_items, &order, 2, 3, inner_context, lessThan); + swap(&order, a + range.start, 1, 2, wrapped_context); + swap(&order, a + range.start, 4, 5, wrapped_context); + swap(&order, a + range.start, 0, 2, wrapped_context); + swap(&order, a + range.start, 3, 5, wrapped_context); + swap(&order, a + range.start, 0, 1, wrapped_context); + swap(&order, a + range.start, 3, 4, wrapped_context); + swap(&order, a + range.start, 2, 5, wrapped_context); + swap(&order, a + range.start, 0, 3, wrapped_context); + swap(&order, a + range.start, 1, 4, wrapped_context); + swap(&order, a + range.start, 2, 4, wrapped_context); + swap(&order, a + range.start, 1, 3, wrapped_context); + swap(&order, a + range.start, 2, 3, wrapped_context); }, 5 => { - swap(T, sliced_items, &order, 0, 1, inner_context, lessThan); - swap(T, sliced_items, &order, 3, 4, inner_context, lessThan); - swap(T, sliced_items, &order, 2, 4, inner_context, lessThan); - swap(T, sliced_items, &order, 2, 3, inner_context, lessThan); - swap(T, sliced_items, &order, 1, 4, inner_context, lessThan); - swap(T, sliced_items, &order, 0, 3, inner_context, lessThan); - swap(T, sliced_items, &order, 0, 2, inner_context, lessThan); - swap(T, sliced_items, &order, 1, 3, inner_context, lessThan); - swap(T, sliced_items, &order, 1, 2, inner_context, lessThan); + swap(&order, a + range.start, 0, 1, wrapped_context); + swap(&order, a + range.start, 3, 4, wrapped_context); + swap(&order, a + range.start, 2, 4, wrapped_context); + swap(&order, a + range.start, 2, 3, wrapped_context); + swap(&order, a + range.start, 1, 4, wrapped_context); + swap(&order, a + range.start, 0, 3, wrapped_context); + swap(&order, a + range.start, 0, 2, wrapped_context); + swap(&order, a + range.start, 1, 3, wrapped_context); + swap(&order, a + range.start, 1, 2, wrapped_context); }, 4 => { - swap(T, sliced_items, &order, 0, 1, inner_context, lessThan); - swap(T, sliced_items, &order, 2, 3, inner_context, lessThan); - swap(T, sliced_items, &order, 0, 2, inner_context, lessThan); - swap(T, sliced_items, &order, 1, 3, inner_context, lessThan); - swap(T, sliced_items, &order, 1, 2, inner_context, lessThan); + swap(&order, a + range.start, 0, 1, wrapped_context); + swap(&order, a + range.start, 2, 3, wrapped_context); + swap(&order, a + range.start, 0, 2, wrapped_context); + swap(&order, a + range.start, 1, 3, wrapped_context); + swap(&order, a + range.start, 1, 2, wrapped_context); }, else => {}, } @@ -908,16 +907,14 @@ fn binaryLast( } fn swap( - comptime T: type, - items: []T, order: *[8]u8, + start_index: usize, x: usize, y: usize, context: anytype, - comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, ) void { - if (lessThan(context, items[y], items[x]) or ((order.*)[x] > (order.*)[y] and !lessThan(context, items[x], items[y]))) { - mem.swap(T, &items[x], &items[y]); + if (context.lessThan(start_index + y, start_index + x) or ((order.*)[x] > (order.*)[y] and !context.lessThan(start_index + x, start_index + y))) { + context.swap(start_index + x, start_index + y); mem.swap(u8, &(order.*)[x], &(order.*)[y]); } } From 3fdf3a3d8ca239f54cde17d544802514f579969b Mon Sep 17 00:00:00 2001 From: p-rosit Date: Tue, 9 Sep 2025 07:42:47 +0200 Subject: [PATCH 08/24] use wrapped context for `blockSwap` --- lib/std/sort/block.zig | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index 99c8f5aed3..39d9fda561 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -544,7 +544,7 @@ pub fn blockContext( // if the second buffer is available, block swap the contents into that if (buffer2.length() > 0) { - blockSwap(T, items, lastA.start, buffer2.start, lastA.length()); + blockSwap(lastA.start, buffer2.start, lastA.length(), wrapped_context); } if (blockA.length() > 0) { @@ -564,7 +564,7 @@ pub fn blockContext( minA = findA; } } - blockSwap(T, items, blockA.start, minA, block_size); + blockSwap(blockA.start, minA, block_size, wrapped_context); // swap the first item of the previous A block back with its original value, which is stored in buffer1 mem.swap(T, &items[blockA.start], &items[indexA]); @@ -575,19 +575,19 @@ pub fn blockContext( // or failing that we'll use a strictly in-place merge algorithm (MergeInPlace) if (buffer2.length() > 0) { - mergeInternal(T, items, lastA, Range.init(lastA.end, B_split), buffer2, inner_context, lessThan); + mergeInternal(T, items, lastA, Range.init(lastA.end, B_split), buffer2, inner_context, lessThan, wrapped_context); } else { mergeInPlace(T, items, lastA, Range.init(lastA.end, B_split), inner_context, lessThan); } if (buffer2.length() > 0) { // copy the previous A block into the buffer2, since that's where we need it to be when we go to merge it anyway - blockSwap(T, items, blockA.start, buffer2.start, block_size); + blockSwap(blockA.start, buffer2.start, block_size, wrapped_context); // this is equivalent to rotating, but faster // the area normally taken up by the A block is either the contents of buffer2, or data we don't need anymore since we memcopied it // either way, we don't need to retain the order of those items, so instead of rotating we can just block swap B to where it belongs - blockSwap(T, items, B_split, blockA.start + block_size - B_remaining, B_remaining); + blockSwap(B_split, blockA.start + block_size - B_remaining, B_remaining, wrapped_context); } else { // we are unable to use the 'buffer2' trick to speed up the rotation operation since buffer2 doesn't exist, so perform a normal rotation mem.rotate(T, items[B_split .. blockA.start + block_size], blockA.start - B_split); @@ -610,7 +610,7 @@ pub fn blockContext( blockB.end = blockB.start; } else { // roll the leftmost A block to the end by swapping it with the next B block - blockSwap(T, items, blockA.start, blockB.start, block_size); + blockSwap(blockA.start, blockB.start, block_size, wrapped_context); lastB = Range.init(blockA.start, blockA.start + block_size); blockA.start += block_size; @@ -628,7 +628,7 @@ pub fn blockContext( // merge the last A block with the remaining B values if (buffer2.length() > 0) { - mergeInternal(T, items, lastA, Range.init(lastA.end, B.end), buffer2, inner_context, lessThan); + mergeInternal(T, items, lastA, Range.init(lastA.end, B.end), buffer2, inner_context, lessThan, wrapped_context); } else { mergeInPlace(T, items, lastA, Range.init(lastA.end, B.end), inner_context, lessThan); } @@ -736,6 +736,7 @@ fn mergeInternal( buffer: Range, context: anytype, comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, + wrapped_context: anytype, ) void { // whenever we find a value to add to the final array, swap it with the value that's already in that spot // when this algorithm is finished, 'buffer' will contain its original contents, but in a different order @@ -760,13 +761,13 @@ fn mergeInternal( } // swap the remainder of A into the final array - blockSwap(T, items, buffer.start + A_count, A.start + insert, A.length() - A_count); + blockSwap(buffer.start + A_count, A.start + insert, A.length() - A_count, wrapped_context); } -fn blockSwap(comptime T: type, items: []T, start1: usize, start2: usize, block_size: usize) void { +fn blockSwap(start1: usize, start2: usize, block_size: usize, context: anytype) void { var index: usize = 0; while (index < block_size) : (index += 1) { - mem.swap(T, &items[start1 + index], &items[start2 + index]); + context.swap(start1 + index, start2 + index); } } From 60dedb9eb86a2b6212e55d46faef5f02bad5605d Mon Sep 17 00:00:00 2001 From: p-rosit Date: Tue, 9 Sep 2025 19:33:02 +0200 Subject: [PATCH 09/24] use wrapped context for `findFirstForward` --- lib/std/sort/block.zig | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index 39d9fda561..823e25080b 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -653,7 +653,7 @@ pub fn blockContext( // the values were pulled out to the left, so redistribute them back to the right var buffer = Range.init(pull[pull_index].range.start, pull[pull_index].range.start + pull[pull_index].count); while (buffer.length() > 0) { - index = findFirstForward(T, items, items[buffer.start], Range.init(buffer.end, pull[pull_index].range.end), unique, inner_context, lessThan); + index = findFirstForward(T, items, buffer.start, items[buffer.start], Range.init(buffer.end, pull[pull_index].range.end), unique, inner_context, lessThan, a, wrapped_context); const amount = index - buffer.end; mem.rotate(T, items[buffer.start..index], buffer.length()); buffer.start += (amount + 1); @@ -776,17 +776,19 @@ fn blockSwap(start1: usize, start2: usize, block_size: usize, context: anytype) fn findFirstForward( comptime T: type, items: []T, + value_index: usize, value: T, range: Range, unique: usize, context: anytype, comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, + start_index: usize, + wrapped_context: anytype, ) usize { - if (range.length() == 0) return range.start; const skip = @max(range.length() / unique, @as(usize, 1)); var index = range.start + skip; - while (lessThan(context, items[index - 1], value)) : (index += skip) { + while (wrapped_context.lessThan(start_index + index - 1, start_index + value_index)) : (index += skip) { if (index >= range.end - skip) { return binaryFirst(T, items, value, Range.init(index, range.end), context, lessThan); } From 89d9f0b75cd625f69281eb3133e1a8b819039ad4 Mon Sep 17 00:00:00 2001 From: p-rosit Date: Tue, 9 Sep 2025 19:36:33 +0200 Subject: [PATCH 10/24] use wrapped context in `findFirstBackward` --- lib/std/sort/block.zig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index 823e25080b..c8eb8cb799 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -396,7 +396,7 @@ pub fn blockContext( last = index - 1; count += 1; }) { - index = findFirstBackward(T, items, items[last], Range.init(B.start, last), find - count, inner_context, lessThan); + index = findFirstBackward(T, items, last, items[last], Range.init(B.start, last), find - count, inner_context, lessThan, a, wrapped_context); if (index == B.start) break; } index = last; @@ -457,7 +457,7 @@ pub fn blockContext( index = pull[pull_index].from; count = 1; while (count < length) : (count += 1) { - index = findFirstBackward(T, items, items[index - 1], Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, inner_context, lessThan); + index = findFirstBackward(T, items, index - 1, items[index - 1], Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, inner_context, lessThan, a, wrapped_context); const range = Range.init(index + 1, pull[pull_index].from + 1); mem.rotate(T, items[range.start..range.end], range.length() - count); pull[pull_index].from = index + count; @@ -800,17 +800,20 @@ fn findFirstForward( fn findFirstBackward( comptime T: type, items: []T, + value_index: usize, value: T, range: Range, unique: usize, context: anytype, comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, + start_index: usize, + wrapped_context: anytype, ) usize { if (range.length() == 0) return range.start; const skip = @max(range.length() / unique, @as(usize, 1)); var index = range.end - skip; - while (index > range.start and !lessThan(context, items[index - 1], value)) : (index -= skip) { + while (index > range.start and !wrapped_context.lessThan(start_index + index - 1, start_index + value_index)) : (index -= skip) { if (index < range.start + skip) { return binaryFirst(T, items, value, Range.init(range.start, index), context, lessThan); } From 8e9a599ba55f73412ef20b3fe46b0e58732cf015 Mon Sep 17 00:00:00 2001 From: p-rosit Date: Tue, 9 Sep 2025 19:38:12 +0200 Subject: [PATCH 11/24] use wrapped context in `findLastForward` --- lib/std/sort/block.zig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index c8eb8cb799..57fb538e9a 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -342,7 +342,7 @@ pub fn blockContext( last = index; count += 1; }) { - index = findLastForward(T, items, items[last], Range.init(last + 1, A.end), find - count, inner_context, lessThan); + index = findLastForward(T, items, last, items[last], Range.init(last + 1, A.end), find - count, inner_context, lessThan, a, wrapped_context); if (index == A.end) break; } index = last; @@ -467,7 +467,7 @@ pub fn blockContext( index = pull[pull_index].from + 1; count = 1; while (count < length) : (count += 1) { - index = findLastForward(T, items, items[index], Range.init(index, pull[pull_index].to), length - count, inner_context, lessThan); + index = findLastForward(T, items, index, items[index], Range.init(index, pull[pull_index].to), length - count, inner_context, lessThan, a, wrapped_context); const range = Range.init(pull[pull_index].from, index - 1); mem.rotate(T, items[range.start..range.end], count); pull[pull_index].from = index - 1 - count; @@ -825,17 +825,20 @@ fn findFirstBackward( fn findLastForward( comptime T: type, items: []T, + value_index: usize, value: T, range: Range, unique: usize, context: anytype, comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, + start_index: usize, + wrapped_context: anytype, ) usize { if (range.length() == 0) return range.start; const skip = @max(range.length() / unique, @as(usize, 1)); var index = range.start + skip; - while (!lessThan(context, value, items[index - 1])) : (index += skip) { + while (!wrapped_context.lessThan(start_index + value_index, start_index + index - 1)) : (index += skip) { if (index >= range.end - skip) { return binaryLast(T, items, value, Range.init(index, range.end), context, lessThan); } From 74aceb6161c9f40bb5e6d81bf2f7270b7d56ff1a Mon Sep 17 00:00:00 2001 From: p-rosit Date: Tue, 9 Sep 2025 19:39:33 +0200 Subject: [PATCH 12/24] use wrapped context in `findLastBackward` --- lib/std/sort/block.zig | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index 57fb538e9a..3c6ed5adfe 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -664,7 +664,7 @@ pub fn blockContext( // the values were pulled out to the right, so redistribute them back to the left var buffer = Range.init(pull[pull_index].range.end - pull[pull_index].count, pull[pull_index].range.end); while (buffer.length() > 0) { - index = findLastBackward(T, items, items[buffer.end - 1], Range.init(pull[pull_index].range.start, buffer.start), unique, inner_context, lessThan); + index = findLastBackward(T, items, buffer.end - 1, items[buffer.end - 1], Range.init(pull[pull_index].range.start, buffer.start), unique, inner_context, lessThan, a, wrapped_context); const amount = buffer.start - index; mem.rotate(T, items[index..buffer.end], amount); buffer.start -= amount; @@ -850,17 +850,20 @@ fn findLastForward( fn findLastBackward( comptime T: type, items: []T, + value_index: usize, value: T, range: Range, unique: usize, context: anytype, comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, + start_index: usize, + wrapped_context: anytype, ) usize { if (range.length() == 0) return range.start; const skip = @max(range.length() / unique, @as(usize, 1)); var index = range.end - skip; - while (index > range.start and lessThan(context, value, items[index - 1])) : (index -= skip) { + while (index > range.start and wrapped_context.lessThan(start_index + value_index, start_index + index - 1)) : (index -= skip) { if (index < range.start + skip) { return binaryLast(T, items, value, Range.init(range.start, index), context, lessThan); } From 2c06f3713847687a2cd63bdae01a3243a6c24b76 Mon Sep 17 00:00:00 2001 From: p-rosit Date: Tue, 9 Sep 2025 19:52:01 +0200 Subject: [PATCH 13/24] use wrapped context in `mergeInPlace` --- lib/std/sort/block.zig | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index 3c6ed5adfe..ec646061fb 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -577,7 +577,7 @@ pub fn blockContext( if (buffer2.length() > 0) { mergeInternal(T, items, lastA, Range.init(lastA.end, B_split), buffer2, inner_context, lessThan, wrapped_context); } else { - mergeInPlace(T, items, lastA, Range.init(lastA.end, B_split), inner_context, lessThan); + mergeInPlace(T, items, lastA, Range.init(lastA.end, B_split), inner_context, lessThan, a, wrapped_context); } if (buffer2.length() > 0) { @@ -630,7 +630,7 @@ pub fn blockContext( if (buffer2.length() > 0) { mergeInternal(T, items, lastA, Range.init(lastA.end, B.end), buffer2, inner_context, lessThan, wrapped_context); } else { - mergeInPlace(T, items, lastA, Range.init(lastA.end, B.end), inner_context, lessThan); + mergeInPlace(T, items, lastA, Range.init(lastA.end, B.end), inner_context, lessThan, a, wrapped_context); } } } @@ -686,6 +686,8 @@ fn mergeInPlace( B_arg: Range, context: anytype, comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, + start_index: usize, + wrapped_context: anytype, ) void { if (A_arg.length() == 0 or B_arg.length() == 0) return; @@ -716,7 +718,7 @@ fn mergeInPlace( // rotate A into place const amount = mid - A.end; - mem.rotate(T, items[A.start..mid], A.length()); + wrapped_context.rotate(Range.init(A.start, mid), A.length(), start_index); if (B.end == mid) break; // calculate the new A and B ranges From 7a376a4371306594926367d85aed1c0268d934ca Mon Sep 17 00:00:00 2001 From: p-rosit Date: Fri, 12 Sep 2025 07:57:17 +0200 Subject: [PATCH 14/24] introduce `context.rotate` --- lib/std/sort/block.zig | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index ec646061fb..412864c7d7 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -155,6 +155,22 @@ pub fn blockContext( pub fn swap(ctx: @This(), i: usize, j: usize) void { return ctx.sub_ctx.swap(i, j); } + + pub fn rotate(ctx: @This(), A: Range, amount: usize, start_index: usize) void { + ctx.naiveReverse(Range.init(A.start, A.start + amount), start_index); + ctx.naiveReverse(Range.init(A.start + amount, A.end), start_index); + ctx.naiveReverse(A, start_index); + } + + fn naiveReverse(ctx: @This(), A: Range, start_index: usize) void { + var i = start_index + A.start; + var j = start_index + A.end - 1; + while (j > i) { + ctx.sub_ctx.swap(i, j); + i += 1; + j -= 1; + } + } }; const wrapped_context = Context{ .sub_ctx = context }; const lessThan = if (builtin.mode == .Debug) struct { From 534f4917314344a74d4631d73825a404663cb052 Mon Sep 17 00:00:00 2001 From: p-rosit Date: Tue, 9 Sep 2025 19:44:26 +0200 Subject: [PATCH 15/24] use wrapped context in `mergeInternal` --- lib/std/sort/block.zig | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index 412864c7d7..b30357c1b9 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -591,7 +591,7 @@ pub fn blockContext( // or failing that we'll use a strictly in-place merge algorithm (MergeInPlace) if (buffer2.length() > 0) { - mergeInternal(T, items, lastA, Range.init(lastA.end, B_split), buffer2, inner_context, lessThan, wrapped_context); + mergeInternal(lastA, Range.init(lastA.end, B_split), buffer2, a, wrapped_context); } else { mergeInPlace(T, items, lastA, Range.init(lastA.end, B_split), inner_context, lessThan, a, wrapped_context); } @@ -644,7 +644,7 @@ pub fn blockContext( // merge the last A block with the remaining B values if (buffer2.length() > 0) { - mergeInternal(T, items, lastA, Range.init(lastA.end, B.end), buffer2, inner_context, lessThan, wrapped_context); + mergeInternal(lastA, Range.init(lastA.end, B.end), buffer2, a, wrapped_context); } else { mergeInPlace(T, items, lastA, Range.init(lastA.end, B.end), inner_context, lessThan, a, wrapped_context); } @@ -747,14 +747,11 @@ fn mergeInPlace( // merge operation using an internal buffer fn mergeInternal( - comptime T: type, - items: []T, A: Range, B: Range, buffer: Range, + start_index: usize, context: anytype, - comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, - wrapped_context: anytype, ) void { // whenever we find a value to add to the final array, swap it with the value that's already in that spot // when this algorithm is finished, 'buffer' will contain its original contents, but in a different order @@ -764,13 +761,13 @@ fn mergeInternal( if (B.length() > 0 and A.length() > 0) { while (true) { - if (!lessThan(context, items[B.start + B_count], items[buffer.start + A_count])) { - mem.swap(T, &items[A.start + insert], &items[buffer.start + A_count]); + if (!context.lessThan(start_index + B.start + B_count, start_index + buffer.start + A_count)) { + context.swap(start_index + A.start + insert, start_index + buffer.start + A_count); A_count += 1; insert += 1; if (A_count >= A.length()) break; } else { - mem.swap(T, &items[A.start + insert], &items[B.start + B_count]); + context.swap(start_index + A.start + insert, start_index + B.start + B_count); B_count += 1; insert += 1; if (B_count >= B.length()) break; @@ -779,7 +776,7 @@ fn mergeInternal( } // swap the remainder of A into the final array - blockSwap(buffer.start + A_count, A.start + insert, A.length() - A_count, wrapped_context); + blockSwap(buffer.start + A_count, A.start + insert, A.length() - A_count, context); } fn blockSwap(start1: usize, start2: usize, block_size: usize, context: anytype) void { From d7c812d524b50dc22223fecd1267a04500d1d4e4 Mon Sep 17 00:00:00 2001 From: p-rosit Date: Tue, 9 Sep 2025 20:06:24 +0200 Subject: [PATCH 16/24] use wrapped context in `binaryFirst` --- lib/std/sort/block.zig | 46 ++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index b30357c1b9..d1d019e91b 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -412,7 +412,7 @@ pub fn blockContext( last = index - 1; count += 1; }) { - index = findFirstBackward(T, items, last, items[last], Range.init(B.start, last), find - count, inner_context, lessThan, a, wrapped_context); + index = findFirstBackward(last, Range.init(B.start, last), find - count, a, wrapped_context); if (index == B.start) break; } index = last; @@ -473,7 +473,7 @@ pub fn blockContext( index = pull[pull_index].from; count = 1; while (count < length) : (count += 1) { - index = findFirstBackward(T, items, index - 1, items[index - 1], Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, inner_context, lessThan, a, wrapped_context); + index = findFirstBackward(index - 1, Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, a, wrapped_context); const range = Range.init(index + 1, pull[pull_index].from + 1); mem.rotate(T, items[range.start..range.end], range.length() - count); pull[pull_index].from = index + count; @@ -569,7 +569,7 @@ pub fn blockContext( // then drop that minimum A block behind. or if there are no B blocks left then keep dropping the remaining A blocks. if ((lastB.length() > 0 and !lessThan(inner_context, items[lastB.end - 1], items[indexA])) or blockB.length() == 0) { // figure out where to split the previous B block, and rotate it at the split - const B_split = binaryFirst(T, items, items[indexA], lastB, inner_context, lessThan); + const B_split = binaryFirst(indexA, lastB, a, wrapped_context); const B_remaining = lastB.end - B_split; // swap the minimum A block to the beginning of the rolling A blocks @@ -669,7 +669,7 @@ pub fn blockContext( // the values were pulled out to the left, so redistribute them back to the right var buffer = Range.init(pull[pull_index].range.start, pull[pull_index].range.start + pull[pull_index].count); while (buffer.length() > 0) { - index = findFirstForward(T, items, buffer.start, items[buffer.start], Range.init(buffer.end, pull[pull_index].range.end), unique, inner_context, lessThan, a, wrapped_context); + index = findFirstForward(buffer.start, Range.init(buffer.end, pull[pull_index].range.end), unique, a, wrapped_context); const amount = index - buffer.end; mem.rotate(T, items[buffer.start..index], buffer.length()); buffer.start += (amount + 1); @@ -730,7 +730,7 @@ fn mergeInPlace( while (true) { // find the first place in B where the first item in A needs to be inserted - const mid = binaryFirst(T, items, items[A.start], B, context, lessThan); + const mid = binaryFirst(A.start, B, start_index, wrapped_context); // rotate A into place const amount = mid - A.end; @@ -789,52 +789,42 @@ fn blockSwap(start1: usize, start2: usize, block_size: usize, context: anytype) // combine a linear search with a binary search to reduce the number of comparisons in situations // where have some idea as to how many unique values there are and where the next value might be fn findFirstForward( - comptime T: type, - items: []T, value_index: usize, - value: T, range: Range, unique: usize, - context: anytype, - comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, start_index: usize, - wrapped_context: anytype, + context: anytype, ) usize { const skip = @max(range.length() / unique, @as(usize, 1)); var index = range.start + skip; - while (wrapped_context.lessThan(start_index + index - 1, start_index + value_index)) : (index += skip) { + while (context.lessThan(start_index + index - 1, start_index + value_index)) : (index += skip) { if (index >= range.end - skip) { - return binaryFirst(T, items, value, Range.init(index, range.end), context, lessThan); + return binaryFirst(value_index, Range.init(index, range.end), start_index, context); } } - return binaryFirst(T, items, value, Range.init(index - skip, index), context, lessThan); + return binaryFirst(value_index, Range.init(index - skip, index), start_index, context); } fn findFirstBackward( - comptime T: type, - items: []T, value_index: usize, - value: T, range: Range, unique: usize, - context: anytype, - comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, start_index: usize, - wrapped_context: anytype, + context: anytype, ) usize { if (range.length() == 0) return range.start; const skip = @max(range.length() / unique, @as(usize, 1)); var index = range.end - skip; - while (index > range.start and !wrapped_context.lessThan(start_index + index - 1, start_index + value_index)) : (index -= skip) { + while (index > range.start and !context.lessThan(start_index + index - 1, start_index + value_index)) : (index -= skip) { if (index < range.start + skip) { - return binaryFirst(T, items, value, Range.init(range.start, index), context, lessThan); + return binaryFirst(value_index, Range.init(range.start, index), start_index, context); } } - return binaryFirst(T, items, value, Range.init(index, index + skip), context, lessThan); + return binaryFirst(value_index, Range.init(index, index + skip), start_index, context); } fn findLastForward( @@ -888,12 +878,10 @@ fn findLastBackward( } fn binaryFirst( - comptime T: type, - items: []T, - value: T, + value_index: usize, range: Range, + start_index: usize, context: anytype, - comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, ) usize { var curr = range.start; var size = range.length(); @@ -902,8 +890,8 @@ fn binaryFirst( const offset = size % 2; size /= 2; - const mid_item = items[curr + size]; - if (lessThan(context, mid_item, value)) { + const mid_index = curr + size; + if (context.lessThan(start_index + mid_index, start_index + value_index)) { curr += size + offset; } } From 26a9ca67875bfecd66ec3b3981dca737e9b111af Mon Sep 17 00:00:00 2001 From: p-rosit Date: Tue, 9 Sep 2025 20:16:09 +0200 Subject: [PATCH 17/24] use wrapped context in `binaryLast` --- lib/std/sort/block.zig | 58 +++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index d1d019e91b..f9a78b25f6 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -358,7 +358,7 @@ pub fn blockContext( last = index; count += 1; }) { - index = findLastForward(T, items, last, items[last], Range.init(last + 1, A.end), find - count, inner_context, lessThan, a, wrapped_context); + index = findLastForward(last, Range.init(last + 1, A.end), find - count, a, wrapped_context); if (index == A.end) break; } index = last; @@ -483,7 +483,7 @@ pub fn blockContext( index = pull[pull_index].from + 1; count = 1; while (count < length) : (count += 1) { - index = findLastForward(T, items, index, items[index], Range.init(index, pull[pull_index].to), length - count, inner_context, lessThan, a, wrapped_context); + index = findLastForward(index, Range.init(index, pull[pull_index].to), length - count, a, wrapped_context); const range = Range.init(pull[pull_index].from, index - 1); mem.rotate(T, items[range.start..range.end], count); pull[pull_index].from = index - 1 - count; @@ -593,7 +593,7 @@ pub fn blockContext( if (buffer2.length() > 0) { mergeInternal(lastA, Range.init(lastA.end, B_split), buffer2, a, wrapped_context); } else { - mergeInPlace(T, items, lastA, Range.init(lastA.end, B_split), inner_context, lessThan, a, wrapped_context); + mergeInPlace(lastA, Range.init(lastA.end, B_split), a, wrapped_context); } if (buffer2.length() > 0) { @@ -646,7 +646,7 @@ pub fn blockContext( if (buffer2.length() > 0) { mergeInternal(lastA, Range.init(lastA.end, B.end), buffer2, a, wrapped_context); } else { - mergeInPlace(T, items, lastA, Range.init(lastA.end, B.end), inner_context, lessThan, a, wrapped_context); + mergeInPlace(lastA, Range.init(lastA.end, B.end), a, wrapped_context); } } } @@ -680,7 +680,7 @@ pub fn blockContext( // the values were pulled out to the right, so redistribute them back to the left var buffer = Range.init(pull[pull_index].range.end - pull[pull_index].count, pull[pull_index].range.end); while (buffer.length() > 0) { - index = findLastBackward(T, items, buffer.end - 1, items[buffer.end - 1], Range.init(pull[pull_index].range.start, buffer.start), unique, inner_context, lessThan, a, wrapped_context); + index = findLastBackward(buffer.end - 1, Range.init(pull[pull_index].range.start, buffer.start), unique, a, wrapped_context); const amount = buffer.start - index; mem.rotate(T, items[index..buffer.end], amount); buffer.start -= amount; @@ -696,14 +696,10 @@ pub fn blockContext( } // merge operation without a buffer fn mergeInPlace( - comptime T: type, - items: []T, A_arg: Range, B_arg: Range, - context: anytype, - comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, start_index: usize, - wrapped_context: anytype, + context: anytype, ) void { if (A_arg.length() == 0 or B_arg.length() == 0) return; @@ -730,17 +726,17 @@ fn mergeInPlace( while (true) { // find the first place in B where the first item in A needs to be inserted - const mid = binaryFirst(A.start, B, start_index, wrapped_context); + const mid = binaryFirst(A.start, B, start_index, context); // rotate A into place const amount = mid - A.end; - wrapped_context.rotate(Range.init(A.start, mid), A.length(), start_index); + context.rotate(Range.init(A.start, mid), A.length(), start_index); if (B.end == mid) break; // calculate the new A and B ranges B.start = mid; A = Range.init(A.start + amount, B.start); - A.start = binaryLast(T, items, items[A.start], A, context, lessThan); + A.start = binaryLast(A.start, A, start_index, context); if (A.length() == 0) break; } } @@ -828,53 +824,43 @@ fn findFirstBackward( } fn findLastForward( - comptime T: type, - items: []T, value_index: usize, - value: T, range: Range, unique: usize, - context: anytype, - comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, start_index: usize, - wrapped_context: anytype, + context: anytype, ) usize { if (range.length() == 0) return range.start; const skip = @max(range.length() / unique, @as(usize, 1)); var index = range.start + skip; - while (!wrapped_context.lessThan(start_index + value_index, start_index + index - 1)) : (index += skip) { + while (!context.lessThan(start_index + value_index, start_index + index - 1)) : (index += skip) { if (index >= range.end - skip) { - return binaryLast(T, items, value, Range.init(index, range.end), context, lessThan); + return binaryLast(value_index, Range.init(index, range.end), start_index, context); } } - return binaryLast(T, items, value, Range.init(index - skip, index), context, lessThan); + return binaryLast(value_index, Range.init(index - skip, index), start_index, context); } fn findLastBackward( - comptime T: type, - items: []T, value_index: usize, - value: T, range: Range, unique: usize, - context: anytype, - comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, start_index: usize, - wrapped_context: anytype, + context: anytype, ) usize { if (range.length() == 0) return range.start; const skip = @max(range.length() / unique, @as(usize, 1)); var index = range.end - skip; - while (index > range.start and wrapped_context.lessThan(start_index + value_index, start_index + index - 1)) : (index -= skip) { + while (index > range.start and context.lessThan(start_index + value_index, start_index + index - 1)) : (index -= skip) { if (index < range.start + skip) { - return binaryLast(T, items, value, Range.init(range.start, index), context, lessThan); + return binaryLast(value_index, Range.init(range.start, index), start_index, context); } } - return binaryLast(T, items, value, Range.init(index, index + skip), context, lessThan); + return binaryLast(value_index, Range.init(index, index + skip), start_index, context); } fn binaryFirst( @@ -899,12 +885,10 @@ fn binaryFirst( } fn binaryLast( - comptime T: type, - items: []T, - value: T, + value_index: usize, range: Range, + start_index: usize, context: anytype, - comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, ) usize { var curr = range.start; var size = range.length(); @@ -913,8 +897,8 @@ fn binaryLast( const offset = size % 2; size /= 2; - const mid_item = items[curr + size]; - if (!lessThan(context, value, mid_item)) { + const mid_index = curr + size; + if (!context.lessThan(start_index + value_index, start_index + mid_index)) { curr += size + offset; } } From f742788e32372598ef8dafe921ddb190a71d49a3 Mon Sep 17 00:00:00 2001 From: p-rosit Date: Tue, 9 Sep 2025 19:56:54 +0200 Subject: [PATCH 18/24] use wrapped context in `blockContext` --- lib/std/sort/block.zig | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index f9a78b25f6..f1e327db56 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -115,7 +115,7 @@ pub fn block( return mem.swap(T, &ctx.items[a], &ctx.items[b]); } }; - return blockContext(T, items, context, lessThanFn, 0, items.len, Context{ .items = items, .sub_ctx = context }); + return blockContext(0, items.len, Context{ .items = items, .sub_ctx = context }); } /// Stable in-place sort. O(n) best case, O(n*log(n)) worst case and average case. @@ -127,10 +127,6 @@ pub fn block( /// NOTE: The algorithm only works when the comparison is less-than or greater-than. /// (See https://github.com/ziglang/zig/issues/8289) pub fn blockContext( - comptime T: type, - items: []T, - inner_context: anytype, - comptime lessThanFn: fn (@TypeOf(inner_context), lhs: T, rhs: T) bool, a: usize, b: usize, context: anytype, @@ -173,14 +169,6 @@ pub fn blockContext( } }; const wrapped_context = Context{ .sub_ctx = context }; - const lessThan = if (builtin.mode == .Debug) struct { - fn lessThan(ctx: @TypeOf(inner_context), lhs: T, rhs: T) bool { - const lt = lessThanFn(ctx, lhs, rhs); - const gt = lessThanFn(ctx, rhs, lhs); - std.debug.assert(!(lt and gt)); - return lt; - } - }.lessThan else lessThanFn; const range_length = b - a; @@ -475,7 +463,7 @@ pub fn blockContext( while (count < length) : (count += 1) { index = findFirstBackward(index - 1, Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, a, wrapped_context); const range = Range.init(index + 1, pull[pull_index].from + 1); - mem.rotate(T, items[range.start..range.end], range.length() - count); + wrapped_context.rotate(range, range.length() - count, a); pull[pull_index].from = index + count; } } else if (pull[pull_index].to > pull[pull_index].from) { @@ -485,7 +473,7 @@ pub fn blockContext( while (count < length) : (count += 1) { index = findLastForward(index, Range.init(index, pull[pull_index].to), length - count, a, wrapped_context); const range = Range.init(pull[pull_index].from, index - 1); - mem.rotate(T, items[range.start..range.end], count); + wrapped_context.rotate(range, count, a); pull[pull_index].from = index - 1 - count; } } @@ -529,10 +517,10 @@ pub fn blockContext( } } - if (lessThan(inner_context, items[B.end - 1], items[A.start])) { + if (wrapped_context.lessThan(a + B.end - 1, a + A.start)) { // the two ranges are in reverse order, so a simple rotation should fix it - mem.rotate(T, items[A.start..B.end], A.length()); - } else if (lessThan(inner_context, items[A.end], items[A.end - 1])) { + wrapped_context.rotate(Range.init(A.start, B.end), A.length(), a); + } else if (wrapped_context.lessThan(a + A.end, a + A.end - 1)) { // these two ranges weren't already in order, so we'll need to merge them! var findA: usize = undefined; @@ -547,7 +535,7 @@ pub fn blockContext( indexA += 1; index += block_size; }) { - mem.swap(T, &items[indexA], &items[index]); + context.swap(a + indexA, a + index); } // start rolling the A blocks through the B blocks! @@ -567,7 +555,7 @@ pub fn blockContext( while (true) { // if there's a previous B block and the first value of the minimum A block is <= the last value of the previous B block, // then drop that minimum A block behind. or if there are no B blocks left then keep dropping the remaining A blocks. - if ((lastB.length() > 0 and !lessThan(inner_context, items[lastB.end - 1], items[indexA])) or blockB.length() == 0) { + if ((lastB.length() > 0 and !wrapped_context.lessThan(a + lastB.end - 1, a + indexA)) or blockB.length() == 0) { // figure out where to split the previous B block, and rotate it at the split const B_split = binaryFirst(indexA, lastB, a, wrapped_context); const B_remaining = lastB.end - B_split; @@ -576,14 +564,14 @@ pub fn blockContext( var minA = blockA.start; findA = minA + block_size; while (findA < blockA.end) : (findA += block_size) { - if (lessThan(inner_context, items[findA], items[minA])) { + if (wrapped_context.lessThan(a + findA, a + minA)) { minA = findA; } } blockSwap(blockA.start, minA, block_size, wrapped_context); // swap the first item of the previous A block back with its original value, which is stored in buffer1 - mem.swap(T, &items[blockA.start], &items[indexA]); + context.swap(a + blockA.start, a + indexA); indexA += 1; // locally merge the previous A block with the B values that follow it @@ -606,7 +594,7 @@ pub fn blockContext( blockSwap(B_split, blockA.start + block_size - B_remaining, B_remaining, wrapped_context); } else { // we are unable to use the 'buffer2' trick to speed up the rotation operation since buffer2 doesn't exist, so perform a normal rotation - mem.rotate(T, items[B_split .. blockA.start + block_size], blockA.start - B_split); + wrapped_context.rotate(Range.init(B_split, blockA.start + block_size), blockA.start - B_split, a); } // update the range for the remaining A blocks, and the range remaining from the B block after it was split @@ -618,7 +606,7 @@ pub fn blockContext( if (blockA.length() == 0) break; } else if (blockB.length() < block_size) { // move the last B block, which is unevenly sized, to before the remaining A blocks, by using a rotation - mem.rotate(T, items[blockA.start..blockB.end], blockB.start - blockA.start); + wrapped_context.rotate(Range.init(blockA.start, blockB.end), blockB.start - blockA.start, a); lastB = Range.init(blockA.start, blockA.start + blockB.length()); blockA.start += blockB.length(); @@ -660,7 +648,7 @@ pub fn blockContext( // it was consistently slightly slower than a simple insertion sort, // even for tens of millions of items. this may be because insertion // sort is quite fast when the data is already somewhat sorted, like it is here - sort.insertion(T, items[buffer2.start..buffer2.end], inner_context, lessThan); + sort.insertionContext(a + buffer2.start, a + buffer2.end, wrapped_context); pull_index = 0; while (pull_index < 2) : (pull_index += 1) { @@ -671,7 +659,7 @@ pub fn blockContext( while (buffer.length() > 0) { index = findFirstForward(buffer.start, Range.init(buffer.end, pull[pull_index].range.end), unique, a, wrapped_context); const amount = index - buffer.end; - mem.rotate(T, items[buffer.start..index], buffer.length()); + wrapped_context.rotate(Range.init(buffer.start, index), buffer.length(), a); buffer.start += (amount + 1); buffer.end += amount; unique -= 2; @@ -682,7 +670,7 @@ pub fn blockContext( while (buffer.length() > 0) { index = findLastBackward(buffer.end - 1, Range.init(pull[pull_index].range.start, buffer.start), unique, a, wrapped_context); const amount = buffer.start - index; - mem.rotate(T, items[index..buffer.end], amount); + wrapped_context.rotate(Range.init(index, buffer.end), amount, a); buffer.start -= amount; buffer.end -= (amount + 1); unique -= 2; From a6b6e4dd034f1d73a0716ee8404967c547003dd3 Mon Sep 17 00:00:00 2001 From: p-rosit Date: Wed, 10 Sep 2025 17:28:46 +0200 Subject: [PATCH 19/24] Iterator uses global indices --- lib/std/sort/block.zig | 184 +++++++++++++++++++++-------------------- 1 file changed, 93 insertions(+), 91 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index f1e327db56..618a77781e 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -21,6 +21,7 @@ const Range = struct { }; const Iterator = struct { + start_index: usize, size: usize, power_of_two: usize, numerator: usize, @@ -29,12 +30,13 @@ const Iterator = struct { decimal_step: usize, numerator_step: usize, - fn init(size2: usize, min_level: usize) Iterator { + fn init(start_index: usize, size2: usize, min_level: usize) Iterator { const power_of_two = math.floorPowerOfTwo(usize, size2); const denominator = power_of_two / min_level; return Iterator{ .numerator = 0, - .decimal = 0, + .decimal = start_index, + .start_index = start_index, .size = size2, .power_of_two = power_of_two, .denominator = denominator, @@ -45,7 +47,7 @@ const Iterator = struct { fn begin(self: *Iterator) void { self.numerator = 0; - self.decimal = 0; + self.decimal = self.start_index; } fn nextRange(self: *Iterator) Range { @@ -65,7 +67,7 @@ const Iterator = struct { } fn finished(self: *Iterator) bool { - return self.decimal >= self.size; + return self.decimal >= self.start_index + self.size; } fn nextLevel(self: *Iterator) bool { @@ -189,82 +191,82 @@ pub fn blockContext( // sort groups of 4-8 items at a time using an unstable sorting network, // but keep track of the original item orders to force it to be stable // http://pages.ripco.net/~jgamble/nw.html - var iterator = Iterator.init(range_length, 4); + var iterator = Iterator.init(a, range_length, 4); while (!iterator.finished()) { var order = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7 }; const range = iterator.nextRange(); switch (range.length()) { 8 => { - swap(&order, a + range.start, 0, 1, wrapped_context); - swap(&order, a + range.start, 2, 3, wrapped_context); - swap(&order, a + range.start, 4, 5, wrapped_context); - swap(&order, a + range.start, 6, 7, wrapped_context); - swap(&order, a + range.start, 0, 2, wrapped_context); - swap(&order, a + range.start, 1, 3, wrapped_context); - swap(&order, a + range.start, 4, 6, wrapped_context); - swap(&order, a + range.start, 5, 7, wrapped_context); - swap(&order, a + range.start, 1, 2, wrapped_context); - swap(&order, a + range.start, 5, 6, wrapped_context); - swap(&order, a + range.start, 0, 4, wrapped_context); - swap(&order, a + range.start, 3, 7, wrapped_context); - swap(&order, a + range.start, 1, 5, wrapped_context); - swap(&order, a + range.start, 2, 6, wrapped_context); - swap(&order, a + range.start, 1, 4, wrapped_context); - swap(&order, a + range.start, 3, 6, wrapped_context); - swap(&order, a + range.start, 2, 4, wrapped_context); - swap(&order, a + range.start, 3, 5, wrapped_context); - swap(&order, a + range.start, 3, 4, wrapped_context); + swap(&order, range.start, 0, 1, wrapped_context); + swap(&order, range.start, 2, 3, wrapped_context); + swap(&order, range.start, 4, 5, wrapped_context); + swap(&order, range.start, 6, 7, wrapped_context); + swap(&order, range.start, 0, 2, wrapped_context); + swap(&order, range.start, 1, 3, wrapped_context); + swap(&order, range.start, 4, 6, wrapped_context); + swap(&order, range.start, 5, 7, wrapped_context); + swap(&order, range.start, 1, 2, wrapped_context); + swap(&order, range.start, 5, 6, wrapped_context); + swap(&order, range.start, 0, 4, wrapped_context); + swap(&order, range.start, 3, 7, wrapped_context); + swap(&order, range.start, 1, 5, wrapped_context); + swap(&order, range.start, 2, 6, wrapped_context); + swap(&order, range.start, 1, 4, wrapped_context); + swap(&order, range.start, 3, 6, wrapped_context); + swap(&order, range.start, 2, 4, wrapped_context); + swap(&order, range.start, 3, 5, wrapped_context); + swap(&order, range.start, 3, 4, wrapped_context); }, 7 => { - swap(&order, a + range.start, 1, 2, wrapped_context); - swap(&order, a + range.start, 3, 4, wrapped_context); - swap(&order, a + range.start, 5, 6, wrapped_context); - swap(&order, a + range.start, 0, 2, wrapped_context); - swap(&order, a + range.start, 3, 5, wrapped_context); - swap(&order, a + range.start, 4, 6, wrapped_context); - swap(&order, a + range.start, 0, 1, wrapped_context); - swap(&order, a + range.start, 4, 5, wrapped_context); - swap(&order, a + range.start, 2, 6, wrapped_context); - swap(&order, a + range.start, 0, 4, wrapped_context); - swap(&order, a + range.start, 1, 5, wrapped_context); - swap(&order, a + range.start, 0, 3, wrapped_context); - swap(&order, a + range.start, 2, 5, wrapped_context); - swap(&order, a + range.start, 1, 3, wrapped_context); - swap(&order, a + range.start, 2, 4, wrapped_context); - swap(&order, a + range.start, 2, 3, wrapped_context); + swap(&order, range.start, 1, 2, wrapped_context); + swap(&order, range.start, 3, 4, wrapped_context); + swap(&order, range.start, 5, 6, wrapped_context); + swap(&order, range.start, 0, 2, wrapped_context); + swap(&order, range.start, 3, 5, wrapped_context); + swap(&order, range.start, 4, 6, wrapped_context); + swap(&order, range.start, 0, 1, wrapped_context); + swap(&order, range.start, 4, 5, wrapped_context); + swap(&order, range.start, 2, 6, wrapped_context); + swap(&order, range.start, 0, 4, wrapped_context); + swap(&order, range.start, 1, 5, wrapped_context); + swap(&order, range.start, 0, 3, wrapped_context); + swap(&order, range.start, 2, 5, wrapped_context); + swap(&order, range.start, 1, 3, wrapped_context); + swap(&order, range.start, 2, 4, wrapped_context); + swap(&order, range.start, 2, 3, wrapped_context); }, 6 => { - swap(&order, a + range.start, 1, 2, wrapped_context); - swap(&order, a + range.start, 4, 5, wrapped_context); - swap(&order, a + range.start, 0, 2, wrapped_context); - swap(&order, a + range.start, 3, 5, wrapped_context); - swap(&order, a + range.start, 0, 1, wrapped_context); - swap(&order, a + range.start, 3, 4, wrapped_context); - swap(&order, a + range.start, 2, 5, wrapped_context); - swap(&order, a + range.start, 0, 3, wrapped_context); - swap(&order, a + range.start, 1, 4, wrapped_context); - swap(&order, a + range.start, 2, 4, wrapped_context); - swap(&order, a + range.start, 1, 3, wrapped_context); - swap(&order, a + range.start, 2, 3, wrapped_context); + swap(&order, range.start, 1, 2, wrapped_context); + swap(&order, range.start, 4, 5, wrapped_context); + swap(&order, range.start, 0, 2, wrapped_context); + swap(&order, range.start, 3, 5, wrapped_context); + swap(&order, range.start, 0, 1, wrapped_context); + swap(&order, range.start, 3, 4, wrapped_context); + swap(&order, range.start, 2, 5, wrapped_context); + swap(&order, range.start, 0, 3, wrapped_context); + swap(&order, range.start, 1, 4, wrapped_context); + swap(&order, range.start, 2, 4, wrapped_context); + swap(&order, range.start, 1, 3, wrapped_context); + swap(&order, range.start, 2, 3, wrapped_context); }, 5 => { - swap(&order, a + range.start, 0, 1, wrapped_context); - swap(&order, a + range.start, 3, 4, wrapped_context); - swap(&order, a + range.start, 2, 4, wrapped_context); - swap(&order, a + range.start, 2, 3, wrapped_context); - swap(&order, a + range.start, 1, 4, wrapped_context); - swap(&order, a + range.start, 0, 3, wrapped_context); - swap(&order, a + range.start, 0, 2, wrapped_context); - swap(&order, a + range.start, 1, 3, wrapped_context); - swap(&order, a + range.start, 1, 2, wrapped_context); + swap(&order, range.start, 0, 1, wrapped_context); + swap(&order, range.start, 3, 4, wrapped_context); + swap(&order, range.start, 2, 4, wrapped_context); + swap(&order, range.start, 2, 3, wrapped_context); + swap(&order, range.start, 1, 4, wrapped_context); + swap(&order, range.start, 0, 3, wrapped_context); + swap(&order, range.start, 0, 2, wrapped_context); + swap(&order, range.start, 1, 3, wrapped_context); + swap(&order, range.start, 1, 2, wrapped_context); }, 4 => { - swap(&order, a + range.start, 0, 1, wrapped_context); - swap(&order, a + range.start, 2, 3, wrapped_context); - swap(&order, a + range.start, 0, 2, wrapped_context); - swap(&order, a + range.start, 1, 3, wrapped_context); - swap(&order, a + range.start, 1, 2, wrapped_context); + swap(&order, range.start, 0, 1, wrapped_context); + swap(&order, range.start, 2, 3, wrapped_context); + swap(&order, range.start, 0, 2, wrapped_context); + swap(&order, range.start, 1, 3, wrapped_context); + swap(&order, range.start, 1, 2, wrapped_context); }, else => {}, } @@ -346,7 +348,7 @@ pub fn blockContext( last = index; count += 1; }) { - index = findLastForward(last, Range.init(last + 1, A.end), find - count, a, wrapped_context); + index = findLastForward(last, Range.init(last + 1, A.end), find - count, 0, wrapped_context); if (index == A.end) break; } index = last; @@ -400,7 +402,7 @@ pub fn blockContext( last = index - 1; count += 1; }) { - index = findFirstBackward(last, Range.init(B.start, last), find - count, a, wrapped_context); + index = findFirstBackward(last, Range.init(B.start, last), find - count, 0, wrapped_context); if (index == B.start) break; } index = last; @@ -461,9 +463,9 @@ pub fn blockContext( index = pull[pull_index].from; count = 1; while (count < length) : (count += 1) { - index = findFirstBackward(index - 1, Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, a, wrapped_context); + index = findFirstBackward(index - 1, Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, 0, wrapped_context); const range = Range.init(index + 1, pull[pull_index].from + 1); - wrapped_context.rotate(range, range.length() - count, a); + wrapped_context.rotate(range, range.length() - count, 0); pull[pull_index].from = index + count; } } else if (pull[pull_index].to > pull[pull_index].from) { @@ -471,9 +473,9 @@ pub fn blockContext( index = pull[pull_index].from + 1; count = 1; while (count < length) : (count += 1) { - index = findLastForward(index, Range.init(index, pull[pull_index].to), length - count, a, wrapped_context); + index = findLastForward(index, Range.init(index, pull[pull_index].to), length - count, 0, wrapped_context); const range = Range.init(pull[pull_index].from, index - 1); - wrapped_context.rotate(range, count, a); + wrapped_context.rotate(range, count, 0); pull[pull_index].from = index - 1 - count; } } @@ -517,10 +519,10 @@ pub fn blockContext( } } - if (wrapped_context.lessThan(a + B.end - 1, a + A.start)) { + if (wrapped_context.lessThan(0 + B.end - 1, 0 + A.start)) { // the two ranges are in reverse order, so a simple rotation should fix it - wrapped_context.rotate(Range.init(A.start, B.end), A.length(), a); - } else if (wrapped_context.lessThan(a + A.end, a + A.end - 1)) { + wrapped_context.rotate(Range.init(A.start, B.end), A.length(), 0); + } else if (wrapped_context.lessThan(0 + A.end, 0 + A.end - 1)) { // these two ranges weren't already in order, so we'll need to merge them! var findA: usize = undefined; @@ -535,7 +537,7 @@ pub fn blockContext( indexA += 1; index += block_size; }) { - context.swap(a + indexA, a + index); + context.swap(0 + indexA, 0 + index); } // start rolling the A blocks through the B blocks! @@ -555,23 +557,23 @@ pub fn blockContext( while (true) { // if there's a previous B block and the first value of the minimum A block is <= the last value of the previous B block, // then drop that minimum A block behind. or if there are no B blocks left then keep dropping the remaining A blocks. - if ((lastB.length() > 0 and !wrapped_context.lessThan(a + lastB.end - 1, a + indexA)) or blockB.length() == 0) { + if ((lastB.length() > 0 and !wrapped_context.lessThan(a + lastB.end - 1, 0 + indexA)) or blockB.length() == 0) { // figure out where to split the previous B block, and rotate it at the split - const B_split = binaryFirst(indexA, lastB, a, wrapped_context); + const B_split = binaryFirst(indexA, lastB, 0, wrapped_context); const B_remaining = lastB.end - B_split; // swap the minimum A block to the beginning of the rolling A blocks var minA = blockA.start; findA = minA + block_size; while (findA < blockA.end) : (findA += block_size) { - if (wrapped_context.lessThan(a + findA, a + minA)) { + if (wrapped_context.lessThan(0 + findA, 0 + minA)) { minA = findA; } } blockSwap(blockA.start, minA, block_size, wrapped_context); // swap the first item of the previous A block back with its original value, which is stored in buffer1 - context.swap(a + blockA.start, a + indexA); + context.swap(0 + blockA.start, 0 + indexA); indexA += 1; // locally merge the previous A block with the B values that follow it @@ -579,9 +581,9 @@ pub fn blockContext( // or failing that we'll use a strictly in-place merge algorithm (MergeInPlace) if (buffer2.length() > 0) { - mergeInternal(lastA, Range.init(lastA.end, B_split), buffer2, a, wrapped_context); + mergeInternal(lastA, Range.init(lastA.end, B_split), buffer2, 0, wrapped_context); } else { - mergeInPlace(lastA, Range.init(lastA.end, B_split), a, wrapped_context); + mergeInPlace(lastA, Range.init(lastA.end, B_split), 0, wrapped_context); } if (buffer2.length() > 0) { @@ -594,7 +596,7 @@ pub fn blockContext( blockSwap(B_split, blockA.start + block_size - B_remaining, B_remaining, wrapped_context); } else { // we are unable to use the 'buffer2' trick to speed up the rotation operation since buffer2 doesn't exist, so perform a normal rotation - wrapped_context.rotate(Range.init(B_split, blockA.start + block_size), blockA.start - B_split, a); + wrapped_context.rotate(Range.init(B_split, blockA.start + block_size), blockA.start - B_split, 0); } // update the range for the remaining A blocks, and the range remaining from the B block after it was split @@ -606,7 +608,7 @@ pub fn blockContext( if (blockA.length() == 0) break; } else if (blockB.length() < block_size) { // move the last B block, which is unevenly sized, to before the remaining A blocks, by using a rotation - wrapped_context.rotate(Range.init(blockA.start, blockB.end), blockB.start - blockA.start, a); + wrapped_context.rotate(Range.init(blockA.start, blockB.end), blockB.start - blockA.start, 0); lastB = Range.init(blockA.start, blockA.start + blockB.length()); blockA.start += blockB.length(); @@ -632,9 +634,9 @@ pub fn blockContext( // merge the last A block with the remaining B values if (buffer2.length() > 0) { - mergeInternal(lastA, Range.init(lastA.end, B.end), buffer2, a, wrapped_context); + mergeInternal(lastA, Range.init(lastA.end, B.end), buffer2, 0, wrapped_context); } else { - mergeInPlace(lastA, Range.init(lastA.end, B.end), a, wrapped_context); + mergeInPlace(lastA, Range.init(lastA.end, B.end), 0, wrapped_context); } } } @@ -648,7 +650,7 @@ pub fn blockContext( // it was consistently slightly slower than a simple insertion sort, // even for tens of millions of items. this may be because insertion // sort is quite fast when the data is already somewhat sorted, like it is here - sort.insertionContext(a + buffer2.start, a + buffer2.end, wrapped_context); + sort.insertionContext(0 + buffer2.start, 0 + buffer2.end, wrapped_context); pull_index = 0; while (pull_index < 2) : (pull_index += 1) { @@ -657,9 +659,9 @@ pub fn blockContext( // the values were pulled out to the left, so redistribute them back to the right var buffer = Range.init(pull[pull_index].range.start, pull[pull_index].range.start + pull[pull_index].count); while (buffer.length() > 0) { - index = findFirstForward(buffer.start, Range.init(buffer.end, pull[pull_index].range.end), unique, a, wrapped_context); + index = findFirstForward(buffer.start, Range.init(buffer.end, pull[pull_index].range.end), unique, 0, wrapped_context); const amount = index - buffer.end; - wrapped_context.rotate(Range.init(buffer.start, index), buffer.length(), a); + wrapped_context.rotate(Range.init(buffer.start, index), buffer.length(), 0); buffer.start += (amount + 1); buffer.end += amount; unique -= 2; @@ -668,9 +670,9 @@ pub fn blockContext( // the values were pulled out to the right, so redistribute them back to the left var buffer = Range.init(pull[pull_index].range.end - pull[pull_index].count, pull[pull_index].range.end); while (buffer.length() > 0) { - index = findLastBackward(buffer.end - 1, Range.init(pull[pull_index].range.start, buffer.start), unique, a, wrapped_context); + index = findLastBackward(buffer.end - 1, Range.init(pull[pull_index].range.start, buffer.start), unique, 0, wrapped_context); const amount = buffer.start - index; - wrapped_context.rotate(Range.init(index, buffer.end), amount, a); + wrapped_context.rotate(Range.init(index, buffer.end), amount, 0); buffer.start -= amount; buffer.end -= (amount + 1); unique -= 2; From 337e607ebe8a89d1bed0df78df4c4121c3ddb534 Mon Sep 17 00:00:00 2001 From: p-rosit Date: Wed, 10 Sep 2025 17:54:52 +0200 Subject: [PATCH 20/24] remove start index --- lib/std/sort/block.zig | 112 +++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 60 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index 618a77781e..7db879663f 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -154,15 +154,15 @@ pub fn blockContext( return ctx.sub_ctx.swap(i, j); } - pub fn rotate(ctx: @This(), A: Range, amount: usize, start_index: usize) void { - ctx.naiveReverse(Range.init(A.start, A.start + amount), start_index); - ctx.naiveReverse(Range.init(A.start + amount, A.end), start_index); - ctx.naiveReverse(A, start_index); + pub fn rotate(ctx: @This(), A: Range, amount: usize) void { + ctx.naiveReverse(Range.init(A.start, A.start + amount)); + ctx.naiveReverse(Range.init(A.start + amount, A.end)); + ctx.naiveReverse(A); } - fn naiveReverse(ctx: @This(), A: Range, start_index: usize) void { - var i = start_index + A.start; - var j = start_index + A.end - 1; + fn naiveReverse(ctx: @This(), A: Range) void { + var i = A.start; + var j = A.end - 1; while (j > i) { ctx.sub_ctx.swap(i, j); i += 1; @@ -348,7 +348,7 @@ pub fn blockContext( last = index; count += 1; }) { - index = findLastForward(last, Range.init(last + 1, A.end), find - count, 0, wrapped_context); + index = findLastForward(last, Range.init(last + 1, A.end), find - count, wrapped_context); if (index == A.end) break; } index = last; @@ -402,7 +402,7 @@ pub fn blockContext( last = index - 1; count += 1; }) { - index = findFirstBackward(last, Range.init(B.start, last), find - count, 0, wrapped_context); + index = findFirstBackward(last, Range.init(B.start, last), find - count, wrapped_context); if (index == B.start) break; } index = last; @@ -463,9 +463,9 @@ pub fn blockContext( index = pull[pull_index].from; count = 1; while (count < length) : (count += 1) { - index = findFirstBackward(index - 1, Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, 0, wrapped_context); + index = findFirstBackward(index - 1, Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, wrapped_context); const range = Range.init(index + 1, pull[pull_index].from + 1); - wrapped_context.rotate(range, range.length() - count, 0); + wrapped_context.rotate(range, range.length() - count); pull[pull_index].from = index + count; } } else if (pull[pull_index].to > pull[pull_index].from) { @@ -473,9 +473,9 @@ pub fn blockContext( index = pull[pull_index].from + 1; count = 1; while (count < length) : (count += 1) { - index = findLastForward(index, Range.init(index, pull[pull_index].to), length - count, 0, wrapped_context); + index = findLastForward(index, Range.init(index, pull[pull_index].to), length - count, wrapped_context); const range = Range.init(pull[pull_index].from, index - 1); - wrapped_context.rotate(range, count, 0); + wrapped_context.rotate(range, count); pull[pull_index].from = index - 1 - count; } } @@ -519,10 +519,10 @@ pub fn blockContext( } } - if (wrapped_context.lessThan(0 + B.end - 1, 0 + A.start)) { + if (wrapped_context.lessThan(B.end - 1, A.start)) { // the two ranges are in reverse order, so a simple rotation should fix it - wrapped_context.rotate(Range.init(A.start, B.end), A.length(), 0); - } else if (wrapped_context.lessThan(0 + A.end, 0 + A.end - 1)) { + wrapped_context.rotate(Range.init(A.start, B.end), A.length()); + } else if (wrapped_context.lessThan(A.end, A.end - 1)) { // these two ranges weren't already in order, so we'll need to merge them! var findA: usize = undefined; @@ -537,7 +537,7 @@ pub fn blockContext( indexA += 1; index += block_size; }) { - context.swap(0 + indexA, 0 + index); + context.swap(indexA, index); } // start rolling the A blocks through the B blocks! @@ -557,23 +557,23 @@ pub fn blockContext( while (true) { // if there's a previous B block and the first value of the minimum A block is <= the last value of the previous B block, // then drop that minimum A block behind. or if there are no B blocks left then keep dropping the remaining A blocks. - if ((lastB.length() > 0 and !wrapped_context.lessThan(a + lastB.end - 1, 0 + indexA)) or blockB.length() == 0) { + if ((lastB.length() > 0 and !wrapped_context.lessThan(a + lastB.end - 1, indexA)) or blockB.length() == 0) { // figure out where to split the previous B block, and rotate it at the split - const B_split = binaryFirst(indexA, lastB, 0, wrapped_context); + const B_split = binaryFirst(indexA, lastB, wrapped_context); const B_remaining = lastB.end - B_split; // swap the minimum A block to the beginning of the rolling A blocks var minA = blockA.start; findA = minA + block_size; while (findA < blockA.end) : (findA += block_size) { - if (wrapped_context.lessThan(0 + findA, 0 + minA)) { + if (wrapped_context.lessThan(findA, minA)) { minA = findA; } } blockSwap(blockA.start, minA, block_size, wrapped_context); // swap the first item of the previous A block back with its original value, which is stored in buffer1 - context.swap(0 + blockA.start, 0 + indexA); + context.swap(blockA.start, indexA); indexA += 1; // locally merge the previous A block with the B values that follow it @@ -581,9 +581,9 @@ pub fn blockContext( // or failing that we'll use a strictly in-place merge algorithm (MergeInPlace) if (buffer2.length() > 0) { - mergeInternal(lastA, Range.init(lastA.end, B_split), buffer2, 0, wrapped_context); + mergeInternal(lastA, Range.init(lastA.end, B_split), buffer2, wrapped_context); } else { - mergeInPlace(lastA, Range.init(lastA.end, B_split), 0, wrapped_context); + mergeInPlace(lastA, Range.init(lastA.end, B_split), wrapped_context); } if (buffer2.length() > 0) { @@ -596,7 +596,7 @@ pub fn blockContext( blockSwap(B_split, blockA.start + block_size - B_remaining, B_remaining, wrapped_context); } else { // we are unable to use the 'buffer2' trick to speed up the rotation operation since buffer2 doesn't exist, so perform a normal rotation - wrapped_context.rotate(Range.init(B_split, blockA.start + block_size), blockA.start - B_split, 0); + wrapped_context.rotate(Range.init(B_split, blockA.start + block_size), blockA.start - B_split); } // update the range for the remaining A blocks, and the range remaining from the B block after it was split @@ -608,7 +608,7 @@ pub fn blockContext( if (blockA.length() == 0) break; } else if (blockB.length() < block_size) { // move the last B block, which is unevenly sized, to before the remaining A blocks, by using a rotation - wrapped_context.rotate(Range.init(blockA.start, blockB.end), blockB.start - blockA.start, 0); + wrapped_context.rotate(Range.init(blockA.start, blockB.end), blockB.start - blockA.start); lastB = Range.init(blockA.start, blockA.start + blockB.length()); blockA.start += blockB.length(); @@ -634,9 +634,9 @@ pub fn blockContext( // merge the last A block with the remaining B values if (buffer2.length() > 0) { - mergeInternal(lastA, Range.init(lastA.end, B.end), buffer2, 0, wrapped_context); + mergeInternal(lastA, Range.init(lastA.end, B.end), buffer2, wrapped_context); } else { - mergeInPlace(lastA, Range.init(lastA.end, B.end), 0, wrapped_context); + mergeInPlace(lastA, Range.init(lastA.end, B.end), wrapped_context); } } } @@ -650,7 +650,7 @@ pub fn blockContext( // it was consistently slightly slower than a simple insertion sort, // even for tens of millions of items. this may be because insertion // sort is quite fast when the data is already somewhat sorted, like it is here - sort.insertionContext(0 + buffer2.start, 0 + buffer2.end, wrapped_context); + sort.insertionContext(buffer2.start, buffer2.end, wrapped_context); pull_index = 0; while (pull_index < 2) : (pull_index += 1) { @@ -659,9 +659,9 @@ pub fn blockContext( // the values were pulled out to the left, so redistribute them back to the right var buffer = Range.init(pull[pull_index].range.start, pull[pull_index].range.start + pull[pull_index].count); while (buffer.length() > 0) { - index = findFirstForward(buffer.start, Range.init(buffer.end, pull[pull_index].range.end), unique, 0, wrapped_context); + index = findFirstForward(buffer.start, Range.init(buffer.end, pull[pull_index].range.end), unique, wrapped_context); const amount = index - buffer.end; - wrapped_context.rotate(Range.init(buffer.start, index), buffer.length(), 0); + wrapped_context.rotate(Range.init(buffer.start, index), buffer.length()); buffer.start += (amount + 1); buffer.end += amount; unique -= 2; @@ -670,9 +670,9 @@ pub fn blockContext( // the values were pulled out to the right, so redistribute them back to the left var buffer = Range.init(pull[pull_index].range.end - pull[pull_index].count, pull[pull_index].range.end); while (buffer.length() > 0) { - index = findLastBackward(buffer.end - 1, Range.init(pull[pull_index].range.start, buffer.start), unique, 0, wrapped_context); + index = findLastBackward(buffer.end - 1, Range.init(pull[pull_index].range.start, buffer.start), unique, wrapped_context); const amount = buffer.start - index; - wrapped_context.rotate(Range.init(index, buffer.end), amount, 0); + wrapped_context.rotate(Range.init(index, buffer.end), amount); buffer.start -= amount; buffer.end -= (amount + 1); unique -= 2; @@ -688,7 +688,6 @@ pub fn blockContext( fn mergeInPlace( A_arg: Range, B_arg: Range, - start_index: usize, context: anytype, ) void { if (A_arg.length() == 0 or B_arg.length() == 0) return; @@ -716,17 +715,17 @@ fn mergeInPlace( while (true) { // find the first place in B where the first item in A needs to be inserted - const mid = binaryFirst(A.start, B, start_index, context); + const mid = binaryFirst(A.start, B, context); // rotate A into place const amount = mid - A.end; - context.rotate(Range.init(A.start, mid), A.length(), start_index); + context.rotate(Range.init(A.start, mid), A.length()); if (B.end == mid) break; // calculate the new A and B ranges B.start = mid; A = Range.init(A.start + amount, B.start); - A.start = binaryLast(A.start, A, start_index, context); + A.start = binaryLast(A.start, A, context); if (A.length() == 0) break; } } @@ -736,7 +735,6 @@ fn mergeInternal( A: Range, B: Range, buffer: Range, - start_index: usize, context: anytype, ) void { // whenever we find a value to add to the final array, swap it with the value that's already in that spot @@ -747,13 +745,13 @@ fn mergeInternal( if (B.length() > 0 and A.length() > 0) { while (true) { - if (!context.lessThan(start_index + B.start + B_count, start_index + buffer.start + A_count)) { - context.swap(start_index + A.start + insert, start_index + buffer.start + A_count); + if (!context.lessThan(B.start + B_count, buffer.start + A_count)) { + context.swap(A.start + insert, buffer.start + A_count); A_count += 1; insert += 1; if (A_count >= A.length()) break; } else { - context.swap(start_index + A.start + insert, start_index + B.start + B_count); + context.swap(A.start + insert, B.start + B_count); B_count += 1; insert += 1; if (B_count >= B.length()) break; @@ -778,85 +776,80 @@ fn findFirstForward( value_index: usize, range: Range, unique: usize, - start_index: usize, context: anytype, ) usize { const skip = @max(range.length() / unique, @as(usize, 1)); var index = range.start + skip; - while (context.lessThan(start_index + index - 1, start_index + value_index)) : (index += skip) { + while (context.lessThan(index - 1, value_index)) : (index += skip) { if (index >= range.end - skip) { - return binaryFirst(value_index, Range.init(index, range.end), start_index, context); + return binaryFirst(value_index, Range.init(index, range.end), context); } } - return binaryFirst(value_index, Range.init(index - skip, index), start_index, context); + return binaryFirst(value_index, Range.init(index - skip, index), context); } fn findFirstBackward( value_index: usize, range: Range, unique: usize, - start_index: usize, context: anytype, ) usize { if (range.length() == 0) return range.start; const skip = @max(range.length() / unique, @as(usize, 1)); var index = range.end - skip; - while (index > range.start and !context.lessThan(start_index + index - 1, start_index + value_index)) : (index -= skip) { + while (index > range.start and !context.lessThan(index - 1, value_index)) : (index -= skip) { if (index < range.start + skip) { - return binaryFirst(value_index, Range.init(range.start, index), start_index, context); + return binaryFirst(value_index, Range.init(range.start, index), context); } } - return binaryFirst(value_index, Range.init(index, index + skip), start_index, context); + return binaryFirst(value_index, Range.init(index, index + skip), context); } fn findLastForward( value_index: usize, range: Range, unique: usize, - start_index: usize, context: anytype, ) usize { if (range.length() == 0) return range.start; const skip = @max(range.length() / unique, @as(usize, 1)); var index = range.start + skip; - while (!context.lessThan(start_index + value_index, start_index + index - 1)) : (index += skip) { + while (!context.lessThan(value_index, index - 1)) : (index += skip) { if (index >= range.end - skip) { - return binaryLast(value_index, Range.init(index, range.end), start_index, context); + return binaryLast(value_index, Range.init(index, range.end), context); } } - return binaryLast(value_index, Range.init(index - skip, index), start_index, context); + return binaryLast(value_index, Range.init(index - skip, index), context); } fn findLastBackward( value_index: usize, range: Range, unique: usize, - start_index: usize, context: anytype, ) usize { if (range.length() == 0) return range.start; const skip = @max(range.length() / unique, @as(usize, 1)); var index = range.end - skip; - while (index > range.start and context.lessThan(start_index + value_index, start_index + index - 1)) : (index -= skip) { + while (index > range.start and context.lessThan(value_index, index - 1)) : (index -= skip) { if (index < range.start + skip) { - return binaryLast(value_index, Range.init(range.start, index), start_index, context); + return binaryLast(value_index, Range.init(range.start, index), context); } } - return binaryLast(value_index, Range.init(index, index + skip), start_index, context); + return binaryLast(value_index, Range.init(index, index + skip), context); } fn binaryFirst( value_index: usize, range: Range, - start_index: usize, context: anytype, ) usize { var curr = range.start; @@ -867,7 +860,7 @@ fn binaryFirst( size /= 2; const mid_index = curr + size; - if (context.lessThan(start_index + mid_index, start_index + value_index)) { + if (context.lessThan(mid_index, value_index)) { curr += size + offset; } } @@ -877,7 +870,6 @@ fn binaryFirst( fn binaryLast( value_index: usize, range: Range, - start_index: usize, context: anytype, ) usize { var curr = range.start; @@ -888,7 +880,7 @@ fn binaryLast( size /= 2; const mid_index = curr + size; - if (!context.lessThan(start_index + value_index, start_index + mid_index)) { + if (!context.lessThan(value_index, mid_index)) { curr += size + offset; } } From f856bfc492c5790c03c74a011863f0de55f0bf94 Mon Sep 17 00:00:00 2001 From: p-rosit Date: Sat, 13 Sep 2025 13:32:19 +0200 Subject: [PATCH 21/24] use `blockContext` --- lib/std/mem.zig | 8 +++++--- lib/std/sort.zig | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 3763de5180..e502fbb837 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -629,10 +629,12 @@ pub fn sortUnstable( std.sort.pdq(T, items, context, lessThanFn); } -/// TODO: currently this just calls `insertionSortContext`. The block sort implementation -/// in this file needs to be adapted to use the sort context. +/// Sorts a range [a, b) using a stable algorithm (maintains relative order of equal elements) with custom context. +/// This is a lower-level interface for sorting that works with indices instead of slices. +/// +/// The context must provide lessThan(a_idx, b_idx) and swap(a_idx, b_idx) methods. pub fn sortContext(a: usize, b: usize, context: anytype) void { - std.sort.insertionContext(a, b, context); + std.sort.blockContext(a, b, context); } /// Sorts a range [a, b) using an unstable algorithm with custom context. diff --git a/lib/std/sort.zig b/lib/std/sort.zig index 8705d24017..ba1cc86714 100644 --- a/lib/std/sort.zig +++ b/lib/std/sort.zig @@ -7,6 +7,7 @@ const math = std.math; pub const Mode = enum { stable, unstable }; pub const block = @import("sort/block.zig").block; +pub const blockContext = @import("sort/block.zig").blockContext; pub const pdq = @import("sort/pdq.zig").pdq; pub const pdqContext = @import("sort/pdq.zig").pdqContext; @@ -159,7 +160,7 @@ const sort_funcs = &[_]fn (comptime type, anytype, anytype, comptime anytype) vo }; const context_sort_funcs = &[_]fn (usize, usize, anytype) void{ - // blockContext, + blockContext, pdqContext, insertionContext, heapContext, From bc5b99d182be22edef544843bda403da185d8411 Mon Sep 17 00:00:00 2001 From: p-rosit Date: Fri, 12 Sep 2025 07:57:27 +0200 Subject: [PATCH 22/24] optionally implement rotate --- lib/std/mem.zig | 3 ++- lib/std/sort/block.zig | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index e502fbb837..066991938a 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -632,7 +632,8 @@ pub fn sortUnstable( /// Sorts a range [a, b) using a stable algorithm (maintains relative order of equal elements) with custom context. /// This is a lower-level interface for sorting that works with indices instead of slices. /// -/// The context must provide lessThan(a_idx, b_idx) and swap(a_idx, b_idx) methods. +/// The context must provide lessThan(a_idx, b_idx) and swap(a_idx, b_idx) methods and optionally +/// a rotate(start_idx, end_idx, amount) method (see `mem.rotate`). pub fn sortContext(a: usize, b: usize, context: anytype) void { std.sort.blockContext(a, b, context); } diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index 7db879663f..6d22ac0d43 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -116,6 +116,10 @@ pub fn block( pub fn swap(ctx: @This(), a: usize, b: usize) void { return mem.swap(T, &ctx.items[a], &ctx.items[b]); } + + pub fn rotate(ctx: @This(), a: usize, b: usize, amount: usize) void { + return mem.rotate(T, ctx.items[a..b], amount); + } }; return blockContext(0, items.len, Context{ .items = items, .sub_ctx = context }); } @@ -124,7 +128,10 @@ pub fn block( /// O(1) memory (no allocator required). /// Sorts in ascending order with respect to the given `lessThan` function. /// `context` must have methods `swap` and `lessThan`, -/// which each take 2 `usize` parameters indicating the index of an item. +/// which each take 2 `usize` parameters indicating the index of an item. Optionally +/// the `context` can define a `rotate` method which takes 2 `usize` parameters +/// indicating the start and end index and another `usize` indicating how many +/// steps to rotate. /// /// NOTE: The algorithm only works when the comparison is less-than or greater-than. /// (See https://github.com/ziglang/zig/issues/8289) @@ -154,7 +161,13 @@ pub fn blockContext( return ctx.sub_ctx.swap(i, j); } - pub fn rotate(ctx: @This(), A: Range, amount: usize) void { + pub const rotate = if (std.meta.hasFn(@TypeOf(context), "rotate")) innerRotate else naiveRotate; + + fn innerRotate(ctx: @This(), A: Range, amount: usize) void { + ctx.sub_ctx.rotate(A.start, A.end, amount); + } + + fn naiveRotate(ctx: @This(), A: Range, amount: usize) void { ctx.naiveReverse(Range.init(A.start, A.start + amount)); ctx.naiveReverse(Range.init(A.start + amount, A.end)); ctx.naiveReverse(A); From c1bccb851f993862ba979766a6c7215131dd8322 Mon Sep 17 00:00:00 2001 From: p-rosit Date: Sun, 21 Sep 2025 22:06:34 +0200 Subject: [PATCH 23/24] don't close over comptime var --- lib/std/sort/block.zig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig index 6d22ac0d43..8ba65a4332 100644 --- a/lib/std/sort/block.zig +++ b/lib/std/sort/block.zig @@ -141,8 +141,9 @@ pub fn blockContext( context: anytype, ) void { // Implementation ported from https://github.com/BonzaiThePenguin/WikiSort/blob/master/WikiSort.c + const ContextType = @TypeOf(context); const Context = struct { - sub_ctx: @TypeOf(context), + sub_ctx: ContextType, pub const lessThan = if (builtin.mode == .Debug) lessThanChecked else lessThanUnchecked; @@ -161,7 +162,7 @@ pub fn blockContext( return ctx.sub_ctx.swap(i, j); } - pub const rotate = if (std.meta.hasFn(@TypeOf(context), "rotate")) innerRotate else naiveRotate; + pub const rotate = if (std.meta.hasFn(ContextType, "rotate")) innerRotate else naiveRotate; fn innerRotate(ctx: @This(), A: Range, amount: usize) void { ctx.sub_ctx.rotate(A.start, A.end, amount); From c80bb983ccc7c50737a8068b980ac545bbab4595 Mon Sep 17 00:00:00 2001 From: p-rosit Date: Sun, 21 Sep 2025 22:22:28 +0200 Subject: [PATCH 24/24] block sort without cache hits more branches --- lib/std/multi_array_list.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/multi_array_list.zig b/lib/std/multi_array_list.zig index ec4b0b72e1..b827d5c4a1 100644 --- a/lib/std/multi_array_list.zig +++ b/lib/std/multi_array_list.zig @@ -186,7 +186,7 @@ pub fn MultiArrayList(comptime T: type) type { return lhs.alignment > rhs.alignment; } }; - @setEvalBranchQuota(3 * fields.len * std.math.log2(fields.len)); + @setEvalBranchQuota(10 * fields.len * std.math.log2(fields.len)); mem.sort(Data, &data, {}, Sort.lessThan); var sizes_bytes: [fields.len]usize = undefined; var field_indexes: [fields.len]usize = undefined;