Merge pull request #9458 from SuperAuguste/popcount-ctz-clz

Vector support for `@popCount`, `@ctz`, and `@clz`
This commit is contained in:
Andrew Kelley 2021-07-26 19:15:27 -04:00 committed by GitHub
commit fc105f2681
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 258 additions and 49 deletions

View file

@ -7366,18 +7366,20 @@ pub const CallOptions = struct {
{#header_close#}
{#header_open|@clz#}
<pre>{#syntax#}@clz(comptime T: type, integer: T){#endsyntax#}</pre>
<pre>{#syntax#}@clz(comptime T: type, operand: T){#endsyntax#}</pre>
<p>{#syntax#}T{#endsyntax#} must be an integer type.</p>
<p>{#syntax#}operand{#endsyntax#} may be an {#link|integer|Integers#} or {#link|vector|Vectors#}.</p>
<p>
This function counts the number of most-significant (leading in a big-Endian sense) zeroes in {#syntax#}integer{#endsyntax#}.
This function counts the number of most-significant (leading in a big-Endian sense) zeroes in an integer.
</p>
<p>
If {#syntax#}integer{#endsyntax#} is known at {#link|comptime#},
If {#syntax#}operand{#endsyntax#} is a {#link|comptime#}-known integer,
the return type is {#syntax#}comptime_int{#endsyntax#}.
Otherwise, the return type is an unsigned integer with the minimum number
Otherwise, the return type is an unsigned integer or vector of unsigned integers with the minimum number
of bits that can represent the bit count of the integer type.
</p>
<p>
If {#syntax#}integer{#endsyntax#} is zero, {#syntax#}@clz{#endsyntax#} returns the bit width
If {#syntax#}operand{#endsyntax#} is zero, {#syntax#}@clz{#endsyntax#} returns the bit width
of integer type {#syntax#}T{#endsyntax#}.
</p>
{#see_also|@ctz|@popCount#}
@ -7509,18 +7511,20 @@ test "main" {
{#header_close#}
{#header_open|@ctz#}
<pre>{#syntax#}@ctz(comptime T: type, integer: T){#endsyntax#}</pre>
<pre>{#syntax#}@ctz(comptime T: type, operand: T){#endsyntax#}</pre>
<p>{#syntax#}T{#endsyntax#} must be an integer type.</p>
<p>{#syntax#}operand{#endsyntax#} may be an {#link|integer|Integers#} or {#link|vector|Vectors#}.</p>
<p>
This function counts the number of least-significant (trailing in a big-Endian sense) zeroes in {#syntax#}integer{#endsyntax#}.
This function counts the number of least-significant (trailing in a big-Endian sense) zeroes in an integer.
</p>
<p>
If {#syntax#}integer{#endsyntax#} is known at {#link|comptime#},
If {#syntax#}operand{#endsyntax#} is a {#link|comptime#}-known integer,
the return type is {#syntax#}comptime_int{#endsyntax#}.
Otherwise, the return type is an unsigned integer with the minimum number
Otherwise, the return type is an unsigned integer or vector of unsigned integers with the minimum number
of bits that can represent the bit count of the integer type.
</p>
<p>
If {#syntax#}integer{#endsyntax#} is zero, {#syntax#}@ctz{#endsyntax#} returns
If {#syntax#}operand{#endsyntax#} is zero, {#syntax#}@ctz{#endsyntax#} returns
the bit width of integer type {#syntax#}T{#endsyntax#}.
</p>
{#see_also|@clz|@popCount#}
@ -8105,12 +8109,14 @@ test "@wasmMemoryGrow" {
{#header_close#}
{#header_open|@popCount#}
<pre>{#syntax#}@popCount(comptime T: type, integer: T){#endsyntax#}</pre>
<pre>{#syntax#}@popCount(comptime T: type, operand: T){#endsyntax#}</pre>
<p>{#syntax#}T{#endsyntax#} must be an integer type.</p>
<p>{#syntax#}operand{#endsyntax#} may be an {#link|integer|Integers#} or {#link|vector|Vectors#}.</p>
<p>Counts the number of bits set in an integer.</p>
<p>
If {#syntax#}integer{#endsyntax#} is known at {#link|comptime#},
If {#syntax#}operand{#endsyntax#} is a {#link|comptime#}-known integer,
the return type is {#syntax#}comptime_int{#endsyntax#}.
Otherwise, the return type is an unsigned integer with the minimum number
Otherwise, the return type is an unsigned integer or vector of unsigned integers with the minimum number
of bits that can represent the bit count of the integer type.
</p>
{#see_also|@ctz|@clz#}

View file

@ -1907,12 +1907,15 @@ struct ZigLLVMFnKey {
union {
struct {
uint32_t bit_count;
uint32_t vector_len; // 0 means not a vector
} ctz;
struct {
uint32_t bit_count;
uint32_t vector_len; // 0 means not a vector
} clz;
struct {
uint32_t bit_count;
uint32_t vector_len; // 0 means not a vector
} pop_count;
struct {
BuiltinFnId op;

View file

@ -7883,11 +7883,14 @@ bool type_id_eql(TypeId const *a, TypeId const *b) {
uint32_t zig_llvm_fn_key_hash(ZigLLVMFnKey const *x) {
switch (x->id) {
case ZigLLVMFnIdCtz:
return (uint32_t)(x->data.ctz.bit_count) * (uint32_t)810453934;
return (uint32_t)(x->data.ctz.bit_count) * (uint32_t)810453934 +
(uint32_t)(x->data.ctz.vector_len) * (((uint32_t)x->id << 5) + 1025);
case ZigLLVMFnIdClz:
return (uint32_t)(x->data.clz.bit_count) * (uint32_t)2428952817;
return (uint32_t)(x->data.clz.bit_count) * (uint32_t)2428952817 +
(uint32_t)(x->data.clz.vector_len) * (((uint32_t)x->id << 5) + 1025);
case ZigLLVMFnIdPopCount:
return (uint32_t)(x->data.clz.bit_count) * (uint32_t)101195049;
return (uint32_t)(x->data.pop_count.bit_count) * (uint32_t)101195049 +
(uint32_t)(x->data.pop_count.vector_len) * (((uint32_t)x->id << 5) + 1025);
case ZigLLVMFnIdFloatOp:
return (uint32_t)(x->data.floating.bit_count) * ((uint32_t)x->id + 1025) +
(uint32_t)(x->data.floating.vector_len) * (((uint32_t)x->id << 5) + 1025) +

View file

@ -5070,6 +5070,7 @@ static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *expr_type, BuiltinFn
n_args = 1;
key.id = ZigLLVMFnIdPopCount;
key.data.pop_count.bit_count = (uint32_t)int_type->data.integral.bit_count;
key.data.pop_count.vector_len = vector_len;
} else if (fn_id == BuiltinFnIdBswap) {
fn_name = "bswap";
n_args = 1;

View file

@ -15945,69 +15945,195 @@ static Stage1AirInst *ir_analyze_instruction_optional_unwrap_ptr(IrAnalyze *ira,
}
// Analyzes a `@ctz(T, operand)` instruction (count of trailing zero bits).
//
// `T` is resolved as an integer type. The operand may be a scalar, a vector, or
// an array whose element type is accepted by is_valid_vector_elem_type; in the
// latter two cases the operand is implicitly cast to a vector of `T`.
//
// If the casted operand is comptime-known the result is folded here (element by
// element for vectors). Otherwise a runtime ctz instruction is built whose
// result type is get_smallest_unsigned_int_type(T's bit count), or a vector of
// that type with the same length as the operand.
//
// Returns ira->codegen->invalid_inst_gen on any analysis error.
static Stage1AirInst *ir_analyze_instruction_ctz(IrAnalyze *ira, Stage1ZirInstCtz *instruction) {
Error err;
ZigType *int_type = ir_resolve_int_type(ira, instruction->type->child);
if (type_is_invalid(int_type))
return ira->codegen->invalid_inst_gen;
Stage1AirInst *op = ir_implicit_cast(ira, instruction->op->child, int_type);
Stage1AirInst *uncasted_op = instruction->op->child;
if (type_is_invalid(uncasted_op->value->type))
return ira->codegen->invalid_inst_gen;
// Decide whether the operand is vector-like before casting it: vectors always
// are, arrays only when their child type can be a vector element.
uint32_t vector_len = UINT32_MAX; // means not a vector
if (uncasted_op->value->type->id == ZigTypeIdArray) {
bool can_be_vec_elem;
if ((err = is_valid_vector_elem_type(ira->codegen, uncasted_op->value->type->data.array.child_type,
&can_be_vec_elem)))
{
return ira->codegen->invalid_inst_gen;
}
if (can_be_vec_elem) {
vector_len = uncasted_op->value->type->data.array.len;
}
} else if (uncasted_op->value->type->id == ZigTypeIdVector) {
vector_len = uncasted_op->value->type->data.vector.len;
}
bool is_vector = (vector_len != UINT32_MAX);
// Cast target: a vector of T for vector-like operands, plain T otherwise.
ZigType *op_type = is_vector ? get_vector_type(ira->codegen, vector_len, int_type) : int_type;
Stage1AirInst *op = ir_implicit_cast(ira, uncasted_op, op_type);
if (type_is_invalid(op->value->type))
return ira->codegen->invalid_inst_gen;
// NOTE(review): for a zero-bit T this returns a scalar constant 0 without
// consulting is_vector -- confirm that is intended for vector operands.
if (int_type->data.integral.bit_count == 0)
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, 0);
ZigType *smallest_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count);
if (instr_is_comptime(op)) {
ZigValue *val = ir_resolve_const(ira, op, UndefOk);
if (val == nullptr)
return ira->codegen->invalid_inst_gen;
// NOTE(review): a wholly-undefined operand yields an undef typed as
// entry_num_lit_int regardless of is_vector -- confirm for vector operands.
if (val->special == ConstValSpecialUndef)
return ir_const_undef(ira, instruction->base.scope, instruction->base.source_node, ira->codegen->builtin_types.entry_num_lit_int);
if (is_vector) {
// Fold each element into a fresh constant vector of the result element type.
ZigType *smallest_vec_type = get_vector_type(ira->codegen, vector_len, smallest_type);
Stage1AirInst *result = ir_const(ira, instruction->base.scope, instruction->base.source_node, smallest_vec_type);
expand_undef_array(ira->codegen, val);
result->value->data.x_array.data.s_none.elements = ira->codegen->pass1_arena->allocate<ZigValue>(smallest_vec_type->data.vector.len);
for (unsigned i = 0; i < smallest_vec_type->data.vector.len; i += 1) {
ZigValue *op_elem_val = &val->data.x_array.data.s_none.elements[i];
if ((err = ir_resolve_const_val(ira->codegen, ira->new_irb.exec, instruction->base.source_node,
op_elem_val, UndefOk)))
{
return ira->codegen->invalid_inst_gen;
}
ZigValue *result_elem_val = &result->value->data.x_array.data.s_none.elements[i];
result_elem_val->type = smallest_type;
result_elem_val->special = op_elem_val->special;
// An undefined input element stays undefined in the result; nothing to count.
if (op_elem_val->special == ConstValSpecialUndef)
continue;
size_t value = bigint_ctz(&op_elem_val->data.x_bigint, int_type->data.integral.bit_count);
bigint_init_unsigned(&result->value->data.x_array.data.s_none.elements[i].data.x_bigint, value);
}
return result;
} else {
// Scalar comptime operand: fold to an unsigned constant.
size_t result_usize = bigint_ctz(&op->value->data.x_bigint, int_type->data.integral.bit_count);
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result_usize);
}
}
// Runtime operand: emit the ctz instruction with the matching result type.
ZigType *return_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count);
ZigType *return_type = is_vector ? get_vector_type(ira->codegen, vector_len, smallest_type) : smallest_type;
return ir_build_ctz_gen(ira, instruction->base.scope, instruction->base.source_node, return_type, op);
}
// Analyzes a `@clz(T, operand)` instruction (count of leading zero bits).
//
// `T` is resolved as an integer type. The operand may be a scalar, a vector, or
// an array whose element type is accepted by is_valid_vector_elem_type; in the
// latter two cases the operand is implicitly cast to a vector of `T`.
//
// If the casted operand is comptime-known the result is folded here (element by
// element for vectors). Otherwise a runtime clz instruction is built whose
// result type is get_smallest_unsigned_int_type(T's bit count), or a vector of
// that type with the same length as the operand.
//
// Returns ira->codegen->invalid_inst_gen on any analysis error.
static Stage1AirInst *ir_analyze_instruction_clz(IrAnalyze *ira, Stage1ZirInstClz *instruction) {
Error err;
ZigType *int_type = ir_resolve_int_type(ira, instruction->type->child);
if (type_is_invalid(int_type))
return ira->codegen->invalid_inst_gen;
Stage1AirInst *op = ir_implicit_cast(ira, instruction->op->child, int_type);
Stage1AirInst *uncasted_op = instruction->op->child;
if (type_is_invalid(uncasted_op->value->type))
return ira->codegen->invalid_inst_gen;
// Decide whether the operand is vector-like before casting it: vectors always
// are, arrays only when their child type can be a vector element.
uint32_t vector_len = UINT32_MAX; // means not a vector
if (uncasted_op->value->type->id == ZigTypeIdArray) {
bool can_be_vec_elem;
if ((err = is_valid_vector_elem_type(ira->codegen, uncasted_op->value->type->data.array.child_type,
&can_be_vec_elem)))
{
return ira->codegen->invalid_inst_gen;
}
if (can_be_vec_elem) {
vector_len = uncasted_op->value->type->data.array.len;
}
} else if (uncasted_op->value->type->id == ZigTypeIdVector) {
vector_len = uncasted_op->value->type->data.vector.len;
}
bool is_vector = (vector_len != UINT32_MAX);
// Cast target: a vector of T for vector-like operands, plain T otherwise.
ZigType *op_type = is_vector ? get_vector_type(ira->codegen, vector_len, int_type) : int_type;
Stage1AirInst *op = ir_implicit_cast(ira, uncasted_op, op_type);
if (type_is_invalid(op->value->type))
return ira->codegen->invalid_inst_gen;
// NOTE(review): for a zero-bit T this returns a scalar constant 0 without
// consulting is_vector -- confirm that is intended for vector operands.
if (int_type->data.integral.bit_count == 0)
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, 0);
ZigType *smallest_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count);
if (instr_is_comptime(op)) {
ZigValue *val = ir_resolve_const(ira, op, UndefOk);
if (val == nullptr)
return ira->codegen->invalid_inst_gen;
// NOTE(review): a wholly-undefined operand yields an undef typed as
// entry_num_lit_int regardless of is_vector -- confirm for vector operands.
if (val->special == ConstValSpecialUndef)
return ir_const_undef(ira, instruction->base.scope, instruction->base.source_node, ira->codegen->builtin_types.entry_num_lit_int);
if (is_vector) {
// Fold each element into a fresh constant vector of the result element type.
ZigType *smallest_vec_type = get_vector_type(ira->codegen, vector_len, smallest_type);
Stage1AirInst *result = ir_const(ira, instruction->base.scope, instruction->base.source_node, smallest_vec_type);
expand_undef_array(ira->codegen, val);
result->value->data.x_array.data.s_none.elements = ira->codegen->pass1_arena->allocate<ZigValue>(smallest_vec_type->data.vector.len);
for (unsigned i = 0; i < smallest_vec_type->data.vector.len; i += 1) {
ZigValue *op_elem_val = &val->data.x_array.data.s_none.elements[i];
if ((err = ir_resolve_const_val(ira->codegen, ira->new_irb.exec, instruction->base.source_node,
op_elem_val, UndefOk)))
{
return ira->codegen->invalid_inst_gen;
}
ZigValue *result_elem_val = &result->value->data.x_array.data.s_none.elements[i];
result_elem_val->type = smallest_type;
result_elem_val->special = op_elem_val->special;
// An undefined input element stays undefined in the result; nothing to count.
if (op_elem_val->special == ConstValSpecialUndef)
continue;
size_t value = bigint_clz(&op_elem_val->data.x_bigint, int_type->data.integral.bit_count);
bigint_init_unsigned(&result->value->data.x_array.data.s_none.elements[i].data.x_bigint, value);
}
return result;
} else {
// Scalar comptime operand: fold to an unsigned constant.
size_t result_usize = bigint_clz(&op->value->data.x_bigint, int_type->data.integral.bit_count);
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result_usize);
}
}
// Runtime operand: emit the clz instruction with the matching result type.
ZigType *return_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count);
ZigType *return_type = is_vector ? get_vector_type(ira->codegen, vector_len, smallest_type) : smallest_type;
return ir_build_clz_gen(ira, instruction->base.scope, instruction->base.source_node, return_type, op);
}
static Stage1AirInst *ir_analyze_instruction_pop_count(IrAnalyze *ira, Stage1ZirInstPopCount *instruction) {
Error err;
ZigType *int_type = ir_resolve_int_type(ira, instruction->type->child);
if (type_is_invalid(int_type))
return ira->codegen->invalid_inst_gen;
Stage1AirInst *op = ir_implicit_cast(ira, instruction->op->child, int_type);
Stage1AirInst *uncasted_op = instruction->op->child;
if (type_is_invalid(uncasted_op->value->type))
return ira->codegen->invalid_inst_gen;
uint32_t vector_len = UINT32_MAX; // means not a vector
if (uncasted_op->value->type->id == ZigTypeIdArray) {
bool can_be_vec_elem;
if ((err = is_valid_vector_elem_type(ira->codegen, uncasted_op->value->type->data.array.child_type,
&can_be_vec_elem)))
{
return ira->codegen->invalid_inst_gen;
}
if (can_be_vec_elem) {
vector_len = uncasted_op->value->type->data.array.len;
}
} else if (uncasted_op->value->type->id == ZigTypeIdVector) {
vector_len = uncasted_op->value->type->data.vector.len;
}
bool is_vector = (vector_len != UINT32_MAX);
ZigType *op_type = is_vector ? get_vector_type(ira->codegen, vector_len, int_type) : int_type;
Stage1AirInst *op = ir_implicit_cast(ira, uncasted_op, op_type);
if (type_is_invalid(op->value->type))
return ira->codegen->invalid_inst_gen;
if (int_type->data.integral.bit_count == 0)
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, 0);
ZigType *smallest_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count);
if (instr_is_comptime(op)) {
ZigValue *val = ir_resolve_const(ira, op, UndefOk);
if (val == nullptr)
@ -16015,6 +16141,33 @@ static Stage1AirInst *ir_analyze_instruction_pop_count(IrAnalyze *ira, Stage1Zir
if (val->special == ConstValSpecialUndef)
return ir_const_undef(ira, instruction->base.scope, instruction->base.source_node, ira->codegen->builtin_types.entry_num_lit_int);
if (is_vector) {
ZigType *smallest_vec_type = get_vector_type(ira->codegen, vector_len, smallest_type);
Stage1AirInst *result = ir_const(ira, instruction->base.scope, instruction->base.source_node, smallest_vec_type);
expand_undef_array(ira->codegen, val);
result->value->data.x_array.data.s_none.elements = ira->codegen->pass1_arena->allocate<ZigValue>(smallest_vec_type->data.vector.len);
for (unsigned i = 0; i < smallest_vec_type->data.vector.len; i += 1) {
ZigValue *op_elem_val = &val->data.x_array.data.s_none.elements[i];
if ((err = ir_resolve_const_val(ira->codegen, ira->new_irb.exec, instruction->base.source_node,
op_elem_val, UndefOk)))
{
return ira->codegen->invalid_inst_gen;
}
ZigValue *result_elem_val = &result->value->data.x_array.data.s_none.elements[i];
result_elem_val->type = smallest_type;
result_elem_val->special = op_elem_val->special;
if (op_elem_val->special == ConstValSpecialUndef)
continue;
if (bigint_cmp_zero(&op_elem_val->data.x_bigint) != CmpLT) {
size_t value = bigint_popcount_unsigned(&op_elem_val->data.x_bigint);
bigint_init_unsigned(&result->value->data.x_array.data.s_none.elements[i].data.x_bigint, value);
}
size_t value = bigint_popcount_signed(&op_elem_val->data.x_bigint, int_type->data.integral.bit_count);
bigint_init_unsigned(&result->value->data.x_array.data.s_none.elements[i].data.x_bigint, value);
}
return result;
} else {
if (bigint_cmp_zero(&val->data.x_bigint) != CmpLT) {
size_t result = bigint_popcount_unsigned(&val->data.x_bigint);
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result);
@ -16022,8 +16175,9 @@ static Stage1AirInst *ir_analyze_instruction_pop_count(IrAnalyze *ira, Stage1Zir
size_t result = bigint_popcount_signed(&val->data.x_bigint, int_type->data.integral.bit_count);
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result);
}
}
ZigType *return_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count);
ZigType *return_type = is_vector ? get_vector_type(ira->codegen, vector_len, smallest_type) : smallest_type;
return ir_build_pop_count_gen(ira, instruction->base.scope, instruction->base.source_node, return_type, op);
}

View file

@ -123,16 +123,27 @@ test "@clz" {
}
fn testClz() !void {
try expect(clz(u8, 0b10001010) == 0);
try expect(clz(u8, 0b00001010) == 4);
try expect(clz(u8, 0b00011010) == 3);
try expect(clz(u8, 0b00000000) == 8);
try expect(clz(u128, 0xffffffffffffffff) == 64);
try expect(clz(u128, 0x10000000000000000) == 63);
try expect(@clz(u8, 0b10001010) == 0);
try expect(@clz(u8, 0b00001010) == 4);
try expect(@clz(u8, 0b00011010) == 3);
try expect(@clz(u8, 0b00000000) == 8);
try expect(@clz(u128, 0xffffffffffffffff) == 64);
try expect(@clz(u128, 0x10000000000000000) == 63);
}
fn clz(comptime T: type, x: T) usize {
return @clz(T, x);
test "@clz vectors" {
try testClzVectors();
comptime try testClzVectors();
}
// Checks @clz on 64-lane vectors: every lane holds the same input, and the
// result is compared against a splat of the expected count. Expected values
// are typed u4 for u8 inputs and u8 for u128 inputs.
fn testClzVectors() !void {
// This function is also evaluated at comptime by its test, so raise the branch quota.
@setEvalBranchQuota(10_000);
try expectEqual(@clz(u8, @splat(64, @as(u8, 0b10001010))), @splat(64, @as(u4, 0)));
try expectEqual(@clz(u8, @splat(64, @as(u8, 0b00001010))), @splat(64, @as(u4, 4)));
try expectEqual(@clz(u8, @splat(64, @as(u8, 0b00011010))), @splat(64, @as(u4, 3)));
// An all-zero lane reports the full bit width of the element type.
try expectEqual(@clz(u8, @splat(64, @as(u8, 0b00000000))), @splat(64, @as(u4, 8)));
try expectEqual(@clz(u128, @splat(64, @as(u128, 0xffffffffffffffff))), @splat(64, @as(u8, 64)));
try expectEqual(@clz(u128, @splat(64, @as(u128, 0x10000000000000000))), @splat(64, @as(u8, 63)));
}
test "@ctz" {
@ -141,14 +152,23 @@ test "@ctz" {
}
fn testCtz() !void {
try expect(ctz(u8, 0b10100000) == 5);
try expect(ctz(u8, 0b10001010) == 1);
try expect(ctz(u8, 0b00000000) == 8);
try expect(ctz(u16, 0b00000000) == 16);
try expect(@ctz(u8, 0b10100000) == 5);
try expect(@ctz(u8, 0b10001010) == 1);
try expect(@ctz(u8, 0b00000000) == 8);
try expect(@ctz(u16, 0b00000000) == 16);
}
fn ctz(comptime T: type, x: T) usize {
return @ctz(T, x);
// Runs the vector @ctz checks both at runtime and at comptime.
// (Previously this invoked testClzVectors, so the @ctz vector cases never ran.)
test "@ctz vectors" {
    try testCtzVectors();
    comptime try testCtzVectors();
}
// Checks @ctz on 64-lane vectors: every lane holds the same input, and the
// result is compared against a splat of the expected count. Expected values
// are typed u4 for u8 inputs and u5 for u16 inputs.
fn testCtzVectors() !void {
// Raise the branch quota so the 64-lane comparisons can also be evaluated at comptime.
@setEvalBranchQuota(10_000);
try expectEqual(@ctz(u8, @splat(64, @as(u8, 0b10100000))), @splat(64, @as(u4, 5)));
try expectEqual(@ctz(u8, @splat(64, @as(u8, 0b10001010))), @splat(64, @as(u4, 1)));
// An all-zero lane reports the full bit width of the element type.
try expectEqual(@ctz(u8, @splat(64, @as(u8, 0b00000000))), @splat(64, @as(u4, 8)));
try expectEqual(@ctz(u16, @splat(64, @as(u16, 0b00000000))), @splat(64, @as(u5, 16)));
}
test "assignment operators" {

View file

@ -1,11 +1,14 @@
const expect = @import("std").testing.expect;
const std = @import("std");
const expect = std.testing.expect;
const expectEqual = std.testing.expectEqual;
const Vector = std.meta.Vector;
test "@popCount" {
comptime try testPopCount();
try testPopCount();
test "@popCount integers" {
comptime try testPopCountIntegers();
try testPopCountIntegers();
}
fn testPopCount() !void {
fn testPopCountIntegers() !void {
{
var x: u32 = 0xffffffff;
try expect(@popCount(u32, x) == 32);
@ -41,3 +44,22 @@ fn testPopCount() !void {
try expect(@popCount(i128, 0b11111111000110001100010000100001000011000011100101010001) == 24);
}
}
test "@popCount vectors" {
// https://github.com/ziglang/zig/issues/3317
if (std.Target.current.cpu.arch == .mipsel or std.Target.current.cpu.arch == .mips) return error.SkipZigTest;
comptime try testPopCountVectors();
try testPopCountVectors();
}
// Checks @popCount on 8-lane vectors. The vector result is converted to an
// array with @as so it can be compared against an array literal.
fn testPopCountVectors() !void {
{
// Unsigned lanes with all 32 bits set: each lane counts 32 (result element type u6).
var x: Vector(8, u32) = [1]u32{0xffffffff} ** 8;
try expectEqual([1]u6{32} ** 8, @as([8]u6, @popCount(u32, x)));
}
{
// Signed lanes holding -1: each lane counts 16 set bits (result element type u5).
var x: Vector(8, i16) = [1]i16{-1} ** 8;
try expectEqual([1]u5{16} ** 8, @as([8]u5, @popCount(i16, x)));
}
}