rework the implementation

* update documentation
   - move `@shuffle` to be sorted alphabetically
   - remove mention of LLVM
   - minor clarifications & rewording
 * introduce ir_resolve_vector_elem_type to avoid duplicate compile
   error message and duplicate vector element checking logic
 * rework ir_analyze_shuffle_vector to solve various issues
 * improve `@shuffle` to allow implicit cast of arrays
 * the shuffle tests weren't being run
This commit is contained in:
Andrew Kelley 2019-09-18 15:41:56 -04:00
parent 193604c837
commit 2038f4d45a
No known key found for this signature in database
GPG key ID: 7C5F548F728501A9
6 changed files with 250 additions and 250 deletions

View file

@ -7673,6 +7673,43 @@ test "@setRuntimeSafety" {
{#see_also|@shlExact|@shlWithOverflow#} {#see_also|@shlExact|@shlWithOverflow#}
{#header_close#} {#header_close#}
{#header_open|@shuffle#}
<pre>{#syntax#}@shuffle(comptime E: type, a: @Vector(a_len, E), b: @Vector(b_len, E), comptime mask: @Vector(mask_len, i32)) @Vector(mask_len, E){#endsyntax#}</pre>
<p>
Constructs a new {#link|vector|Vectors#} by selecting elements from {#syntax#}a{#endsyntax#} and
{#syntax#}b{#endsyntax#} based on {#syntax#}mask{#endsyntax#}.
</p>
<p>
Each element in {#syntax#}mask{#endsyntax#} selects an element from either {#syntax#}a{#endsyntax#} or
{#syntax#}b{#endsyntax#}. Non-negative values select from {#syntax#}a{#endsyntax#}, starting at {#syntax#}0{#endsyntax#}.
Negative values select from {#syntax#}b{#endsyntax#}, starting at {#syntax#}-1{#endsyntax#} and going down.
It is recommended to use the {#syntax#}~{#endsyntax#} operator for indexes from {#syntax#}b{#endsyntax#}
so that both indexes can start from {#syntax#}0{#endsyntax#} (i.e. {#syntax#}~i32(0){#endsyntax#} is
{#syntax#}-1{#endsyntax#}).
</p>
<p>
For each element of {#syntax#}mask{#endsyntax#}, if it or the selected value from
{#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} is {#syntax#}undefined{#endsyntax#},
then the resulting element is {#syntax#}undefined{#endsyntax#}.
</p>
<p>
{#syntax#}a_len{#endsyntax#} and {#syntax#}b_len{#endsyntax#} may differ. Out-of-bounds element
indexes in {#syntax#}mask{#endsyntax#} result in compile errors.
</p>
<p>
If {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} is {#syntax#}undefined{#endsyntax#}, it
is equivalent to a vector of all {#syntax#}undefined{#endsyntax#} with the same length as the other vector.
If both vectors are {#syntax#}undefined{#endsyntax#}, {#syntax#}@shuffle{#endsyntax#} returns
a vector with all elements {#syntax#}undefined{#endsyntax#}.
</p>
<p>
{#syntax#}E{#endsyntax#} must be an {#link|integer|Integers#}, {#link|float|Floats#},
{#link|pointer|Pointers#}, or {#syntax#}bool{#endsyntax#}. The mask may be any vector length, and its
length determines the result length.
</p>
{#see_also|SIMD#}
{#header_close#}
{#header_open|@sizeOf#} {#header_open|@sizeOf#}
<pre>{#syntax#}@sizeOf(comptime T: type) comptime_int{#endsyntax#}</pre> <pre>{#syntax#}@sizeOf(comptime T: type) comptime_int{#endsyntax#}</pre>
<p> <p>
@ -8226,28 +8263,6 @@ fn foo(comptime T: type, ptr: *T) T {
{#link|pointer|Pointers#}. {#link|pointer|Pointers#}.
</p> </p>
{#header_close#} {#header_close#}
{#header_open|@shuffle#}
<pre>{#syntax#}@shuffle(comptime ElemType: type, a: @Vector(_, ElemType), b: @Vector(_, ElemType), comptime mask: @Vector(_, u32)) @Vector(mask.len, ElemType){#endsyntax#}</pre>
<p>
Does the {#syntax#}shufflevector{#endsyntax#} instruction. Each element in {#syntax#}comptime{#endsyntax#}
(and always {#syntax#}i32{#endsyntax#}) {#syntax#}mask{#endsyntax#} selects a element from either {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#}.
Positive numbers select from {#syntax#}a{#endsyntax#} (starting at 0), while negative values select
from {#syntax#}b{#endsyntax#} (starting at -1 and going down). It is recommended to use the {#syntax#}~{#endsyntax#}
operator from indexes from b so that both indexes can start from 0 (i.e. ~0 is -1). If either the {#syntax#}mask{#endsyntax#}
value or the value from {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} that it selects are {#syntax#}undefined{#endsyntax#}
then the resulting value is {#syntax#}undefined{#endsyntax#}. Also see {#link|SIMD#} and
the relevent <a href="https://llvm.org/docs/LangRef.html#i-shufflevector">LLVM Documentation on
{#syntax#}shufflevector{#endsyntax#}</a>, although note that the mask values are interpreted differently than in LLVM-IR.
Also, unlike LLVM-IR, the number of elements in {#syntax#}a{#endsyntax#} and {#syntax#}b{#endsyntax#} do not have to match.
The {#syntax#}undefined{#endsyntax#} identifier can be selected from up to the length of the other vector,
and yields {#syntax#}undefined{#endsyntax#}. If both vectors are {#syntax#}undefined{#endsyntax#}, yields an
{#syntax#}undefined{#endsyntax#} {#syntax#}ElemType{#endsyntax#} vector with length of {#syntax#}mask{#endsyntax#}.</p>
<p>
{#syntax#}ElemType{#endsyntax#} must be an {#link|integer|Integers#}, a {#link|float|Floats#}, or a
{#link|pointer|Pointers#}. The mask may be any vector length that the target supports, and its' length determines the result length.
</p>
{#header_close#}
{#header_close#} {#header_close#}
{#header_open|Build Mode#} {#header_open|Build Mode#}

View file

@ -4583,7 +4583,7 @@ static LLVMValueRef ir_render_ctz(CodeGen *g, IrExecutable *executable, IrInstru
static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executable, IrInstructionShuffleVector *instruction) { static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executable, IrInstructionShuffleVector *instruction) {
uint64_t len_a = instruction->a->value.type->data.vector.len; uint64_t len_a = instruction->a->value.type->data.vector.len;
uint64_t len_c = instruction->mask->value.type->data.vector.len; uint64_t len_mask = instruction->mask->value.type->data.vector.len;
// LLVM uses integers larger than the length of the first array to // LLVM uses integers larger than the length of the first array to
// index into the second array. This was deemed unnecessarily fragile // index into the second array. This was deemed unnecessarily fragile
@ -4591,23 +4591,24 @@ static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executabl
// second vector. These start at -1 and go down, and are easiest to use // second vector. These start at -1 and go down, and are easiest to use
// with the ~ operator. Here we convert between the two formats. // with the ~ operator. Here we convert between the two formats.
IrInstruction *mask = instruction->mask; IrInstruction *mask = instruction->mask;
LLVMValueRef *values = allocate<LLVMValueRef>(len_c); LLVMValueRef *values = allocate<LLVMValueRef>(len_mask);
for (uint64_t i = 0;i < len_c;i++) { for (uint64_t i = 0; i < len_mask; i++) {
if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef) { if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef) {
values[i] = LLVMGetUndef(LLVMInt32Type()); values[i] = LLVMGetUndef(LLVMInt32Type());
} else { } else {
int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint); int32_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
if (v < 0) uint32_t index_val = (v >= 0) ? (uint32_t)v : (uint32_t)~v + (uint32_t)len_a;
v = (uint32_t)~v + (uint32_t)len_a; values[i] = LLVMConstInt(LLVMInt32Type(), index_val, false);
values[i] = LLVMConstInt(LLVMInt32Type(), v, false);
} }
} }
LLVMValueRef llvm_mask_value = LLVMConstVector(values, len_mask);
free(values);
return LLVMBuildShuffleVector(g->builder, return LLVMBuildShuffleVector(g->builder,
ir_llvm_value(g, instruction->a), ir_llvm_value(g, instruction->a),
ir_llvm_value(g, instruction->b), ir_llvm_value(g, instruction->b),
LLVMConstVector(values, len_c), llvm_mask_value, "");
"");
} }
static LLVMValueRef ir_render_pop_count(CodeGen *g, IrExecutable *executable, IrInstructionPopCount *instruction) { static LLVMValueRef ir_render_pop_count(CodeGen *g, IrExecutable *executable, IrInstructionPopCount *instruction) {

View file

@ -11049,6 +11049,19 @@ static ZigType *ir_resolve_type(IrAnalyze *ira, IrInstruction *type_value) {
return ir_resolve_const_type(ira->codegen, ira->new_irb.exec, type_value->source_node, val); return ir_resolve_const_type(ira->codegen, ira->new_irb.exec, type_value->source_node, val);
} }
// Resolves `elem_type_value` to a type via ir_resolve_type and checks that it
// is acceptable as a vector element type (per is_valid_vector_elem_type).
//
// Returns the resolved element type on success. Returns the `entry_invalid`
// builtin type if type resolution failed, or if the resolved type is rejected
// by is_valid_vector_elem_type; in the latter case a compile error is attached
// to `elem_type_value` here, so callers only need to test the result with
// type_is_invalid and must not report the same error again.
static ZigType *ir_resolve_vector_elem_type(IrAnalyze *ira, IrInstruction *elem_type_value) {
ZigType *elem_type = ir_resolve_type(ira, elem_type_value);
// Resolution failed: propagate the invalid type without adding an error here.
if (type_is_invalid(elem_type))
return ira->codegen->builtin_types.entry_invalid;
if (!is_valid_vector_elem_type(elem_type)) {
// Single shared location for this diagnostic so that every caller
// reports the same message for a bad vector element type.
ir_add_error(ira, elem_type_value,
buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
buf_ptr(&elem_type->name)));
return ira->codegen->builtin_types.entry_invalid;
}
return elem_type;
}
static ZigType *ir_resolve_int_type(IrAnalyze *ira, IrInstruction *type_value) { static ZigType *ir_resolve_int_type(IrAnalyze *ira, IrInstruction *type_value) {
ZigType *ty = ir_resolve_type(ira, type_value); ZigType *ty = ir_resolve_type(ira, type_value);
if (type_is_invalid(ty)) if (type_is_invalid(ty))
@ -22096,242 +22109,212 @@ static IrInstruction *ir_analyze_instruction_vector_type(IrAnalyze *ira, IrInstr
if (!ir_resolve_unsigned(ira, instruction->len->child, ira->codegen->builtin_types.entry_u32, &len)) if (!ir_resolve_unsigned(ira, instruction->len->child, ira->codegen->builtin_types.entry_u32, &len))
return ira->codegen->invalid_instruction; return ira->codegen->invalid_instruction;
ZigType *elem_type = ir_resolve_type(ira, instruction->elem_type->child); ZigType *elem_type = ir_resolve_vector_elem_type(ira, instruction->elem_type->child);
if (type_is_invalid(elem_type)) if (type_is_invalid(elem_type))
return ira->codegen->invalid_instruction; return ira->codegen->invalid_instruction;
if (!is_valid_vector_elem_type(elem_type)) {
ir_add_error(ira, instruction->elem_type,
buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
buf_ptr(&elem_type->name)));
return ira->codegen->invalid_instruction;
}
ZigType *vector_type = get_vector_type(ira->codegen, len, elem_type); ZigType *vector_type = get_vector_type(ira->codegen, len, elem_type);
return ir_const_type(ira, &instruction->base, vector_type); return ir_const_type(ira, &instruction->base, vector_type);
} }
static IrInstruction *ir_analyze_shuffle_vector(IrAnalyze *ira, IrInstruction *source_instr, static IrInstruction *ir_analyze_shuffle_vector(IrAnalyze *ira, IrInstruction *source_instr,
ZigType *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask) { ZigType *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask)
assert(source_instr && scalar_type && a && b && mask); {
assert(scalar_type->id == ZigTypeIdBool || ir_assert(source_instr && scalar_type && a && b && mask, source_instr);
scalar_type->id == ZigTypeIdInt || ir_assert(is_valid_vector_elem_type(scalar_type), source_instr);
scalar_type->id == ZigTypeIdFloat ||
scalar_type->id == ZigTypeIdPointer);
ZigType *mask_type = mask->value.type; uint32_t len_mask;
if (type_is_invalid(mask_type)) if (mask->value.type->id == ZigTypeIdVector) {
return ira->codegen->invalid_instruction; len_mask = mask->value.type->data.vector.len;
} else if (mask->value.type->id == ZigTypeIdArray) {
const char *shuffle_mask_fail_fmt = "@shuffle mask operand must be a vector of signed 32-bit integers, got '%s'"; len_mask = mask->value.type->data.array.len;
} else {
if (mask_type->id == ZigTypeIdArray) {
ZigType *vector_type = get_vector_type(ira->codegen, mask_type->data.array.len, mask_type->data.array.child_type);
mask = ir_analyze_array_to_vector(ira, mask, mask, vector_type);
if (!mask)
return ira->codegen->invalid_instruction;
mask_type = vector_type;
}
if (mask_type->id != ZigTypeIdVector) {
ir_add_error(ira, mask, ir_add_error(ira, mask,
buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name))); buf_sprintf("expected vector or array, found '%s'",
buf_ptr(&mask->value.type->name)));
return ira->codegen->invalid_instruction; return ira->codegen->invalid_instruction;
} }
mask = ir_implicit_cast(ira, mask, get_vector_type(ira->codegen, len_mask,
ZigType *mask_scalar_type = mask_type->data.array.child_type; ira->codegen->builtin_types.entry_i32));
if (mask_scalar_type->id != ZigTypeIdInt) { if (type_is_invalid(mask->value.type))
ir_add_error(ira, mask,
buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
return ira->codegen->invalid_instruction; return ira->codegen->invalid_instruction;
}
if (mask_scalar_type->data.integral.bit_count != 32 || uint32_t len_a;
mask_scalar_type->data.integral.is_signed == false) { if (a->value.type->id == ZigTypeIdVector) {
ir_add_error(ira, mask, len_a = a->value.type->data.vector.len;
buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name))); } else if (a->value.type->id == ZigTypeIdArray) {
return ira->codegen->invalid_instruction; len_a = a->value.type->data.array.len;
} } else if (a->value.type->id == ZigTypeIdUndefined) {
len_a = UINT32_MAX;
uint64_t len_a, len_b, len_c = mask->value.type->data.vector.len; } else {
if (a->value.type->id != ZigTypeIdVector) {
if (a->value.type->id != ZigTypeIdUndefined) {
ir_add_error(ira, a, ir_add_error(ira, a,
buf_sprintf("expected vector of element type '%s' got '%s'", buf_sprintf("expected vector or array with element type '%s', found '%s'",
buf_ptr(&scalar_type->name), buf_ptr(&scalar_type->name),
buf_ptr(&a->value.type->name))); buf_ptr(&a->value.type->name)));
return ira->codegen->invalid_instruction; return ira->codegen->invalid_instruction;
} }
} else {
len_a = a->value.type->data.vector.len;
}
if (b->value.type->id != ZigTypeIdVector) { uint32_t len_b;
if (b->value.type->id != ZigTypeIdUndefined) { if (b->value.type->id == ZigTypeIdVector) {
len_b = b->value.type->data.vector.len;
} else if (b->value.type->id == ZigTypeIdArray) {
len_b = b->value.type->data.array.len;
} else if (b->value.type->id == ZigTypeIdUndefined) {
len_b = UINT32_MAX;
} else {
ir_add_error(ira, b, ir_add_error(ira, b,
buf_sprintf("expected vector of element type '%s' got '%s'", buf_sprintf("expected vector or array with element type '%s', found '%s'",
buf_ptr(&scalar_type->name), buf_ptr(&scalar_type->name),
buf_ptr(&b->value.type->name))); buf_ptr(&b->value.type->name)));
return ira->codegen->invalid_instruction; return ira->codegen->invalid_instruction;
} }
if (len_a == UINT32_MAX && len_b == UINT32_MAX) {
return ir_const_undef(ira, a, get_vector_type(ira->codegen, len_mask, scalar_type));
}
if (len_a == UINT32_MAX) {
len_a = len_b;
a = ir_const_undef(ira, a, get_vector_type(ira->codegen, len_a, scalar_type));
} else { } else {
len_b = b->value.type->data.vector.len; a = ir_implicit_cast(ira, a, get_vector_type(ira->codegen, len_a, scalar_type));
} if (type_is_invalid(a->value.type))
if (a->value.type->id == ZigTypeIdUndefined && b->value.type->id == ZigTypeIdUndefined) {
return ir_const_undef(ira, a, get_vector_type(ira->codegen, len_c, scalar_type));
}
// undefined is a vector up to length of the other vector.
if (a->value.type->id == ZigTypeIdUndefined) {
a = ir_const_undef(ira, a, b->value.type);
len_a = b->value.type->data.vector.len;
} else if (b->value.type->id == ZigTypeIdUndefined) {
b = ir_const_undef(ira, b, a->value.type);
len_b = a->value.type->data.vector.len;
}
// FIXME I think this needs to be more sophisticated
if (a->value.type->data.vector.elem_type != scalar_type) {
ir_add_error(ira, a,
buf_sprintf("element type '%s' does not match '%s'",
buf_ptr(&a->value.type->data.vector.elem_type->name),
buf_ptr(&scalar_type->name)));
return ira->codegen->invalid_instruction;
}
if (b->value.type->data.vector.elem_type != scalar_type) {
ir_add_error(ira, b,
buf_sprintf("element type '%s' does not match '%s'",
buf_ptr(&b->value.type->data.vector.elem_type->name),
buf_ptr(&scalar_type->name)));
return ira->codegen->invalid_instruction; return ira->codegen->invalid_instruction;
} }
if (a->value.type != b->value.type) { if (len_b == UINT32_MAX) {
assert(len_a != len_b); len_b = len_a;
uint32_t len_max = max(len_a, len_b), len_min = min(len_a, len_b); b = ir_const_undef(ira, b, get_vector_type(ira->codegen, len_b, scalar_type));
bool expand_b = len_b < len_a;
IrInstruction *expand_mask = ir_const(ira, mask,
get_vector_type(ira->codegen, len_max, ira->codegen->builtin_types.entry_i32));
expand_mask->value.data.x_array.data.s_none.elements = create_const_vals(len_max);
uint32_t i = 0;
for (; i < len_min; i++)
bigint_init_unsigned(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, i);
for (; i < len_max; i++)
bigint_init_signed(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, -1);
IrInstruction *undef = ir_const_undef(ira, source_instr,
get_vector_type(ira->codegen, len_min, scalar_type));
if (expand_b) {
if (instr_is_comptime(b)) {
ConstExprValue *old = b->value.data.x_array.data.s_none.elements;
b->value.data.x_array.data.s_none.elements =
allocate<ConstExprValue>(len_a);
memcpy(b->value.data.x_array.data.s_none.elements, old,
b->value.type->data.vector.len * sizeof(ConstExprValue));
} else { } else {
b = ir_build_shuffle_vector(&ira->new_irb, b = ir_implicit_cast(ira, b, get_vector_type(ira->codegen, len_b, scalar_type));
source_instr->scope, source_instr->source_node, if (type_is_invalid(b->value.type))
nullptr, b, undef, expand_mask); return ira->codegen->invalid_instruction;
b->value.special = ConstValSpecialRuntime;
}
b->value.type = get_vector_type(ira->codegen, len_max, scalar_type);
} else {
if (instr_is_comptime(a)) {
ConstExprValue *old = a->value.data.x_array.data.s_none.elements;
a->value.data.x_array.data.s_none.elements =
allocate<ConstExprValue>(len_b);
memcpy(a->value.data.x_array.data.s_none.elements, old,
a->value.type->data.vector.len * sizeof(ConstExprValue));
} else {
a = ir_build_shuffle_vector(&ira->new_irb,
source_instr->scope, source_instr->source_node,
nullptr, a, undef, expand_mask);
a->value.special = ConstValSpecialRuntime;
}
a->value.type = get_vector_type(ira->codegen, len_max, scalar_type);
}
} }
ConstExprValue *mask_val = ir_resolve_const(ira, mask, UndefOk); ConstExprValue *mask_val = ir_resolve_const(ira, mask, UndefOk);
if (!mask_val) { if (mask_val == nullptr)
ir_add_error(ira, mask, return ira->codegen->invalid_instruction;
buf_sprintf("mask must be comptime"));
expand_undef_array(ira->codegen, mask_val);
for (uint32_t i = 0; i < len_mask; i += 1) {
ConstExprValue *mask_elem_val = &mask_val->data.x_array.data.s_none.elements[i];
if (mask_elem_val->special == ConstValSpecialUndef)
continue;
int32_t v_i32 = bigint_as_signed(&mask_elem_val->data.x_bigint);
uint32_t v;
IrInstruction *chosen_operand;
if (v_i32 >= 0) {
v = (uint32_t)v_i32;
chosen_operand = a;
} else {
v = (uint32_t)~v_i32;
chosen_operand = b;
}
if (v >= chosen_operand->value.type->data.vector.len) {
ErrorMsg *msg = ir_add_error(ira, mask,
buf_sprintf("mask index '%u' has out-of-bounds selection", i));
add_error_note(ira->codegen, msg, chosen_operand->source_node,
buf_sprintf("selected index '%u' out of bounds of %s", v,
buf_ptr(&chosen_operand->value.type->name)));
if (chosen_operand == a && v < len_a + len_b) {
add_error_note(ira->codegen, msg, b->source_node,
buf_create_from_str("selections from the second vector are specified with negative numbers"));
}
return ira->codegen->invalid_instruction; return ira->codegen->invalid_instruction;
} }
for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) {
if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef)
continue;
int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
if (v >= 0 && (uint64_t)v + 1 > len_a) {
ErrorMsg *msg = ir_add_error(ira, mask,
buf_sprintf("mask index out of bounds"));
add_error_note(ira->codegen, msg, mask->source_node,
buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i));
if ((uint64_t)v <= len_a + len_b)
add_error_note(ira->codegen, msg, mask->source_node,
buf_sprintf("selections from the second vector are specified with negative numbers"));
} else if (v < 0 && (uint64_t)~v + 1 > len_b) {
ErrorMsg *msg = ir_add_error(ira, mask,
buf_sprintf("mask index out of bounds"));
add_error_note(ira->codegen, msg, mask->source_node,
buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i));
}
else
continue;
return ira->codegen->invalid_instruction;
} }
ZigType *result_type = get_vector_type(ira->codegen, len_c, scalar_type); ZigType *result_type = get_vector_type(ira->codegen, len_mask, scalar_type);
if (instr_is_comptime(a) && if (instr_is_comptime(a) && instr_is_comptime(b)) {
instr_is_comptime(b)) { ConstExprValue *a_val = ir_resolve_const(ira, a, UndefOk);
if (a_val == nullptr)
return ira->codegen->invalid_instruction;
ConstExprValue *b_val = ir_resolve_const(ira, b, UndefOk);
if (b_val == nullptr)
return ira->codegen->invalid_instruction;
expand_undef_array(ira->codegen, a_val);
expand_undef_array(ira->codegen, b_val);
IrInstruction *result = ir_const(ira, source_instr, result_type); IrInstruction *result = ir_const(ira, source_instr, result_type);
result->value.data.x_array.data.s_none.elements = create_const_vals(len_c); result->value.data.x_array.data.s_none.elements = create_const_vals(len_mask);
for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) { for (uint32_t i = 0; i < mask_val->type->data.vector.len; i += 1) {
if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef) ConstExprValue *mask_elem_val = &mask_val->data.x_array.data.s_none.elements[i];
result->value.data.x_array.data.s_none.elements[i].special = ConstExprValue *result_elem_val = &result->value.data.x_array.data.s_none.elements[i];
ConstValSpecialUndef; if (mask_elem_val->special == ConstValSpecialUndef) {
int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint); result_elem_val->special = ConstValSpecialUndef;
if (v >= 0) continue;
result->value.data.x_array.data.s_none.elements[i] = }
a->value.data.x_array.data.s_none.elements[v]; int32_t v = bigint_as_signed(&mask_elem_val->data.x_bigint);
else if (v < 0) // We've already checked for and emitted compile errors for index out of bounds here.
result->value.data.x_array.data.s_none.elements[i] = ConstExprValue *src_elem_val = (v >= 0) ?
b->value.data.x_array.data.s_none.elements[~v]; &a->value.data.x_array.data.s_none.elements[v] :
else &b->value.data.x_array.data.s_none.elements[~v];
zig_unreachable(); copy_const_val(result_elem_val, src_elem_val, false);
result->value.data.x_array.data.s_none.elements[i].special =
ConstValSpecialStatic; ir_assert(result_elem_val->special == ConstValSpecialStatic, source_instr);
} }
result->value.special = ConstValSpecialStatic; result->value.special = ConstValSpecialStatic;
return result; return result;
} }
// All static analysis passed, and not comptime // All static analysis passed, and not comptime.
// For runtime codegen, vectors a and b must be the same length. Here we
// recursively @shuffle the smaller vector to append undefined elements
// to it up to the length of the longer vector. This recursion terminates
// in 1 call because these calls to ir_analyze_shuffle_vector guarantee
// len_a == len_b.
if (len_a != len_b) {
uint32_t len_min = min(len_a, len_b);
uint32_t len_max = max(len_a, len_b);
IrInstruction *expand_mask = ir_const(ira, mask,
get_vector_type(ira->codegen, len_max, ira->codegen->builtin_types.entry_i32));
expand_mask->value.data.x_array.data.s_none.elements = create_const_vals(len_max);
uint32_t i = 0;
for (; i < len_min; i += 1)
bigint_init_unsigned(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, i);
for (; i < len_max; i += 1)
bigint_init_signed(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, -1);
IrInstruction *undef = ir_const_undef(ira, source_instr,
get_vector_type(ira->codegen, len_min, scalar_type));
if (len_b < len_a) {
b = ir_analyze_shuffle_vector(ira, source_instr, scalar_type, b, undef, expand_mask);
} else {
a = ir_analyze_shuffle_vector(ira, source_instr, scalar_type, a, undef, expand_mask);
}
}
IrInstruction *result = ir_build_shuffle_vector(&ira->new_irb, IrInstruction *result = ir_build_shuffle_vector(&ira->new_irb,
source_instr->scope, source_instr->source_node, source_instr->scope, source_instr->source_node,
nullptr, a, b, mask); nullptr, a, b, mask);
result->value.type = result_type; result->value.type = result_type;
result->value.special = ConstValSpecialRuntime;
return result; return result;
} }
static IrInstruction *ir_analyze_instruction_shuffle_vector(IrAnalyze *ira, IrInstructionShuffleVector *instruction) { static IrInstruction *ir_analyze_instruction_shuffle_vector(IrAnalyze *ira, IrInstructionShuffleVector *instruction) {
ZigType *scalar_type = ir_resolve_type(ira, instruction->scalar_type); ZigType *scalar_type = ir_resolve_vector_elem_type(ira, instruction->scalar_type);
assert(scalar_type);
if (type_is_invalid(scalar_type)) if (type_is_invalid(scalar_type))
return ira->codegen->invalid_instruction; return ira->codegen->invalid_instruction;
if (scalar_type->id != ZigTypeIdBool && IrInstruction *a = instruction->a->child;
scalar_type->id != ZigTypeIdInt && if (type_is_invalid(a->value.type))
scalar_type->id != ZigTypeIdFloat &&
scalar_type->id != ZigTypeIdPointer) {
ir_add_error(ira, instruction->scalar_type,
buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
buf_ptr(&scalar_type->name)));
return ira->codegen->invalid_instruction; return ira->codegen->invalid_instruction;
}
return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, instruction->a->child, instruction->b->child, instruction->mask->child); IrInstruction *b = instruction->b->child;
if (type_is_invalid(b->value.type))
return ira->codegen->invalid_instruction;
IrInstruction *mask = instruction->mask->child;
if (type_is_invalid(mask->value.type))
return ira->codegen->invalid_instruction;
return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, a, b, mask);
} }
static IrInstruction *ir_analyze_instruction_bool_not(IrAnalyze *ira, IrInstructionBoolNot *instruction) { static IrInstruction *ir_analyze_instruction_bool_not(IrAnalyze *ira, IrInstructionBoolNot *instruction) {

View file

@ -6485,16 +6485,16 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
); );
cases.addTest( cases.addTest(
"using LLVM syntax for @shuffle", "@shuffle with selected index past first vector length",
\\export fn entry() void { \\export fn entry() void {
\\ const v: @Vector(4, u32) = [4]u32{0, 1, 2, 3}; \\ const v: @Vector(4, u32) = [4]u32{ 10, 11, 12, 13 };
\\ const x: @Vector(4, u32) = [4]u32{4, 5, 6, 7}; \\ const x: @Vector(4, u32) = [4]u32{ 14, 15, 16, 17 };
\\ var z = @shuffle(u32, v, x, [8]i32{0, 1, 2, 3, 4, 5, 6, 7}); \\ var z = @shuffle(u32, v, x, [8]i32{ 0, 1, 2, 3, 7, 6, 5, 4 });
\\} \\}
, ,
"tmp.zig:4:39: error: mask index out of bounds", "tmp.zig:4:39: error: mask index '4' has out-of-bounds selection",
"tmp.zig:4:39: note: when computing vector element at index 4", "tmp.zig:4:27: note: selected index '7' out of bounds of @Vector(4, u32)",
"tmp.zig:4:39: note: selections from the second vector are specified with negative numbers", "tmp.zig:4:30: note: selections from the second vector are specified with negative numbers",
); );
cases.addTest( cases.addTest(

View file

@ -80,6 +80,7 @@ comptime {
_ = @import("behavior/pub_enum.zig"); _ = @import("behavior/pub_enum.zig");
_ = @import("behavior/ref_var_in_if_after_if_2nd_switch_prong.zig"); _ = @import("behavior/ref_var_in_if_after_if_2nd_switch_prong.zig");
_ = @import("behavior/reflection.zig"); _ = @import("behavior/reflection.zig");
_ = @import("behavior/shuffle.zig");
_ = @import("behavior/sizeof_and_typeof.zig"); _ = @import("behavior/sizeof_and_typeof.zig");
_ = @import("behavior/slice.zig"); _ = @import("behavior/slice.zig");
_ = @import("behavior/slicetobytes.zig"); _ = @import("behavior/slicetobytes.zig");

View file

@ -41,8 +41,8 @@ test "@shuffle" {
expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false })); expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false }));
} }
// FIXME re-enable when LLVM codegen is fixed // TODO re-enable when LLVM codegen is fixed
// https://bugs.llvm.org/show_bug.cgi?id=42803 // https://github.com/ziglang/zig/issues/3246
if (false) { if (false) {
var x2: @Vector(3, bool) = [3]bool{ false, true, false }; var x2: @Vector(3, bool) = [3]bool{ false, true, false };
var v4: @Vector(2, bool) = [2]bool{ true, false }; var v4: @Vector(2, bool) = [2]bool{ true, false };