mirror of
https://codeberg.org/ziglang/zig.git
synced 2025-12-06 13:54:21 +00:00
x86_64: implement integer @reduce(.Mul)
This commit is contained in:
parent
1f6f8b0ffe
commit
7bfdb7f26d
6 changed files with 3830 additions and 141 deletions
File diff suppressed because it is too large
Load diff
|
|
@ -336,7 +336,7 @@ pub const Mnemonic = enum {
|
|||
fcom, fcomi, fcomip, fcomp, fcompp, fcos,
|
||||
fdecstp, fdiv, fdivp, fdivr, fdivrp, ffree,
|
||||
fiadd, ficom, ficomp, fidiv, fidivr, fild, fimul, fincstp, finit,
|
||||
fist, fistp, fisttp, fisub, fisubr,
|
||||
fist, fistp, fisub, fisubr,
|
||||
fld, fld1, fldcw, fldenv, fldl2e, fldl2t, fldlg2, fldln2, fldpi, fldz,
|
||||
fmul, fmulp,
|
||||
fnclex, fninit, fnop, fnsave, fnstcw, fnstenv, fnstsw,
|
||||
|
|
@ -349,19 +349,18 @@ pub const Mnemonic = enum {
|
|||
// MMX
|
||||
emms, movd, movq,
|
||||
packssdw, packsswb, packuswb,
|
||||
paddb, paddd, paddq, paddsb, paddsw, paddusb, paddusw, paddw,
|
||||
paddb, paddd, paddsb, paddsw, paddusb, paddusw, paddw,
|
||||
pand, pandn, por, pxor,
|
||||
pcmpeqb, pcmpeqd, pcmpeqw,
|
||||
pcmpgtb, pcmpgtd, pcmpgtw,
|
||||
pmulhw, pmullw,
|
||||
pmaddwd, pmulhw, pmullw,
|
||||
pslld, psllq, psllw,
|
||||
psrad, psraw,
|
||||
psrld, psrlq, psrlw,
|
||||
psubb, psubd, psubq, psubsb, psubsw, psubusb, psubusw, psubw,
|
||||
psubb, psubd, psubsb, psubsw, psubusb, psubusw, psubw,
|
||||
// SSE
|
||||
addps, addss,
|
||||
andps,
|
||||
andnps,
|
||||
andnps, andps,
|
||||
cmpps, cmpss, comiss,
|
||||
cvtpi2ps, cvtps2pi, cvtsi2ss, cvtss2si, cvttps2pi, cvttss2si,
|
||||
divps, divss,
|
||||
|
|
@ -374,9 +373,11 @@ pub const Mnemonic = enum {
|
|||
movss, movups,
|
||||
mulps, mulss,
|
||||
orps,
|
||||
pavgb, pavgw,
|
||||
pextrw, pinsrw,
|
||||
pmaxsw, pmaxub, pminsw, pminub, pmovmskb,
|
||||
pmaxsw, pmaxub, pminsw, pminub, pmovmskb, pmulhuw,
|
||||
prefetchit0, prefetchit1, prefetchnta, prefetcht0, prefetcht1, prefetcht2, prefetchw, prefetchwt1,
|
||||
psadbw, pshufw,
|
||||
shufps,
|
||||
sqrtps, sqrtss,
|
||||
stmxcsr,
|
||||
|
|
@ -397,15 +398,16 @@ pub const Mnemonic = enum {
|
|||
maxpd, maxsd,
|
||||
minpd, minsd,
|
||||
movapd,
|
||||
movdqa, movdqu,
|
||||
movdq2q, movdqa, movdqu,
|
||||
movhpd, movlpd,
|
||||
movmskpd,
|
||||
movmskpd, movq2dq,
|
||||
//movsd,
|
||||
movupd,
|
||||
mulpd, mulsd,
|
||||
orpd,
|
||||
paddq, pmuludq,
|
||||
pshufd, pshufhw, pshuflw,
|
||||
pslldq, psrldq,
|
||||
pslldq, psrldq, psubq,
|
||||
punpckhbw, punpckhdq, punpckhqdq, punpckhwd,
|
||||
punpcklbw, punpckldq, punpcklqdq, punpcklwd,
|
||||
shufpd,
|
||||
|
|
@ -414,9 +416,17 @@ pub const Mnemonic = enum {
|
|||
ucomisd, unpckhpd, unpcklpd,
|
||||
xorpd,
|
||||
// SSE3
|
||||
addsubpd, addsubps, haddpd, haddps, lddqu, movddup, movshdup, movsldup,
|
||||
addsubpd, addsubps,
|
||||
fisttp,
|
||||
haddpd, haddps,
|
||||
hsubpd, hsubps,
|
||||
lddqu,
|
||||
movddup, movshdup, movsldup,
|
||||
// SSSE3
|
||||
pabsb, pabsd, pabsw, palignr, pshufb,
|
||||
pabsb, pabsd, pabsw, palignr,
|
||||
phaddw, phaddsw, phaddd, phsubw, phsubsw, phsubd,
|
||||
pmaddubsw, pmulhrsw, pshufb,
|
||||
psignb, psignd, psignw,
|
||||
// SSE4.1
|
||||
blendpd, blendps, blendvpd, blendvps,
|
||||
dppd, dpps,
|
||||
|
|
@ -430,7 +440,7 @@ pub const Mnemonic = enum {
|
|||
pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw,
|
||||
pmovsxbd, pmovsxbq, pmovsxbw, pmovsxdq, pmovsxwd, pmovsxwq,
|
||||
pmovzxbd, pmovzxbq, pmovzxbw, pmovzxdq, pmovzxwd, pmovzxwq,
|
||||
pmulld,
|
||||
pmuldq, pmulld,
|
||||
ptest,
|
||||
roundpd, roundps, roundsd, roundss,
|
||||
// SSE4.2
|
||||
|
|
@ -458,7 +468,7 @@ pub const Mnemonic = enum {
|
|||
vdppd, vdpps,
|
||||
vextractf128, vextractps,
|
||||
vgf2p8affineinvqb, vgf2p8affineqb, vgf2p8mulb,
|
||||
vhaddpd, vhaddps,
|
||||
vhaddpd, vhaddps, vhsubpd, vhsubps,
|
||||
vinsertf128, vinsertps,
|
||||
vlddqu, vldmxcsr,
|
||||
vmaskmovpd, vmaskmovps,
|
||||
|
|
@ -480,21 +490,24 @@ pub const Mnemonic = enum {
|
|||
vpabsb, vpabsd, vpabsw,
|
||||
vpackssdw, vpacksswb, vpackusdw, vpackuswb,
|
||||
vpaddb, vpaddd, vpaddq, vpaddsb, vpaddsw, vpaddusb, vpaddusw, vpaddw,
|
||||
vpalignr, vpand, vpandn,
|
||||
vpalignr, vpand, vpandn, vpavgb, vpavgw,
|
||||
vpblendvb, vpblendw, vpclmulqdq,
|
||||
vpcmpeqb, vpcmpeqd, vpcmpeqq, vpcmpeqw,
|
||||
vpcmpgtb, vpcmpgtd, vpcmpgtq, vpcmpgtw,
|
||||
vphaddw, vphaddsw, vphaddd, vphsubw, vphsubsw, vphsubd,
|
||||
vperm2f128, vpermilpd, vpermilps,
|
||||
vpextrb, vpextrd, vpextrq, vpextrw,
|
||||
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
|
||||
vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw,
|
||||
vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw,
|
||||
vpmaddubsw,
|
||||
vpmovmskb,
|
||||
vpmovsxbd, vpmovsxbq, vpmovsxbw, vpmovsxdq, vpmovsxwd, vpmovsxwq,
|
||||
vpmovzxbd, vpmovzxbq, vpmovzxbw, vpmovzxdq, vpmovzxwd, vpmovzxwq,
|
||||
vpmulhw, vpmulld, vpmullw,
|
||||
vpmuldq, vpmulhrsw, vpmulhw, vpmulld, vpmullw, vpmuludq,
|
||||
vpor,
|
||||
vpshufb, vpshufd, vpshufhw, vpshuflw,
|
||||
vpsignb, vpsignd, vpsignw,
|
||||
vpslld, vpslldq, vpsllq, vpsllw,
|
||||
vpsrad, vpsraq, vpsraw,
|
||||
vpsrld, vpsrldq, vpsrlq, vpsrlw,
|
||||
|
|
@ -779,7 +792,7 @@ pub const Op = enum {
|
|||
pub fn isImmediate(op: Op) bool {
|
||||
// zig fmt: off
|
||||
return switch (op) {
|
||||
.imm8, .imm16, .imm32, .imm64,
|
||||
.imm8, .imm16, .imm32, .imm64,
|
||||
.imm8s, .imm16s, .imm32s,
|
||||
.rel8, .rel16, .rel32,
|
||||
.unity,
|
||||
|
|
@ -986,6 +999,7 @@ pub const Feature = enum {
|
|||
sse,
|
||||
sse2,
|
||||
sse3,
|
||||
@"sse3 x87",
|
||||
sse4_1,
|
||||
sse4_2,
|
||||
ssse3,
|
||||
|
|
|
|||
|
|
@ -567,7 +567,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
|
|||
}
|
||||
|
||||
fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
|
||||
@setEvalBranchQuota(2_500);
|
||||
@setEvalBranchQuota(2_600);
|
||||
const fixes = switch (inst.ops) {
|
||||
.none => inst.data.none.fixes,
|
||||
.inst => inst.data.inst.fixes,
|
||||
|
|
|
|||
|
|
@ -386,7 +386,10 @@ pub const Inst = struct {
|
|||
/// Packed ___ Quadword
|
||||
p_q,
|
||||
/// Packed ___ Double Quadword
|
||||
/// Packed ___ Doubleword to Quadword
|
||||
p_dq,
|
||||
/// Packed ___ Unsigned Doubleword to Quadword
|
||||
p_udq,
|
||||
/// ___ Aligned Packed Integer Values
|
||||
_dqa,
|
||||
/// ___ Unaligned Packed Integer Values
|
||||
|
|
@ -446,7 +449,10 @@ pub const Inst = struct {
|
|||
/// VEX-Encoded Packed ___ Quadword
|
||||
vp_q,
|
||||
/// VEX-Encoded Packed ___ Double Quadword
|
||||
/// VEX-Encoded Packed ___ Doubleword to Quadword
|
||||
vp_dq,
|
||||
/// VEX-Encoded Packed ___ Unsigned Doubleword to Quadword
|
||||
vp_udq,
|
||||
/// VEX-Encoded ___ Scalar Single-Precision Values
|
||||
v_ss,
|
||||
/// VEX-Encoded ___ Packed Single-Precision Values
|
||||
|
|
@ -663,6 +669,8 @@ pub const Inst = struct {
|
|||
/// Multiply scalar single-precision floating-point values
|
||||
/// Multiply packed double-precision floating-point values
|
||||
/// Multiply scalar double-precision floating-point values
|
||||
/// Multiply packed unsigned doubleword integers
|
||||
/// Multiply packed doubleword integers
|
||||
mul,
|
||||
/// Two's complement negation
|
||||
neg,
|
||||
|
|
|
|||
|
|
@ -1160,10 +1160,6 @@
|
|||
.{ .fistp, .m, .{ .m32 }, .{ 0xdb }, 3, .none, .x87 },
|
||||
.{ .fistp, .m, .{ .m64 }, .{ 0xdf }, 7, .none, .x87 },
|
||||
|
||||
.{ .fisttp, .m, .{ .m16 }, .{ 0xdf }, 1, .none, .x87 },
|
||||
.{ .fisttp, .m, .{ .m32 }, .{ 0xdb }, 1, .none, .x87 },
|
||||
.{ .fisttp, .m, .{ .m64 }, .{ 0xdd }, 1, .none, .x87 },
|
||||
|
||||
.{ .fld, .m, .{ .m32 }, .{ 0xd9 }, 0, .none, .x87 },
|
||||
.{ .fld, .m, .{ .m64 }, .{ 0xdd }, 0, .none, .x87 },
|
||||
.{ .fld, .m, .{ .m80 }, .{ 0xdb }, 5, .none, .x87 },
|
||||
|
|
@ -1540,6 +1536,8 @@
|
|||
|
||||
.{ .pmullw, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xd5 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pmuludq, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf4 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .por, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xeb }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pshufd, .rmi, .{ .xmm, .xmm_m128, .imm8 }, .{ 0x66, 0x0f, 0x70 }, 0, .none, .sse2 },
|
||||
|
|
@ -1618,6 +1616,10 @@
|
|||
|
||||
.{ .addsubps, .rm, .{ .xmm, .xmm_m128 }, .{ 0xf2, 0x0f, 0xd0 }, 0, .none, .sse3 },
|
||||
|
||||
.{ .fisttp, .m, .{ .m16 }, .{ 0xdf }, 1, .none, .@"sse3 x87" },
|
||||
.{ .fisttp, .m, .{ .m32 }, .{ 0xdb }, 1, .none, .@"sse3 x87" },
|
||||
.{ .fisttp, .m, .{ .m64 }, .{ 0xdd }, 1, .none, .@"sse3 x87" },
|
||||
|
||||
.{ .haddpd, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x7c }, 0, .none, .sse3 },
|
||||
|
||||
.{ .haddps, .rm, .{ .xmm, .xmm_m128 }, .{ 0xf2, 0x0f, 0x7c }, 0, .none, .sse3 },
|
||||
|
|
@ -1708,6 +1710,8 @@
|
|||
.{ .pmovzxwq, .rm, .{ .xmm, .xmm_m32 }, .{ 0x66, 0x0f, 0x38, 0x34 }, 0, .none, .sse4_1 },
|
||||
.{ .pmovzxdq, .rm, .{ .xmm, .xmm_m64 }, .{ 0x66, 0x0f, 0x38, 0x35 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .pmuldq, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x28 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .pmulld, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .ptest, .rm, .{ .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x17 }, 0, .none, .sse4_1 },
|
||||
|
|
@ -2166,12 +2170,16 @@
|
|||
.{ .vpmovzxwq, .rm, .{ .xmm, .xmm_m32 }, .{ 0x66, 0x0f, 0x38, 0x34 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpmovzxdq, .rm, .{ .xmm, .xmm_m64 }, .{ 0x66, 0x0f, 0x38, 0x35 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpmuldq, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x28 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpmulhw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpmulld, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpmullw, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xd5 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpmuludq, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xf4 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpor, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpshufb, .rvm, .{ .xmm, .xmm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x00 }, 0, .vex_128_wig, .avx },
|
||||
|
|
@ -2493,12 +2501,16 @@
|
|||
.{ .vpmovzxwq, .rm, .{ .ymm, .xmm_m64 }, .{ 0x66, 0x0f, 0x38, 0x34 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpmovzxdq, .rm, .{ .ymm, .xmm_m128 }, .{ 0x66, 0x0f, 0x38, 0x35 }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpmuldq, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x28 }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpmulhw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpmulld, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpmullw, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0xd5 }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpmuludq, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0xf4 }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpor, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpshufb, .rvm, .{ .ymm, .ymm, .ymm_m256 }, .{ 0x66, 0x0f, 0x38, 0x00 }, 0, .vex_256_wig, .avx2 },
|
||||
|
|
|
|||
|
|
@ -4889,6 +4889,14 @@ test reduceAdd {
|
|||
try test_reduce_add.testIntVectors();
|
||||
}
|
||||
|
||||
/// Reduces the vector `rhs` to a single scalar by multiplying all of its
/// elements together with `@reduce(.Mul, ...)`.
///
/// `Type` must be a vector type; the result has that vector's element type.
inline fn reduceMul(comptime Type: type, rhs: Type) @typeInfo(Type).vector.child {
    return @reduce(.Mul, rhs);
}
|
||||
// Exercises `reduceMul` through the shared `unary` test helper.
// NOTE(review): `testIntVectors` presumably runs the integer-vector cases
// only; confirm against the definition of `unary`.
test reduceMul {
    const test_reduce_mul = unary(reduceMul, .{});
    try test_reduce_mul.testIntVectors();
}
|
||||
|
||||
/// Returns a vector of type `Type` in which every lane holds the first
/// element of `rhs` (`rhs[0]`), broadcast with `@splat`.
inline fn splat(comptime Type: type, rhs: Type) Type {
    return @splat(rhs[0]);
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue