update CPU features to LLVM 18

release/18.x branch, commit 78b99c73ee4b96fe9ce0e294d4632326afb2db42
This commit is contained in:
Andrew Kelley 2024-04-25 16:54:33 -07:00
parent 6a018d3e65
commit 109ec72924
14 changed files with 2103 additions and 190 deletions

File diff suppressed because it is too large Load diff

View file

@ -13,6 +13,7 @@ pub const Feature = enum {
architected_sgprs,
atomic_buffer_global_pk_add_f16_insts,
atomic_buffer_global_pk_add_f16_no_rtn_insts,
atomic_csub_no_rtn_insts,
atomic_ds_pk_add_16_insts,
atomic_fadd_no_rtn_insts,
atomic_fadd_rtn_insts,
@ -22,6 +23,8 @@ pub const Feature = enum {
back_off_barrier,
ci_insts,
cumode,
default_component_broadcast,
default_component_zero,
dl_insts,
dot10_insts,
dot1_insts,
@ -36,6 +39,7 @@ pub const Feature = enum {
dpp,
dpp8,
dpp_64bit,
dpp_src1_sgpr,
ds128,
ds_src2_insts,
extended_image_insts,
@ -54,10 +58,12 @@ pub const Feature = enum {
fmaf,
force_store_sc0_sc1,
fp64,
fp8_conversion_insts,
fp8_insts,
full_rate_64_ops,
g16,
gcn3_encoding,
gds,
get_wave_id_inst,
gfx10,
gfx10_3_insts,
@ -67,12 +73,15 @@ pub const Feature = enum {
gfx11,
gfx11_full_vgprs,
gfx11_insts,
gfx12,
gfx12_insts,
gfx7_gfx8_gfx9_insts,
gfx8_insts,
gfx9,
gfx90a_insts,
gfx940_insts,
gfx9_insts,
gws,
half_rate_64_ops,
image_gather4_d16_bug,
image_insts,
@ -80,6 +89,7 @@ pub const Feature = enum {
inst_fwd_prefetch_bug,
int_clamp_insts,
inv_2pi_inline_imm,
kernarg_preload,
lds_branch_vmem_war_hazard,
lds_misaligned_bug,
ldsbankcount16,
@ -97,6 +107,7 @@ pub const Feature = enum {
mfma_inline_literal_bug,
mimg_r128,
movrel,
msaa_load_dst_sel_bug,
negative_scratch_offset_bug,
negative_unaligned_scratch_offset_bug,
no_data_dep_hazard,
@ -111,10 +122,15 @@ pub const Feature = enum {
pk_fmac_f16_inst,
promote_alloca,
prt_strict_null,
pseudo_scalar_trans,
r128_a16,
real_true16,
restricted_soffset,
s_memrealtime,
s_memtime_inst,
salu_float,
scalar_atomics,
scalar_dwordx3_loads,
scalar_flat_scratch_insts,
scalar_stores,
sdwa,
@ -125,6 +141,7 @@ pub const Feature = enum {
sdwa_sdst,
sea_islands,
sgpr_init_bug,
shader_cycles_hi_lo_registers,
shader_cycles_register,
si_scheduler,
smem_to_vector_write_hazard,
@ -146,6 +163,7 @@ pub const Feature = enum {
vcmpx_exec_war_hazard,
vcmpx_permlane_hazard,
vgpr_index_mode,
vgpr_singleuse_hint,
vmem_to_scalar_write_hazard,
volcanic_islands,
vop3_literal,
@ -212,6 +230,11 @@ pub const all_features = blk: {
.flat_global_insts,
}),
};
// Table entry for `atomic_csub_no_rtn_insts`, stored at the slot given by its
// enum value. It declares no dependencies.
result[@intFromEnum(Feature.atomic_csub_no_rtn_insts)] = .{
.llvm_name = "atomic-csub-no-rtn-insts",
.description = "Has buffer_atomic_csub and global_atomic_csub instructions that don't return original value",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.atomic_ds_pk_add_16_insts)] = .{
.llvm_name = "atomic-ds-pk-add-16-insts",
.description = "Has ds_pk_add_bf16, ds_pk_add_f16, ds_pk_add_rtn_bf16, ds_pk_add_rtn_f16 instructions",
@ -263,6 +286,16 @@ pub const all_features = blk: {
.description = "Enable CU wavefront execution mode",
.dependencies = featureSet(&[_]Feature{}),
};
// The next two entries describe the two possible defaults for unspecified
// components of BUFFER/IMAGE stores. Per their descriptions, "broadcast" is
// the GFX12 behavior and "zero" is the pre-GFX12 behavior. Neither entry
// declares dependencies.
result[@intFromEnum(Feature.default_component_broadcast)] = .{
.llvm_name = "default-component-broadcast",
.description = "BUFFER/IMAGE store instructions set unspecified components to x component (GFX12)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.default_component_zero)] = .{
.llvm_name = "default-component-zero",
.description = "BUFFER/IMAGE store instructions set unspecified components to zero (before GFX12)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dl_insts)] = .{
.llvm_name = "dl-insts",
.description = "Has v_fmac_f32 and v_xnor_b32 instructions",
@ -330,7 +363,12 @@ pub const all_features = blk: {
};
result[@intFromEnum(Feature.dpp_64bit)] = .{
.llvm_name = "dpp-64bit",
.description = "Support DPP (Data Parallel Primitives) extension",
.description = "Support DPP (Data Parallel Primitives) extension in DP ALU",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for `dpp_src1_sgpr`; it declares no dependencies.
result[@intFromEnum(Feature.dpp_src1_sgpr)] = .{
.llvm_name = "dpp-src1-sgpr",
.description = "Support SGPR for Src1 of DPP instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ds128)] = .{
@ -423,6 +461,11 @@ pub const all_features = blk: {
.description = "Enable double precision operations",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for `fp8_conversion_insts`. This is a separate feature from
// `fp8_insts`, and it declares no dependencies.
result[@intFromEnum(Feature.fp8_conversion_insts)] = .{
.llvm_name = "fp8-conversion-insts",
.description = "Has fp8 and bf8 conversion instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.fp8_insts)] = .{
.llvm_name = "fp8-insts",
.description = "Has fp8 and bf8 instructions",
@ -443,6 +486,11 @@ pub const all_features = blk: {
.description = "Encoding format for VI",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for `gds` (Global Data Share, per the description); it declares
// no dependencies.
result[@intFromEnum(Feature.gds)] = .{
.llvm_name = "gds",
.description = "Has Global Data Share",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.get_wave_id_inst)] = .{
.llvm_name = "get-wave-id-inst",
.description = "Has s_get_waveid_in_workgroup instruction",
@ -457,6 +505,7 @@ pub const all_features = blk: {
.add_no_carry_insts,
.aperture_regs,
.ci_insts,
.default_component_zero,
.dpp,
.dpp8,
.extended_image_insts,
@ -469,9 +518,11 @@ pub const all_features = blk: {
.fma_mix_insts,
.fp64,
.g16,
.gds,
.gfx10_insts,
.gfx8_insts,
.gfx9_insts,
.gws,
.image_insts,
.int_clamp_insts,
.inv_2pi_inline_imm,
@ -523,6 +574,7 @@ pub const all_features = blk: {
.add_no_carry_insts,
.aperture_regs,
.ci_insts,
.default_component_zero,
.dpp,
.dpp8,
.extended_image_insts,
@ -535,6 +587,7 @@ pub const all_features = blk: {
.fma_mix_insts,
.fp64,
.g16,
.gds,
.gfx10_3_insts,
.gfx10_a_encoding,
.gfx10_b_encoding,
@ -542,6 +595,7 @@ pub const all_features = blk: {
.gfx11_insts,
.gfx8_insts,
.gfx9_insts,
.gws,
.int_clamp_insts,
.inv_2pi_inline_imm,
.localmemorysize65536,
@ -569,6 +623,57 @@ pub const all_features = blk: {
.description = "Additional instructions for GFX11+",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for the `gfx12` generation feature. Its dependency set names
// every feature this generation declares, including `gfx12_insts` and
// `default_component_broadcast`.
result[@intFromEnum(Feature.gfx12)] = .{
.llvm_name = "gfx12",
.description = "GFX12 GPU generation",
.dependencies = featureSet(&[_]Feature{
.@"16_bit_insts",
.a16,
.add_no_carry_insts,
.aperture_regs,
.ci_insts,
.default_component_broadcast,
.dpp,
.dpp8,
.fast_denormal_f32,
.fast_fmaf,
.flat_address_space,
.flat_global_insts,
.flat_inst_offsets,
.flat_scratch_insts,
.fma_mix_insts,
.fp64,
.g16,
.gfx10_3_insts,
.gfx10_a_encoding,
.gfx10_b_encoding,
.gfx10_insts,
.gfx11_insts,
.gfx12_insts,
.gfx8_insts,
.gfx9_insts,
.int_clamp_insts,
.inv_2pi_inline_imm,
.localmemorysize65536,
.mimg_r128,
.movrel,
.no_data_dep_hazard,
.no_sdst_cmpx,
.pk_fmac_f16_inst,
.true16,
.unaligned_buffer_access,
.unaligned_ds_access,
.vop3_literal,
.vop3p,
.vopd,
.vscnt,
}),
};
// Table entry for `gfx12_insts` (the GFX12+ instruction additions). It has no
// dependencies of its own; `gfx12` above depends on it.
result[@intFromEnum(Feature.gfx12_insts)] = .{
.llvm_name = "gfx12-insts",
.description = "Additional instructions for GFX12+",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.gfx7_gfx8_gfx9_insts)] = .{
.llvm_name = "gfx7-gfx8-gfx9-insts",
.description = "Instructions shared in GFX7, GFX8, GFX9",
@ -588,6 +693,7 @@ pub const all_features = blk: {
.add_no_carry_insts,
.aperture_regs,
.ci_insts,
.default_component_zero,
.dpp,
.fast_denormal_f32,
.fast_fmaf,
@ -600,6 +706,7 @@ pub const all_features = blk: {
.gfx7_gfx8_gfx9_insts,
.gfx8_insts,
.gfx9_insts,
.gws,
.int_clamp_insts,
.inv_2pi_inline_imm,
.localmemorysize65536,
@ -637,6 +744,11 @@ pub const all_features = blk: {
.description = "Additional instructions for GFX9+",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for `gws` (Global Wave Sync, per the description); it declares
// no dependencies.
result[@intFromEnum(Feature.gws)] = .{
.llvm_name = "gws",
.description = "Has Global Wave Sync",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.half_rate_64_ops)] = .{
.llvm_name = "half-rate-64-ops",
.description = "Most fp64 instructions are half rate instead of quarter",
@ -672,6 +784,11 @@ pub const all_features = blk: {
.description = "Has 1 / (2 * pi) as inline immediate",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for `kernarg_preload`; it declares no dependencies.
result[@intFromEnum(Feature.kernarg_preload)] = .{
.llvm_name = "kernarg-preload",
.description = "Hardware supports preloading of kernel arguments in user SGPRs.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.lds_branch_vmem_war_hazard)] = .{
.llvm_name = "lds-branch-vmem-war-hazard",
.description = "Switching between LDS and VMEM-tex not waiting VM_VSRC=0",
@ -757,6 +874,11 @@ pub const all_features = blk: {
.description = "Has v_movrel*_b32 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for the `msaa_load_dst_sel_bug` hardware-bug flag; it declares
// no dependencies.
result[@intFromEnum(Feature.msaa_load_dst_sel_bug)] = .{
.llvm_name = "msaa-load-dst-sel-bug",
.description = "MSAA loads not honoring dst_sel bug",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.negative_scratch_offset_bug)] = .{
.llvm_name = "negative-scratch-offset-bug",
.description = "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9",
@ -827,11 +949,26 @@ pub const all_features = blk: {
.description = "Enable zeroing of result registers for sparse texture fetches",
.dependencies = featureSet(&[_]Feature{}),
};
// Four independent table entries follow, each stored at its own enum index.
// None of them declares dependencies.

// `pseudo_scalar_trans`: pseudo scalar transcendental instructions.
result[@intFromEnum(Feature.pseudo_scalar_trans)] = .{
.llvm_name = "pseudo-scalar-trans",
.description = "Has Pseudo Scalar Transcendental instructions",
.dependencies = featureSet(&[_]Feature{}),
};
// `r128_a16`: gfx9-style A16, where a16 is aliased with r128.
result[@intFromEnum(Feature.r128_a16)] = .{
.llvm_name = "r128-a16",
.description = "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128",
.dependencies = featureSet(&[_]Feature{}),
};
// `real_true16`: use true 16-bit registers.
result[@intFromEnum(Feature.real_true16)] = .{
.llvm_name = "real-true16",
.description = "Use true 16-bit registers",
.dependencies = featureSet(&[_]Feature{}),
};
// `restricted_soffset`: SOffset does not accept an immediate.
result[@intFromEnum(Feature.restricted_soffset)] = .{
.llvm_name = "restricted-soffset",
.description = "Has restricted SOffset (immediate not supported).",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.s_memrealtime)] = .{
.llvm_name = "s-memrealtime",
.description = "Has s_memrealtime instruction",
@ -842,11 +979,21 @@ pub const all_features = blk: {
.description = "Has s_memtime instruction",
.dependencies = featureSet(&[_]Feature{}),
};
// Three independent scalar-unit table entries follow; none declares
// dependencies.

// `salu_float`: SALU floating point instructions.
result[@intFromEnum(Feature.salu_float)] = .{
.llvm_name = "salu-float",
.description = "Has SALU floating point instructions",
.dependencies = featureSet(&[_]Feature{}),
};
// `scalar_atomics`: atomic scalar memory instructions.
result[@intFromEnum(Feature.scalar_atomics)] = .{
.llvm_name = "scalar-atomics",
.description = "Has atomic scalar memory instructions",
.dependencies = featureSet(&[_]Feature{}),
};
// `scalar_dwordx3_loads`: 96-bit scalar loads.
result[@intFromEnum(Feature.scalar_dwordx3_loads)] = .{
.llvm_name = "scalar-dwordx3-loads",
.description = "Has 96-bit scalar load instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.scalar_flat_scratch_insts)] = .{
.llvm_name = "scalar-flat-scratch-insts",
.description = "Have s_scratch_* flat memory instructions",
@ -892,11 +1039,14 @@ pub const all_features = blk: {
.description = "SEA_ISLANDS GPU generation",
.dependencies = featureSet(&[_]Feature{
.ci_insts,
.default_component_zero,
.ds_src2_insts,
.extended_image_insts,
.flat_address_space,
.fp64,
.gds,
.gfx7_gfx8_gfx9_insts,
.gws,
.image_insts,
.localmemorysize65536,
.mad_mac_f32_insts,
@ -913,6 +1063,11 @@ pub const all_features = blk: {
.description = "VI SGPR initialization bug requiring a fixed SGPR allocation size",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for `shader_cycles_hi_lo_registers` (the split HI/LO registers).
// It is distinct from the single-register `shader_cycles_register` feature and
// declares no dependencies.
result[@intFromEnum(Feature.shader_cycles_hi_lo_registers)] = .{
.llvm_name = "shader-cycles-hi-lo-registers",
.description = "Has SHADER_CYCLES_HI/LO hardware registers",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.shader_cycles_register)] = .{
.llvm_name = "shader-cycles-register",
.description = "Has SHADER_CYCLES hardware register",
@ -932,9 +1087,12 @@ pub const all_features = blk: {
.llvm_name = "southern-islands",
.description = "SOUTHERN_ISLANDS GPU generation",
.dependencies = featureSet(&[_]Feature{
.default_component_zero,
.ds_src2_insts,
.extended_image_insts,
.fp64,
.gds,
.gws,
.image_insts,
.ldsbankcount32,
.localmemorysize32768,
@ -1031,6 +1189,11 @@ pub const all_features = blk: {
.description = "Has VGPR mode register indexing",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for `vgpr_singleuse_hint`; it declares no dependencies.
result[@intFromEnum(Feature.vgpr_singleuse_hint)] = .{
.llvm_name = "vgpr-singleuse-hint",
.description = "Has single-use VGPR hint instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.vmem_to_scalar_write_hazard)] = .{
.llvm_name = "vmem-to-scalar-write-hazard",
.description = "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution.",
@ -1042,6 +1205,7 @@ pub const all_features = blk: {
.dependencies = featureSet(&[_]Feature{
.@"16_bit_insts",
.ci_insts,
.default_component_zero,
.dpp,
.ds_src2_insts,
.extended_image_insts,
@ -1049,8 +1213,10 @@ pub const all_features = blk: {
.flat_address_space,
.fp64,
.gcn3_encoding,
.gds,
.gfx7_gfx8_gfx9_insts,
.gfx8_insts,
.gws,
.image_insts,
.int_clamp_insts,
.inv_2pi_inline_imm,
@ -1157,6 +1323,8 @@ pub const cpu = struct {
.name = "generic",
.llvm_name = "generic",
.features = featureSet(&[_]Feature{
.gds,
.gws,
.wavefrontsize64,
}),
};
@ -1165,6 +1333,8 @@ pub const cpu = struct {
.llvm_name = "generic-hsa",
.features = featureSet(&[_]Feature{
.flat_address_space,
.gds,
.gws,
.wavefrontsize64,
}),
};
@ -1478,6 +1648,7 @@ pub const cpu = struct {
.image_insts,
.ldsbankcount32,
.mad_intra_fwd_bug,
.msaa_load_dst_sel_bug,
.nsa_encoding,
.packed_tid,
.partial_nsa_encoding,
@ -1507,6 +1678,7 @@ pub const cpu = struct {
.image_insts,
.ldsbankcount32,
.mad_intra_fwd_bug,
.msaa_load_dst_sel_bug,
.nsa_encoding,
.packed_tid,
.partial_nsa_encoding,
@ -1534,6 +1706,7 @@ pub const cpu = struct {
.image_insts,
.ldsbankcount32,
.mad_intra_fwd_bug,
.msaa_load_dst_sel_bug,
.nsa_encoding,
.packed_tid,
.partial_nsa_encoding,
@ -1562,6 +1735,7 @@ pub const cpu = struct {
.image_insts,
.ldsbankcount32,
.mad_intra_fwd_bug,
.msaa_load_dst_sel_bug,
.nsa_encoding,
.packed_tid,
.partial_nsa_encoding,
@ -1584,6 +1758,7 @@ pub const cpu = struct {
.dot7_insts,
.dot8_insts,
.dot9_insts,
.dpp_src1_sgpr,
.flat_atomic_fadd_f32_inst,
.gfx11,
.image_insts,
@ -1592,8 +1767,10 @@ pub const cpu = struct {
.nsa_encoding,
.packed_tid,
.partial_nsa_encoding,
.salu_float,
.shader_cycles_register,
.vcmpx_permlane_hazard,
.vgpr_singleuse_hint,
.wavefrontsize32,
}),
};
@ -1610,6 +1787,7 @@ pub const cpu = struct {
.dot7_insts,
.dot8_insts,
.dot9_insts,
.dpp_src1_sgpr,
.flat_atomic_fadd_f32_inst,
.gfx11,
.gfx11_full_vgprs,
@ -1619,8 +1797,84 @@ pub const cpu = struct {
.nsa_encoding,
.packed_tid,
.partial_nsa_encoding,
.salu_float,
.shader_cycles_register,
.vcmpx_permlane_hazard,
.vgpr_singleuse_hint,
.wavefrontsize32,
}),
};
/// CPU model `gfx1200` (same name on the LLVM side). Its feature set is built
/// from the `gfx12` generation feature plus the model-specific features
/// listed alongside it.
pub const gfx1200: CpuModel = .{
    .name = "gfx1200",
    .llvm_name = "gfx1200",
    .features = featureSet(&[_]Feature{
        .architected_flat_scratch,
        .architected_sgprs,
        .atomic_buffer_global_pk_add_f16_insts,
        .atomic_ds_pk_add_16_insts,
        .atomic_fadd_no_rtn_insts,
        .atomic_fadd_rtn_insts,
        .atomic_flat_pk_add_16_insts,
        .atomic_global_pk_add_bf16_inst,
        .dl_insts,
        .dot10_insts,
        .dot7_insts,
        .dot8_insts,
        .dot9_insts,
        .dpp_src1_sgpr,
        .extended_image_insts,
        .flat_atomic_fadd_f32_inst,
        .fp8_conversion_insts,
        .gfx12,
        .image_insts,
        .ldsbankcount32,
        .nsa_encoding,
        .packed_tid,
        .partial_nsa_encoding,
        .pseudo_scalar_trans,
        .restricted_soffset,
        .salu_float,
        .scalar_dwordx3_loads,
        .shader_cycles_hi_lo_registers,
        .vcmpx_permlane_hazard,
        .vgpr_singleuse_hint,
        .wavefrontsize32,
    }),
};
/// CPU model `gfx1201` (same name on the LLVM side). Its feature set is built
/// from the `gfx12` generation feature plus the model-specific features
/// listed alongside it.
pub const gfx1201: CpuModel = .{
    .name = "gfx1201",
    .llvm_name = "gfx1201",
    .features = featureSet(&[_]Feature{
        .architected_flat_scratch,
        .architected_sgprs,
        .atomic_buffer_global_pk_add_f16_insts,
        .atomic_ds_pk_add_16_insts,
        .atomic_fadd_no_rtn_insts,
        .atomic_fadd_rtn_insts,
        .atomic_flat_pk_add_16_insts,
        .atomic_global_pk_add_bf16_inst,
        .dl_insts,
        .dot10_insts,
        .dot7_insts,
        .dot8_insts,
        .dot9_insts,
        .dpp_src1_sgpr,
        .extended_image_insts,
        .flat_atomic_fadd_f32_inst,
        .fp8_conversion_insts,
        .gfx12,
        .image_insts,
        .ldsbankcount32,
        .nsa_encoding,
        .packed_tid,
        .partial_nsa_encoding,
        .pseudo_scalar_trans,
        .restricted_soffset,
        .salu_float,
        .scalar_dwordx3_loads,
        .shader_cycles_hi_lo_registers,
        .vcmpx_permlane_hazard,
        .vgpr_singleuse_hint,
        .wavefrontsize32,
    }),
};
@ -1756,6 +2010,7 @@ pub const cpu = struct {
.features = featureSet(&[_]Feature{
.ds_src2_insts,
.extended_image_insts,
.gds,
.gfx9,
.image_gather4_d16_bug,
.image_insts,
@ -1770,6 +2025,7 @@ pub const cpu = struct {
.features = featureSet(&[_]Feature{
.ds_src2_insts,
.extended_image_insts,
.gds,
.gfx9,
.image_gather4_d16_bug,
.image_insts,
@ -1785,6 +2041,7 @@ pub const cpu = struct {
.ds_src2_insts,
.extended_image_insts,
.fma_mix_insts,
.gds,
.gfx9,
.image_gather4_d16_bug,
.image_insts,
@ -1804,6 +2061,7 @@ pub const cpu = struct {
.ds_src2_insts,
.extended_image_insts,
.fma_mix_insts,
.gds,
.gfx9,
.half_rate_64_ops,
.image_gather4_d16_bug,
@ -1831,6 +2089,7 @@ pub const cpu = struct {
.ds_src2_insts,
.extended_image_insts,
.fma_mix_insts,
.gds,
.gfx9,
.half_rate_64_ops,
.image_gather4_d16_bug,
@ -1849,6 +2108,7 @@ pub const cpu = struct {
.features = featureSet(&[_]Feature{
.ds_src2_insts,
.extended_image_insts,
.gds,
.gfx9,
.image_gather4_d16_bug,
.image_insts,
@ -1881,6 +2141,7 @@ pub const cpu = struct {
.gfx9,
.gfx90a_insts,
.image_insts,
.kernarg_preload,
.ldsbankcount32,
.mad_mac_f32_insts,
.mai_insts,
@ -1896,6 +2157,7 @@ pub const cpu = struct {
.features = featureSet(&[_]Feature{
.ds_src2_insts,
.extended_image_insts,
.gds,
.gfx9,
.image_gather4_d16_bug,
.image_insts,
@ -1930,11 +2192,13 @@ pub const cpu = struct {
.fma_mix_insts,
.fmacf64_inst,
.force_store_sc0_sc1,
.fp8_conversion_insts,
.fp8_insts,
.full_rate_64_ops,
.gfx9,
.gfx90a_insts,
.gfx940_insts,
.kernarg_preload,
.ldsbankcount32,
.mai_insts,
.packed_fp32_ops,
@ -1969,11 +2233,13 @@ pub const cpu = struct {
.fma_mix_insts,
.fmacf64_inst,
.force_store_sc0_sc1,
.fp8_conversion_insts,
.fp8_insts,
.full_rate_64_ops,
.gfx9,
.gfx90a_insts,
.gfx940_insts,
.kernarg_preload,
.ldsbankcount32,
.mai_insts,
.packed_fp32_ops,
@ -2007,11 +2273,13 @@ pub const cpu = struct {
.flat_atomic_fadd_f32_inst,
.fma_mix_insts,
.fmacf64_inst,
.fp8_conversion_insts,
.fp8_insts,
.full_rate_64_ops,
.gfx9,
.gfx90a_insts,
.gfx940_insts,
.kernarg_preload,
.ldsbankcount32,
.mai_insts,
.packed_fp32_ops,

View file

@ -186,6 +186,8 @@ pub const Feature = enum {
v9_2a,
v9_3a,
v9_4a,
// Two distinct LLVM features are involved here: "armv9.5-a" (the architecture)
// and "v9.5a" (the instruction-set support). They must not share one enum
// field, so the instruction-set feature is named `has_v9_5a`, in the same way
// that "v9.4a" is exposed as `has_v9_4a`.
has_v9_5a,
v9_5a,
v9a,
vfp2,
vfp2sp,
@ -1572,6 +1574,29 @@ pub const all_features = blk: {
.virtualization,
}),
};
// Architecture feature "armv9.5-a". It depends on the v9.5a instruction-set
// feature (`has_v9_5a`), not on itself.
result[@intFromEnum(Feature.v9_5a)] = .{
.llvm_name = "armv9.5-a",
.description = "ARMv95a architecture",
.dependencies = featureSet(&[_]Feature{
.aclass,
.crc,
.db,
.dsp,
.fp_armv8,
.has_v9_5a,
.mp,
.ras,
.trustzone,
.virtualization,
}),
};
// Instruction-set feature "v9.5a". It gets its own table slot so that it no
// longer overwrites the "armv9.5-a" entry above.
result[@intFromEnum(Feature.has_v9_5a)] = .{
.llvm_name = "v9.5a",
.description = "Support ARM v9.5a instructions",
.dependencies = featureSet(&[_]Feature{
.has_v9_4a,
}),
};
result[@intFromEnum(Feature.v9a)] = .{
.llvm_name = "armv9-a",
.description = "ARMv9a architecture",
@ -2238,6 +2263,21 @@ pub const cpu = struct {
.v7em,
}),
};
/// CPU model `cortex_m52`, known to LLVM as "cortex-m52". It is based on the
/// `v8_1m_main` feature plus the tuning and extension features listed with it.
pub const cortex_m52: CpuModel = .{
    .name = "cortex_m52",
    .llvm_name = "cortex-m52",
    .features = featureSet(&[_]Feature{
        .fp_armv8d16,
        .loop_align,
        .mve1beat,
        .mve_fp,
        .no_branch_predictor,
        .pacbti,
        .slowfpvmlx,
        .use_misched,
        .v8_1m_main,
    }),
};
pub const cortex_m55 = CpuModel{
.name = "cortex_m55",
.llvm_name = "cortex-m55",
@ -2499,7 +2539,7 @@ pub const cpu = struct {
.features = featureSet(&[_]Feature{
.bf16,
.i8mm,
.v8_5a,
.v9a,
}),
};
pub const neoverse_v1 = CpuModel{

View file

@ -70,4 +70,11 @@ pub const cpu = struct {
.alu32,
}),
};
/// CPU model `v4` (same name on the LLVM side); the only feature it enables
/// is `alu32`.
pub const v4: CpuModel = .{
    .name = "v4",
    .llvm_name = "v4",
    .features = featureSet(&[_]Feature{
        .alu32,
    }),
};
};

View file

@ -7,8 +7,10 @@ const CpuModel = std.Target.Cpu.Model;
pub const Feature = enum {
@"32bit",
@"64bit",
auto_vec,
d,
f,
frecipe,
la_global_with_abs,
la_global_with_pcrel,
la_local_with_abs,
@ -16,6 +18,7 @@ pub const Feature = enum {
lbt,
lsx,
lvz,
relax,
ual,
};
@ -38,6 +41,11 @@ pub const all_features = blk: {
.description = "LA64 Basic Integer and Privilege Instruction Set",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for `auto_vec` (described as experimental); it declares no
// dependencies.
result[@intFromEnum(Feature.auto_vec)] = .{
.llvm_name = "auto-vec",
.description = "Experimental auto vectorization",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.d)] = .{
.llvm_name = "d",
.description = "'D' (Double-Precision Floating-Point)",
@ -50,6 +58,11 @@ pub const all_features = blk: {
.description = "'F' (Single-Precision Floating-Point)",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for `frecipe`; it declares no dependencies.
result[@intFromEnum(Feature.frecipe)] = .{
.llvm_name = "frecipe",
.description = "Support frecipe.{s/d} and frsqrte.{s/d} instructions.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.la_global_with_abs)] = .{
.llvm_name = "la-global-with-abs",
.description = "Expand la.global as la.abs",
@ -89,6 +102,11 @@ pub const all_features = blk: {
.description = "'LVZ' (Loongson Virtualization Extension)",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for `relax` (linker relaxation); it declares no dependencies.
result[@intFromEnum(Feature.relax)] = .{
.llvm_name = "relax",
.description = "Enable Linker relaxation",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ual)] = .{
.llvm_name = "ual",
.description = "Allow memory accesses to be unaligned",

View file

@ -27,6 +27,8 @@ pub const Feature = enum {
ptx78,
ptx80,
ptx81,
ptx82,
ptx83,
sm_20,
sm_21,
sm_30,
@ -47,6 +49,7 @@ pub const Feature = enum {
sm_87,
sm_89,
sm_90,
sm_90a,
};
pub const featureSet = CpuFeature.feature_set_fns(Feature).featureSet;
@ -168,6 +171,16 @@ pub const all_features = blk: {
.description = "Use PTX version 81",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entries for the PTX version features `ptx82` and `ptx83`. Each is
// independent and declares no dependencies.
result[@intFromEnum(Feature.ptx82)] = .{
.llvm_name = "ptx82",
.description = "Use PTX version 82",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ptx83)] = .{
.llvm_name = "ptx83",
.description = "Use PTX version 83",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.sm_20)] = .{
.llvm_name = "sm_20",
.description = "Target SM 20",
@ -268,6 +281,11 @@ pub const all_features = blk: {
.description = "Target SM 90",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for the `sm_90a` target feature. It is a separate feature from
// `sm_90` and declares no dependencies.
result[@intFromEnum(Feature.sm_90a)] = .{
.llvm_name = "sm_90a",
.description = "Target SM 90a",
.dependencies = featureSet(&[_]Feature{}),
};
const ti = @typeInfo(Feature);
for (&result, 0..) |*elem, i| {
elem.index = i;
@ -436,4 +454,12 @@ pub const cpu = struct {
.sm_90,
}),
};
/// CPU model `sm_90a` (same name on the LLVM side). It enables the `sm_90a`
/// target feature together with `ptx80`.
pub const sm_90a: CpuModel = .{
    .name = "sm_90a",
    .llvm_name = "sm_90a",
    .features = featureSet(&[_]Feature{
        .ptx80,
        .sm_90a,
    }),
};
};

View file

@ -8,6 +8,7 @@ pub const Feature = enum {
@"64bit",
@"64bitregs",
aix,
aix_small_local_exec_tls,
allow_unaligned_fp_access,
altivec,
booke,
@ -112,6 +113,11 @@ pub const all_features = blk: {
.description = "AIX OS",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for `aix_small_local_exec_tls`; it declares no dependencies
// (in particular, it does not depend on the `aix` feature).
result[@intFromEnum(Feature.aix_small_local_exec_tls)] = .{
.llvm_name = "aix-small-local-exec-tls",
.description = "Produce a TOC-free local-exec TLS sequence for this function for 64-bit AIX",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.allow_unaligned_fp_access)] = .{
.llvm_name = "allow-unaligned-fp-access",
.description = "CPU does not trap on unaligned FP access",

View file

@ -8,38 +8,28 @@ pub const Feature = enum {
@"32bit",
@"64bit",
a,
auipc_addi_fusion,
c,
conditional_cmv_fusion,
d,
dlen_factor_2,
e,
experimental_smaia,
experimental_ssaia,
experimental,
experimental_zacas,
experimental_zfa,
experimental_zcmop,
experimental_zfbfmin,
experimental_zicond,
experimental_zihintntl,
experimental_zicfilp,
experimental_zicfiss,
experimental_zimop,
experimental_ztso,
experimental_zvbb,
experimental_zvbc,
experimental_zvfbfmin,
experimental_zvfbfwma,
experimental_zvkg,
experimental_zvkn,
experimental_zvknc,
experimental_zvkned,
experimental_zvkng,
experimental_zvknha,
experimental_zvknhb,
experimental_zvks,
experimental_zvksc,
experimental_zvksed,
experimental_zvksg,
experimental_zvksh,
experimental_zvkt,
f,
fast_unaligned_access,
forced_atomics,
h,
i,
ld_add_fusion,
lui_addi_fusion,
m,
no_default_unroll,
@ -79,18 +69,30 @@ pub const Feature = enum {
reserve_x9,
save_restore,
seq_cst_trailing_fence,
shifted_zextw_fusion,
short_forward_branch_opt,
smaia,
smepmp,
ssaia,
svinval,
svnapot,
svpbmt,
tagged_globals,
unaligned_scalar_mem,
unaligned_vector_mem,
use_postra_scheduler,
v,
ventana_veyron,
xcvalu,
xcvbi,
xcvbitmanip,
xcvelw,
xcvmac,
xsfcie,
xcvmem,
xcvsimd,
xsfvcp,
xsfvfnrclipxfqf,
xsfvfwmaccqqq,
xsfvqmaccdod,
xsfvqmaccqoq,
xtheadba,
xtheadbb,
xtheadbs,
@ -103,6 +105,8 @@ pub const Feature = enum {
xtheadsync,
xtheadvdot,
xventanacondops,
za128rs,
za64rs,
zawrs,
zba,
zbb,
@ -119,17 +123,27 @@ pub const Feature = enum {
zcmp,
zcmt,
zdinx,
zexth_fusion,
zextw_fusion,
zfa,
zfh,
zfhmin,
zfinx,
zhinx,
zhinxmin,
zic64b,
zicbom,
zicbop,
zicboz,
ziccamoa,
ziccif,
zicclsm,
ziccrse,
zicntr,
zicond,
zicsr,
zifencei,
zihintntl,
zihintpause,
zihpm,
zk,
@ -143,12 +157,29 @@ pub const Feature = enum {
zksh,
zkt,
zmmul,
zvbb,
zvbc,
zve32f,
zve32x,
zve64d,
zve64f,
zve64x,
zvfh,
zvfhmin,
zvkb,
zvkg,
zvkn,
zvknc,
zvkned,
zvkng,
zvknha,
zvknhb,
zvks,
zvksc,
zvksed,
zvksg,
zvksh,
zvkt,
zvl1024b,
zvl128b,
zvl16384b,
@ -187,11 +218,21 @@ pub const all_features = blk: {
.description = "'A' (Atomic Instructions)",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for the `auipc_addi_fusion` macrofusion flag; it declares no
// dependencies.
result[@intFromEnum(Feature.auipc_addi_fusion)] = .{
.llvm_name = "auipc-addi-fusion",
.description = "Enable AUIPC+ADDI macrofusion",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.c)] = .{
.llvm_name = "c",
.description = "'C' (Compressed Instructions)",
.dependencies = featureSet(&[_]Feature{}),
};
// Table entry for the `conditional_cmv_fusion` flag (branch + c.mv fusion);
// it declares no dependencies.
result[@intFromEnum(Feature.conditional_cmv_fusion)] = .{
.llvm_name = "conditional-cmv-fusion",
.description = "Enable branch+c.mv fusion",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.d)] = .{
.llvm_name = "d",
.description = "'D' (Double-Precision Floating-Point)",
@ -209,14 +250,9 @@ pub const all_features = blk: {
.description = "Implements RV{32,64}E (provides 16 rather than 32 GPRs)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_smaia)] = .{
.llvm_name = "experimental-smaia",
.description = "'Smaia' (Smaia encompasses all added CSRs and all modifications to interrupt response behavior that the AIA specifies for a hart, over all privilege levels.)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_ssaia)] = .{
.llvm_name = "experimental-ssaia",
.description = "'Ssaia' (Ssaia is essentially the same as Smaia except excluding the machine-level CSRs and behavior not directly visible to supervisor level.)",
result[@intFromEnum(Feature.experimental)] = .{
.llvm_name = "experimental",
.description = "Experimental intrinsics",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zacas)] = .{
@ -224,11 +260,11 @@ pub const all_features = blk: {
.description = "'Zacas' (Atomic Compare-And-Swap Instructions)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zfa)] = .{
.llvm_name = "experimental-zfa",
.description = "'Zfa' (Additional Floating-Point)",
result[@intFromEnum(Feature.experimental_zcmop)] = .{
.llvm_name = "experimental-zcmop",
.description = "'Zcmop' (Compressed May-Be-Operations)",
.dependencies = featureSet(&[_]Feature{
.f,
.zca,
}),
};
result[@intFromEnum(Feature.experimental_zfbfmin)] = .{
@ -238,14 +274,22 @@ pub const all_features = blk: {
.f,
}),
};
result[@intFromEnum(Feature.experimental_zicond)] = .{
.llvm_name = "experimental-zicond",
.description = "'Zicond' (Integer Conditional Operations)",
result[@intFromEnum(Feature.experimental_zicfilp)] = .{
.llvm_name = "experimental-zicfilp",
.description = "'Zicfilp' (Landing pad)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zihintntl)] = .{
.llvm_name = "experimental-zihintntl",
.description = "'Zihintntl' (Non-Temporal Locality Hints)",
result[@intFromEnum(Feature.experimental_zicfiss)] = .{
.llvm_name = "experimental-zicfiss",
.description = "'Zicfiss' (Shadow stack)",
.dependencies = featureSet(&[_]Feature{
.experimental_zimop,
.zicsr,
}),
};
result[@intFromEnum(Feature.experimental_zimop)] = .{
.llvm_name = "experimental-zimop",
.description = "'Zimop' (May-Be-Operations)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_ztso)] = .{
@ -253,16 +297,6 @@ pub const all_features = blk: {
.description = "'Ztso' (Memory Model - Total Store Order)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zvbb)] = .{
.llvm_name = "experimental-zvbb",
.description = "'Zvbb' (Vector Bit-manipulation used in Cryptography)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zvbc)] = .{
.llvm_name = "experimental-zvbc",
.description = "'Zvbc' (Vector Carryless Multiplication)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zvfbfmin)] = .{
.llvm_name = "experimental-zvfbfmin",
.description = "'Zvbfmin' (Vector BF16 Converts)",
@ -274,76 +308,10 @@ pub const all_features = blk: {
.llvm_name = "experimental-zvfbfwma",
.description = "'Zvfbfwma' (Vector BF16 widening mul-add)",
.dependencies = featureSet(&[_]Feature{
.zve32f,
.experimental_zfbfmin,
.experimental_zvfbfmin,
}),
};
result[@intFromEnum(Feature.experimental_zvkg)] = .{
.llvm_name = "experimental-zvkg",
.description = "'Zvkg' (Vector GCM instructions for Cryptography)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zvkn)] = .{
.llvm_name = "experimental-zvkn",
.description = "This extension is shorthand for the following set of other extensions: Zvkned, Zvknhb, Zvbb, Zvbc, and Zvkt.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zvknc)] = .{
.llvm_name = "experimental-zvknc",
.description = "This extension is shorthand for the following set of other extensions: Zvkn and Zvbc.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zvkned)] = .{
.llvm_name = "experimental-zvkned",
.description = "'Zvkned' (Vector AES Encryption & Decryption (Single Round))",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zvkng)] = .{
.llvm_name = "experimental-zvkng",
.description = "This extension is shorthand for the following set of other extensions: Zvkn and Zvkg.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zvknha)] = .{
.llvm_name = "experimental-zvknha",
.description = "'Zvknha' (Vector SHA-2 (SHA-256 only))",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zvknhb)] = .{
.llvm_name = "experimental-zvknhb",
.description = "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))",
.dependencies = featureSet(&[_]Feature{
.experimental_zvknha,
}),
};
result[@intFromEnum(Feature.experimental_zvks)] = .{
.llvm_name = "experimental-zvks",
.description = "This extension is shorthand for the following set of other extensions: Zvksed, Zvksh, Zvbb, Zvbc, and Zvkt.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zvksc)] = .{
.llvm_name = "experimental-zvksc",
.description = "This extension is shorthand for the following set of other extensions: Zvks and Zvbc.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zvksed)] = .{
.llvm_name = "experimental-zvksed",
.description = "'Zvksed' (SM4 Block Cipher Instructions)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zvksg)] = .{
.llvm_name = "experimental-zvksg",
.description = "This extension is shorthand for the following set of other extensions: Zvks and Zvkg.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zvksh)] = .{
.llvm_name = "experimental-zvksh",
.description = "'Zvksh' (SM3 Hash Function Instructions)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.experimental_zvkt)] = .{
.llvm_name = "experimental-zvkt",
.description = "'Zvkt' (Vector Data-Independent Execution Latency)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.f)] = .{
.llvm_name = "f",
.description = "'F' (Single-Precision Floating-Point)",
@ -351,6 +319,11 @@ pub const all_features = blk: {
.zicsr,
}),
};
result[@intFromEnum(Feature.fast_unaligned_access)] = .{
.llvm_name = "fast-unaligned-access",
.description = "Has reasonably performant unaligned loads and stores (both scalar and vector)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.forced_atomics)] = .{
.llvm_name = "forced-atomics",
.description = "Assume that lock-free native-width atomics are available",
@ -361,9 +334,19 @@ pub const all_features = blk: {
.description = "'H' (Hypervisor)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.i)] = .{
.llvm_name = "i",
.description = "'I' (Base Integer Instruction Set)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ld_add_fusion)] = .{
.llvm_name = "ld-add-fusion",
.description = "Enable LD+ADD macrofusion",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `lui-addi-fusion`; it lists no prerequisite features.
// A struct literal may initialize each field only once, so exactly one
// `.description` is given.
result[@intFromEnum(Feature.lui_addi_fusion)] = .{
    .llvm_name = "lui-addi-fusion",
    .description = "Enable LUI+ADDI macro fusion",
    .dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.m)] = .{
@ -556,11 +539,31 @@ pub const all_features = blk: {
.description = "Enable trailing fence for seq-cst store.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.shifted_zextw_fusion)] = .{
.llvm_name = "shifted-zextw-fusion",
.description = "Enable SLLI+SRLI to be fused when computing (shifted) word zero extension",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.short_forward_branch_opt)] = .{
.llvm_name = "short-forward-branch-opt",
.description = "Enable short forward branch optimization",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.smaia)] = .{
.llvm_name = "smaia",
.description = "'Smaia' (Advanced Interrupt Architecture Machine Level)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.smepmp)] = .{
.llvm_name = "smepmp",
.description = "'Smepmp' (Enhanced Physical Memory Protection)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ssaia)] = .{
.llvm_name = "ssaia",
.description = "'Ssaia' (Advanced Interrupt Architecture Supervisor Level)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.svinval)] = .{
.llvm_name = "svinval",
.description = "'Svinval' (Fine-Grained Address-Translation Cache Invalidation)",
@ -581,14 +584,9 @@ pub const all_features = blk: {
.description = "Use an instruction sequence for taking the address of a global that allows a memory tag in the upper address bits",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `use-postra-scheduler`; it lists no prerequisite
// features.
result[@intFromEnum(Feature.use_postra_scheduler)] = .{
    .llvm_name = "use-postra-scheduler",
    .description = "Schedule again after register allocation",
    .dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.v)] = .{
@ -599,19 +597,44 @@ pub const all_features = blk: {
.zvl128b,
}),
};
result[@intFromEnum(Feature.ventana_veyron)] = .{
.llvm_name = "ventana-veyron",
.description = "Ventana Veyron-Series processors",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.xcvalu)] = .{
.llvm_name = "xcvalu",
.description = "'XCValu' (CORE-V ALU Operations)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.xcvbi)] = .{
.llvm_name = "xcvbi",
.description = "'XCVbi' (CORE-V Immediate Branching)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.xcvbitmanip)] = .{
.llvm_name = "xcvbitmanip",
.description = "'XCVbitmanip' (CORE-V Bit Manipulation)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.xcvelw)] = .{
.llvm_name = "xcvelw",
.description = "'XCVelw' (CORE-V Event Load Word)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.xcvmac)] = .{
.llvm_name = "xcvmac",
.description = "'XCVmac' (CORE-V Multiply-Accumulate)",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `xcvmem`; it lists no prerequisite features.
result[@intFromEnum(Feature.xcvmem)] = .{
    .llvm_name = "xcvmem",
    .description = "'XCVmem' (CORE-V Post-incrementing Load & Store)",
    .dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.xcvsimd)] = .{
.llvm_name = "xcvsimd",
.description = "'XCVsimd' (CORE-V SIMD ALU)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.xsfvcp)] = .{
@ -621,6 +644,34 @@ pub const all_features = blk: {
.zve32x,
}),
};
result[@intFromEnum(Feature.xsfvfnrclipxfqf)] = .{
.llvm_name = "xsfvfnrclipxfqf",
.description = "'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions)",
.dependencies = featureSet(&[_]Feature{
.zve32f,
}),
};
// Feature-table entry for `xsfvfwmaccqqq`; its one listed prerequisite is
// `experimental_zvfbfmin`.
// NOTE(review): the description ends with an unbalanced extra `)`. It is
// presumably copied verbatim from the LLVM feature table — confirm upstream
// before editing the string here.
result[@intFromEnum(Feature.xsfvfwmaccqqq)] = .{
.llvm_name = "xsfvfwmaccqqq",
.description = "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4))",
.dependencies = featureSet(&[_]Feature{
.experimental_zvfbfmin,
}),
};
result[@intFromEnum(Feature.xsfvqmaccdod)] = .{
.llvm_name = "xsfvqmaccdod",
.description = "'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2))",
.dependencies = featureSet(&[_]Feature{
.zve32x,
}),
};
result[@intFromEnum(Feature.xsfvqmaccqoq)] = .{
.llvm_name = "xsfvqmaccqoq",
.description = "'XSfvqmaccqoq' (SiFive Int8 Matrix Multiplication Instructions (4-by-8 and 8-by-4))",
.dependencies = featureSet(&[_]Feature{
.zve32x,
}),
};
result[@intFromEnum(Feature.xtheadba)] = .{
.llvm_name = "xtheadba",
.description = "'xtheadba' (T-Head address calculation instructions)",
@ -685,6 +736,16 @@ pub const all_features = blk: {
.description = "'XVentanaCondOps' (Ventana Conditional Ops)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.za128rs)] = .{
.llvm_name = "za128rs",
.description = "'Za128rs' (Reservation Set Size of at Most 128 Bytes)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.za64rs)] = .{
.llvm_name = "za64rs",
.description = "'Za64rs' (Reservation Set Size of at Most 64 Bytes)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zawrs)] = .{
.llvm_name = "zawrs",
.description = "'Zawrs' (Wait on Reservation Set)",
@ -782,11 +843,28 @@ pub const all_features = blk: {
.zfinx,
}),
};
result[@intFromEnum(Feature.zexth_fusion)] = .{
.llvm_name = "zexth-fusion",
.description = "Enable SLLI+SRLI to be fused to zero extension of halfword",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zextw_fusion)] = .{
.llvm_name = "zextw-fusion",
.description = "Enable SLLI+SRLI to be fused to zero extension of word",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zfa)] = .{
.llvm_name = "zfa",
.description = "'Zfa' (Additional Floating-Point)",
.dependencies = featureSet(&[_]Feature{
.f,
}),
};
// Feature-table entry for `zfh`; its one listed prerequisite is `zfhmin`.
result[@intFromEnum(Feature.zfh)] = .{
    .llvm_name = "zfh",
    .description = "'Zfh' (Half-Precision Floating-Point)",
    .dependencies = featureSet(&[_]Feature{
        .zfhmin,
    }),
};
result[@intFromEnum(Feature.zfhmin)] = .{
@ -807,7 +885,7 @@ pub const all_features = blk: {
.llvm_name = "zhinx",
.description = "'Zhinx' (Half Float in Integer)",
.dependencies = featureSet(&[_]Feature{
.zfinx,
.zhinxmin,
}),
};
result[@intFromEnum(Feature.zhinxmin)] = .{
@ -817,6 +895,11 @@ pub const all_features = blk: {
.zfinx,
}),
};
result[@intFromEnum(Feature.zic64b)] = .{
.llvm_name = "zic64b",
.description = "'Zic64b' (Cache Block Size Is 64 Bytes)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zicbom)] = .{
.llvm_name = "zicbom",
.description = "'Zicbom' (Cache-Block Management Instructions)",
@ -832,6 +915,26 @@ pub const all_features = blk: {
.description = "'Zicboz' (Cache-Block Zero Instructions)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ziccamoa)] = .{
.llvm_name = "ziccamoa",
.description = "'Ziccamoa' (Main Memory Supports All Atomics in A)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ziccif)] = .{
.llvm_name = "ziccif",
.description = "'Ziccif' (Main Memory Supports Instruction Fetch with Atomicity Requirement)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zicclsm)] = .{
.llvm_name = "zicclsm",
.description = "'Zicclsm' (Main Memory Supports Misaligned Loads/Stores)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ziccrse)] = .{
.llvm_name = "ziccrse",
.description = "'Ziccrse' (Main Memory Supports Forward Progress on LR/SC Sequences)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zicntr)] = .{
.llvm_name = "zicntr",
.description = "'Zicntr' (Base Counters and Timers)",
@ -839,6 +942,11 @@ pub const all_features = blk: {
.zicsr,
}),
};
result[@intFromEnum(Feature.zicond)] = .{
.llvm_name = "zicond",
.description = "'Zicond' (Integer Conditional Operations)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zicsr)] = .{
.llvm_name = "zicsr",
.description = "'zicsr' (CSRs)",
@ -849,6 +957,11 @@ pub const all_features = blk: {
.description = "'Zifencei' (fence.i)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zihintntl)] = .{
.llvm_name = "zihintntl",
.description = "'Zihintntl' (Non-Temporal Locality Hints)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zihintpause)] = .{
.llvm_name = "zihintpause",
.description = "'Zihintpause' (Pause Hint)",
@ -933,6 +1046,18 @@ pub const all_features = blk: {
.description = "'Zmmul' (Integer Multiplication)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zvbb)] = .{
.llvm_name = "zvbb",
.description = "'Zvbb' (Vector basic bit-manipulation instructions)",
.dependencies = featureSet(&[_]Feature{
.zvkb,
}),
};
result[@intFromEnum(Feature.zvbc)] = .{
.llvm_name = "zvbc",
.description = "'Zvbc' (Vector Carryless Multiplication)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zve32f)] = .{
.llvm_name = "zve32f",
.description = "'Zve32f' (Vector Extensions for Embedded Processors with maximal 32 EEW and F extension)",
@ -978,9 +1103,110 @@ pub const all_features = blk: {
.description = "'Zvfh' (Vector Half-Precision Floating-Point)",
.dependencies = featureSet(&[_]Feature{
.zfhmin,
.zvfhmin,
}),
};
result[@intFromEnum(Feature.zvfhmin)] = .{
.llvm_name = "zvfhmin",
.description = "'Zvfhmin' (Vector Half-Precision Floating-Point Minimal)",
.dependencies = featureSet(&[_]Feature{
.zve32f,
}),
};
result[@intFromEnum(Feature.zvkb)] = .{
.llvm_name = "zvkb",
.description = "'Zvkb' (Vector Bit-manipulation used in Cryptography)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zvkg)] = .{
.llvm_name = "zvkg",
.description = "'Zvkg' (Vector GCM instructions for Cryptography)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zvkn)] = .{
.llvm_name = "zvkn",
.description = "'Zvkn' (shorthand for 'Zvkned', 'Zvknhb', 'Zvkb', and 'Zvkt')",
.dependencies = featureSet(&[_]Feature{
.zvkb,
.zvkned,
.zvknhb,
.zvkt,
}),
};
// Feature-table entry for `zvknc`; listed prerequisites are `zvbc` and `zvkn`.
// NOTE(review): the description names 'Zvknc' as one of its own components,
// while the dependency list is zvbc + zvkn, so the text looks like it should
// read 'Zvkn'. Presumably inherited from the LLVM feature table — confirm
// upstream before changing the string.
result[@intFromEnum(Feature.zvknc)] = .{
.llvm_name = "zvknc",
.description = "'Zvknc' (shorthand for 'Zvknc' and 'Zvbc')",
.dependencies = featureSet(&[_]Feature{
.zvbc,
.zvkn,
}),
};
result[@intFromEnum(Feature.zvkned)] = .{
.llvm_name = "zvkned",
.description = "'Zvkned' (Vector AES Encryption & Decryption (Single Round))",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zvkng)] = .{
.llvm_name = "zvkng",
.description = "'zvkng' (shorthand for 'Zvkn' and 'Zvkg')",
.dependencies = featureSet(&[_]Feature{
.zvkg,
.zvkn,
}),
};
result[@intFromEnum(Feature.zvknha)] = .{
.llvm_name = "zvknha",
.description = "'Zvknha' (Vector SHA-2 (SHA-256 only))",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zvknhb)] = .{
.llvm_name = "zvknhb",
.description = "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))",
.dependencies = featureSet(&[_]Feature{
.zve64x,
}),
};
result[@intFromEnum(Feature.zvks)] = .{
.llvm_name = "zvks",
.description = "'Zvks' (shorthand for 'Zvksed', 'Zvksh', 'Zvkb', and 'Zvkt')",
.dependencies = featureSet(&[_]Feature{
.zvkb,
.zvksed,
.zvksh,
.zvkt,
}),
};
result[@intFromEnum(Feature.zvksc)] = .{
.llvm_name = "zvksc",
.description = "'Zvksc' (shorthand for 'Zvks' and 'Zvbc')",
.dependencies = featureSet(&[_]Feature{
.zvbc,
.zvks,
}),
};
result[@intFromEnum(Feature.zvksed)] = .{
.llvm_name = "zvksed",
.description = "'Zvksed' (SM4 Block Cipher Instructions)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zvksg)] = .{
.llvm_name = "zvksg",
.description = "'Zvksg' (shorthand for 'Zvks' and 'Zvkg')",
.dependencies = featureSet(&[_]Feature{
.zvkg,
.zvks,
}),
};
result[@intFromEnum(Feature.zvksh)] = .{
.llvm_name = "zvksh",
.description = "'Zvksh' (SM3 Hash Function Instructions)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zvkt)] = .{
.llvm_name = "zvkt",
.description = "'Zvkt' (Vector Data-Independent Execution Latency)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.zvl1024b)] = .{
.llvm_name = "zvl1024b",
.description = "'Zvl' (Minimum Vector Length) 1024",
@ -1217,6 +1443,77 @@ pub const cpu = struct {
.zifencei,
}),
};
/// CPU model `sifive_p450` (LLVM name `sifive-p450`).
/// `.features` is the explicit feature list for this model; it includes the
/// `fast_unaligned_access` flag and several fusion flags alongside the ISA
/// extensions.
pub const sifive_p450 = CpuModel{
.name = "sifive_p450",
.llvm_name = "sifive-p450",
.features = featureSet(&[_]Feature{
.@"64bit",
.a,
.auipc_addi_fusion,
.c,
.conditional_cmv_fusion,
.d,
.fast_unaligned_access,
.lui_addi_fusion,
.m,
.no_default_unroll,
.za64rs,
.zba,
.zbb,
.zbs,
.zfhmin,
.zic64b,
.zicbom,
.zicbop,
.zicboz,
.ziccamoa,
.ziccif,
.zicclsm,
.ziccrse,
.zifencei,
.zihintntl,
.zihintpause,
.zihpm,
}),
};
/// CPU model `sifive_p670` (LLVM name `sifive-p670`).
/// Compared with the `sifive_p450` list, this one adds `v`, `zvbb` and the
/// `zvknc`/`zvkng`/`zvksc`/`zvksg` vector-crypto shorthands, and does not
/// list `d` explicitly.
pub const sifive_p670 = CpuModel{
.name = "sifive_p670",
.llvm_name = "sifive-p670",
.features = featureSet(&[_]Feature{
.@"64bit",
.a,
.auipc_addi_fusion,
.c,
.conditional_cmv_fusion,
.fast_unaligned_access,
.lui_addi_fusion,
.m,
.no_default_unroll,
.v,
.za64rs,
.zba,
.zbb,
.zbs,
.zfhmin,
.zic64b,
.zicbom,
.zicbop,
.zicboz,
.ziccamoa,
.ziccif,
.zicclsm,
.ziccrse,
.zifencei,
.zihintntl,
.zihintpause,
.zihpm,
.zvbb,
.zvknc,
.zvkng,
.zvksc,
.zvksg,
}),
};
pub const sifive_s21 = CpuModel{
.name = "sifive_s21",
.llvm_name = "sifive-s21",
@ -1264,7 +1561,6 @@ pub const cpu = struct {
.m,
.no_default_unroll,
.short_forward_branch_opt,
.xsfcie,
.zifencei,
.zihintpause,
}),
@ -1338,4 +1634,56 @@ pub const cpu = struct {
.zifencei,
}),
};
/// CPU model `veyron_v1` (LLVM name `veyron-v1`).
/// The feature list includes the `ventana_veyron` flag, the
/// `xventanacondops` vendor extension and several fusion flags
/// (`ld_add_fusion`, `shifted_zextw_fusion`, `zexth_fusion`, `zextw_fusion`).
pub const veyron_v1 = CpuModel{
.name = "veyron_v1",
.llvm_name = "veyron-v1",
.features = featureSet(&[_]Feature{
.@"64bit",
.a,
.auipc_addi_fusion,
.c,
.d,
.ld_add_fusion,
.lui_addi_fusion,
.m,
.shifted_zextw_fusion,
.ventana_veyron,
.xventanacondops,
.zba,
.zbb,
.zbc,
.zbs,
.zexth_fusion,
.zextw_fusion,
.zicbom,
.zicbop,
.zicboz,
.zicntr,
.zifencei,
.zihintpause,
.zihpm,
}),
};
/// CPU model `xiangshan_nanhu` (LLVM name `xiangshan-nanhu`).
/// `.features` is the explicit feature list for this model.
pub const xiangshan_nanhu = CpuModel{
.name = "xiangshan_nanhu",
.llvm_name = "xiangshan-nanhu",
.features = featureSet(&[_]Feature{
.@"64bit",
.a,
.c,
.d,
.m,
.svinval,
.zba,
.zbb,
.zbc,
.zbs,
.zicbom,
.zicboz,
.zifencei,
.zkn,
.zksed,
.zksh,
}),
};
};

View file

@ -5,6 +5,7 @@ const CpuFeature = std.Target.Cpu.Feature;
const CpuModel = std.Target.Cpu.Model;
pub const Feature = enum {
backchain,
bear_enhancement,
deflate_conversion,
dfp_packed_conversion,
@ -57,6 +58,11 @@ pub const all_features = blk: {
const len = @typeInfo(Feature).Enum.fields.len;
std.debug.assert(len <= CpuFeature.Set.needed_bit_count);
var result: [len]CpuFeature = undefined;
result[@intFromEnum(Feature.backchain)] = .{
.llvm_name = "backchain",
.description = "Store the address of the caller's frame into the callee's stack frame",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.bear_enhancement)] = .{
.llvm_name = "bear-enhancement",
.description = "Assume that the BEAR-enhancement facility is installed",

View file

@ -18,6 +18,34 @@ pub const Feature = enum {
no_fmuls,
no_fsmuld,
popc,
reserve_g1,
reserve_g2,
reserve_g3,
reserve_g4,
reserve_g5,
reserve_g6,
reserve_g7,
reserve_i0,
reserve_i1,
reserve_i2,
reserve_i3,
reserve_i4,
reserve_i5,
reserve_l0,
reserve_l1,
reserve_l2,
reserve_l3,
reserve_l4,
reserve_l5,
reserve_l6,
reserve_l7,
reserve_o0,
reserve_o1,
reserve_o2,
reserve_o3,
reserve_o4,
reserve_o5,
slow_rdpc,
soft_float,
soft_mul_div,
v9,
@ -100,6 +128,148 @@ pub const all_features = blk: {
.description = "Use the popc (population count) instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_g1)] = .{
.llvm_name = "reserve-g1",
.description = "Reserve G1, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_g2)] = .{
.llvm_name = "reserve-g2",
.description = "Reserve G2, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_g3)] = .{
.llvm_name = "reserve-g3",
.description = "Reserve G3, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_g4)] = .{
.llvm_name = "reserve-g4",
.description = "Reserve G4, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_g5)] = .{
.llvm_name = "reserve-g5",
.description = "Reserve G5, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_g6)] = .{
.llvm_name = "reserve-g6",
.description = "Reserve G6, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_g7)] = .{
.llvm_name = "reserve-g7",
.description = "Reserve G7, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_i0)] = .{
.llvm_name = "reserve-i0",
.description = "Reserve I0, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_i1)] = .{
.llvm_name = "reserve-i1",
.description = "Reserve I1, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_i2)] = .{
.llvm_name = "reserve-i2",
.description = "Reserve I2, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_i3)] = .{
.llvm_name = "reserve-i3",
.description = "Reserve I3, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_i4)] = .{
.llvm_name = "reserve-i4",
.description = "Reserve I4, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_i5)] = .{
.llvm_name = "reserve-i5",
.description = "Reserve I5, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_l0)] = .{
.llvm_name = "reserve-l0",
.description = "Reserve L0, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_l1)] = .{
.llvm_name = "reserve-l1",
.description = "Reserve L1, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_l2)] = .{
.llvm_name = "reserve-l2",
.description = "Reserve L2, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_l3)] = .{
.llvm_name = "reserve-l3",
.description = "Reserve L3, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_l4)] = .{
.llvm_name = "reserve-l4",
.description = "Reserve L4, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_l5)] = .{
.llvm_name = "reserve-l5",
.description = "Reserve L5, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_l6)] = .{
.llvm_name = "reserve-l6",
.description = "Reserve L6, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_l7)] = .{
.llvm_name = "reserve-l7",
.description = "Reserve L7, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_o0)] = .{
.llvm_name = "reserve-o0",
.description = "Reserve O0, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_o1)] = .{
.llvm_name = "reserve-o1",
.description = "Reserve O1, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_o2)] = .{
.llvm_name = "reserve-o2",
.description = "Reserve O2, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_o3)] = .{
.llvm_name = "reserve-o3",
.description = "Reserve O3, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_o4)] = .{
.llvm_name = "reserve-o4",
.description = "Reserve O4, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.reserve_o5)] = .{
.llvm_name = "reserve-o5",
.description = "Reserve O5, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `slow-rdpc`; it lists `v9` as its prerequisite, so
// a feature set naming `slow_rdpc` does not need to name `v9` separately once
// dependencies are expanded (presumably by the feature-set machinery outside
// this block — confirm).
result[@intFromEnum(Feature.slow_rdpc)] = .{
.llvm_name = "slow-rdpc",
.description = "rd %pc, %XX is slow",
.dependencies = featureSet(&[_]Feature{
.v9,
}),
};
result[@intFromEnum(Feature.soft_float)] = .{
.llvm_name = "soft-float",
.description = "Use software emulation for floating point",
@ -407,7 +577,7 @@ pub const cpu = struct {
.llvm_name = "ultrasparc",
.features = featureSet(&[_]Feature{
.deprecated_v8,
.v9,
.slow_rdpc,
.vis,
}),
};
@ -416,7 +586,7 @@ pub const cpu = struct {
.llvm_name = "ultrasparc3",
.features = featureSet(&[_]Feature{
.deprecated_v8,
.v9,
.slow_rdpc,
.vis,
.vis2,
}),

View file

@ -34,6 +34,8 @@ pub const cpu = struct {
/// The `generic` CPU model (LLVM name `generic`), with the `vpu` feature
/// enabled. A struct literal may initialize each field only once, so exactly
/// one `.features` is given.
pub const generic = CpuModel{
    .name = "generic",
    .llvm_name = "generic",
    .features = featureSet(&[_]Feature{
        .vpu,
    }),
};
};

View file

@ -9,6 +9,7 @@ pub const Feature = enum {
bulk_memory,
exception_handling,
extended_const,
multimemory,
multivalue,
mutable_globals,
nontrapping_fptoint,
@ -48,6 +49,11 @@ pub const all_features = blk: {
.description = "Enable extended const expressions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.multimemory)] = .{
.llvm_name = "multimemory",
.description = "Enable multiple memories",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.multivalue)] = .{
.llvm_name = "multivalue",
.description = "Enable multivalue blocks, instructions, and functions",

View file

@ -19,6 +19,8 @@ pub const Feature = enum {
amx_int8,
amx_tile,
avx,
avx10_1_256,
avx10_1_512,
avx2,
avx512bf16,
avx512bitalg,
@ -44,6 +46,8 @@ pub const Feature = enum {
bmi,
bmi2,
branchfusion,
ccmp,
cf,
cldemote,
clflushopt,
clwb,
@ -53,8 +57,10 @@ pub const Feature = enum {
crc32,
cx16,
cx8,
egpr,
enqcmd,
ermsb,
evex512,
f16c,
false_deps_getmant,
false_deps_lzcnt_tzcnt,
@ -104,6 +110,7 @@ pub const Feature = enum {
movdir64b,
movdiri,
mwaitx,
ndd,
no_bypass_delay,
no_bypass_delay_blend,
no_bypass_delay_mov,
@ -114,14 +121,18 @@ pub const Feature = enum {
pconfig,
pku,
popcnt,
ppx,
prefer_128_bit,
prefer_256_bit,
prefer_mask_registers,
prefer_movmsk_over_vtest,
prefer_no_gather,
prefer_no_scatter,
prefetchi,
prefetchwt1,
prfchw,
ptwrite,
push2pop2,
raoint,
rdpid,
rdpru,
@ -167,6 +178,7 @@ pub const Feature = enum {
uintr,
use_glm_div_sqrt_costs,
use_slm_arith_costs,
usermsr,
vaes,
vpclmulqdq,
vzeroupper,
@ -276,6 +288,31 @@ pub const all_features = blk: {
.sse4_2,
}),
};
// Feature-table entry for `avx10.1-256`. The Zig enum name replaces the `.`
// and `-` of the LLVM name with underscores. Its prerequisites are the
// listed AVX-512 sub-features plus `vaes` and `vpclmulqdq`.
result[@intFromEnum(Feature.avx10_1_256)] = .{
.llvm_name = "avx10.1-256",
.description = "Support AVX10.1 up to 256-bit instruction",
.dependencies = featureSet(&[_]Feature{
.avx512bf16,
.avx512bitalg,
.avx512cd,
.avx512fp16,
.avx512ifma,
.avx512vbmi,
.avx512vbmi2,
.avx512vnni,
.avx512vpopcntdq,
.vaes,
.vpclmulqdq,
}),
};
// Feature-table entry for `avx10.1-512`; its prerequisites are the 256-bit
// level (`avx10_1_256`) plus `evex512`.
result[@intFromEnum(Feature.avx10_1_512)] = .{
.llvm_name = "avx10.1-512",
.description = "Support AVX10.1 up to 512-bit instruction",
.dependencies = featureSet(&[_]Feature{
.avx10_1_256,
.evex512,
}),
};
result[@intFromEnum(Feature.avx2)] = .{
.llvm_name = "avx2",
.description = "Enable AVX2 instructions",
@ -449,6 +486,16 @@ pub const all_features = blk: {
.description = "CMP/TEST can be fused with conditional branches",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ccmp)] = .{
.llvm_name = "ccmp",
.description = "Support conditional cmp & test instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.cf)] = .{
.llvm_name = "cf",
.description = "Support conditional faulting",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.cldemote)] = .{
.llvm_name = "cldemote",
.description = "Enable Cache Line Demote",
@ -496,6 +543,11 @@ pub const all_features = blk: {
.description = "Support CMPXCHG8B instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.egpr)] = .{
.llvm_name = "egpr",
.description = "Support extended general purpose register",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.enqcmd)] = .{
.llvm_name = "enqcmd",
.description = "Has ENQCMD instructions",
@ -506,6 +558,11 @@ pub const all_features = blk: {
.description = "REP MOVS/STOS are fast",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.evex512)] = .{
.llvm_name = "evex512",
.description = "Support ZMM and 64-bit mask instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.f16c)] = .{
.llvm_name = "f16c",
.description = "Support 16-bit floating point conversion instructions",
@ -762,6 +819,11 @@ pub const all_features = blk: {
.description = "Enable MONITORX/MWAITX timer functionality",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ndd)] = .{
.llvm_name = "ndd",
.description = "Support non-destructive destination",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.no_bypass_delay)] = .{
.llvm_name = "no-bypass-delay",
.description = "Has no bypass delay when using the 'wrong' domain",
@ -814,6 +876,11 @@ pub const all_features = blk: {
.description = "Support POPCNT instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ppx)] = .{
.llvm_name = "ppx",
.description = "Support Push-Pop Acceleration",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.prefer_128_bit)] = .{
.llvm_name = "prefer-128-bit",
.description = "Prefer 128-bit AVX instructions",
@ -834,6 +901,16 @@ pub const all_features = blk: {
.description = "Prefer movmsk over vtest instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.prefer_no_gather)] = .{
.llvm_name = "prefer-no-gather",
.description = "Prefer no gather instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.prefer_no_scatter)] = .{
.llvm_name = "prefer-no-scatter",
.description = "Prefer no scatter instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.prefetchi)] = .{
.llvm_name = "prefetchi",
.description = "Prefetch instruction with T0 or T1 Hint",
@ -854,6 +931,11 @@ pub const all_features = blk: {
.description = "Support ptwrite instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.push2pop2)] = .{
.llvm_name = "push2pop2",
.description = "Support PUSH2/POP2 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.raoint)] = .{
.llvm_name = "raoint",
.description = "Support RAO-INT instructions",
@ -947,7 +1029,7 @@ pub const all_features = blk: {
.llvm_name = "sha512",
.description = "Support SHA512 instructions",
.dependencies = featureSet(&[_]Feature{
.avx,
.avx2,
}),
};
result[@intFromEnum(Feature.shstk)] = .{
@ -1011,7 +1093,7 @@ pub const all_features = blk: {
.llvm_name = "sm4",
.description = "Support SM4 instructions",
.dependencies = featureSet(&[_]Feature{
.avx,
.avx2,
}),
};
result[@intFromEnum(Feature.soft_float)] = .{
@ -1106,12 +1188,17 @@ pub const all_features = blk: {
.description = "Use Silvermont specific arithmetic costs",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.usermsr)] = .{
.llvm_name = "usermsr",
.description = "Support USERMSR instructions",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `vaes`; listed prerequisites are `aes` and `avx2`.
result[@intFromEnum(Feature.vaes)] = .{
    .llvm_name = "vaes",
    .description = "Promote selected AES instructions to AVX512/AVX registers",
    .dependencies = featureSet(&[_]Feature{
        .aes,
        .avx2,
    }),
};
result[@intFromEnum(Feature.vpclmulqdq)] = .{
@ -1282,6 +1369,239 @@ pub const cpu = struct {
.x87,
}),
};
pub const arrowlake = CpuModel{
.name = "arrowlake",
.llvm_name = "arrowlake",
.features = featureSet(&[_]Feature{
.@"64bit",
.adx,
.allow_light_256_bit,
.avxifma,
.avxneconvert,
.avxvnni,
.avxvnniint8,
.bmi,
.bmi2,
.cldemote,
.clflushopt,
.clwb,
.cmov,
.cmpccxadd,
.crc32,
.cx16,
.enqcmd,
.f16c,
.false_deps_perm,
.false_deps_popcnt,
.fast_15bytenop,
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fma,
.fsgsbase,
.fxsr,
.gfni,
.hreset,
.idivq_to_divl,
.invpcid,
.lzcnt,
.macrofusion,
.mmx,
.movbe,
.movdir64b,
.movdiri,
.no_bypass_delay_blend,
.no_bypass_delay_mov,
.no_bypass_delay_shuffle,
.nopl,
.pconfig,
.pku,
.popcnt,
.prefer_movmsk_over_vtest,
.prfchw,
.ptwrite,
.rdpid,
.rdrnd,
.rdseed,
.sahf,
.serialize,
.sha,
.shstk,
.slow_3ops_lea,
.tuning_fast_imm_vector_shift,
.uintr,
.vaes,
.vpclmulqdq,
.vzeroupper,
.waitpkg,
.widekl,
.x87,
.xsavec,
.xsaveopt,
.xsaves,
}),
};
/// CPU model `arrowlake_s` (LLVM name `arrowlake-s`).
/// Declared exactly once: a container cannot hold two declarations with the
/// same name, and the two spellings (`arrowlake-s` and `arrowlake_s`) map to
/// the same Zig identifier with an identical feature list.
pub const arrowlake_s = CpuModel{
    .name = "arrowlake_s",
    .llvm_name = "arrowlake-s",
    .features = featureSet(&[_]Feature{
        .@"64bit",
        .adx,
        .allow_light_256_bit,
        .avxifma,
        .avxneconvert,
        .avxvnni,
        .avxvnniint16,
        .avxvnniint8,
        .bmi,
        .bmi2,
        .cldemote,
        .clflushopt,
        .clwb,
        .cmov,
        .cmpccxadd,
        .crc32,
        .cx16,
        .enqcmd,
        .f16c,
        .false_deps_perm,
        .false_deps_popcnt,
        .fast_15bytenop,
        .fast_gather,
        .fast_scalar_fsqrt,
        .fast_shld_rotate,
        .fast_variable_crosslane_shuffle,
        .fast_variable_perlane_shuffle,
        .fast_vector_fsqrt,
        .fma,
        .fsgsbase,
        .fxsr,
        .gfni,
        .hreset,
        .idivq_to_divl,
        .invpcid,
        .lzcnt,
        .macrofusion,
        .mmx,
        .movbe,
        .movdir64b,
        .movdiri,
        .no_bypass_delay_blend,
        .no_bypass_delay_mov,
        .no_bypass_delay_shuffle,
        .nopl,
        .pconfig,
        .pku,
        .popcnt,
        .prefer_movmsk_over_vtest,
        .prfchw,
        .ptwrite,
        .rdpid,
        .rdrnd,
        .rdseed,
        .sahf,
        .serialize,
        .sha,
        .sha512,
        .shstk,
        .slow_3ops_lea,
        .sm3,
        .sm4,
        .tuning_fast_imm_vector_shift,
        .uintr,
        .vaes,
        .vpclmulqdq,
        .vzeroupper,
        .waitpkg,
        .widekl,
        .x87,
        .xsavec,
        .xsaveopt,
        .xsaves,
    }),
};
pub const athlon = CpuModel{
.name = "athlon",
.llvm_name = "athlon",
@ -1817,6 +2137,7 @@ pub const cpu = struct {
.crc32,
.cx16,
.ermsb,
.evex512,
.fast_15bytenop,
.fast_gather,
.fast_scalar_fsqrt,
@ -1875,6 +2196,7 @@ pub const cpu = struct {
.crc32,
.cx16,
.ermsb,
.evex512,
.false_deps_popcnt,
.fast_15bytenop,
.fast_gather,
@ -1913,6 +2235,87 @@ pub const cpu = struct {
.xsaves,
}),
};
/// CPU model entry for `clearwaterforest`.
///
/// `name` and `llvm_name` are the same string, `"clearwaterforest"`. The
/// `features` value is built by passing the `Feature` tags below to
/// `featureSet`; the tags are listed in alphabetical order and include
/// `prefetchi` and `usermsr`.
pub const clearwaterforest = CpuModel{
.name = "clearwaterforest",
.llvm_name = "clearwaterforest",
.features = featureSet(&[_]Feature{
.@"64bit",
.adx,
.allow_light_256_bit,
.avxifma,
.avxneconvert,
.avxvnni,
.avxvnniint16,
.avxvnniint8,
.bmi,
.bmi2,
.cldemote,
.clflushopt,
.clwb,
.cmov,
.cmpccxadd,
.crc32,
.cx16,
.enqcmd,
.f16c,
.false_deps_perm,
.false_deps_popcnt,
.fast_15bytenop,
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fma,
.fsgsbase,
.fxsr,
.gfni,
.hreset,
.idivq_to_divl,
.invpcid,
.lzcnt,
.macrofusion,
.mmx,
.movbe,
.movdir64b,
.movdiri,
.no_bypass_delay_blend,
.no_bypass_delay_mov,
.no_bypass_delay_shuffle,
.nopl,
.pconfig,
.pku,
.popcnt,
.prefer_movmsk_over_vtest,
.prefetchi,
.prfchw,
.ptwrite,
.rdpid,
.rdrnd,
.rdseed,
.sahf,
.serialize,
.sha,
.sha512,
.shstk,
.slow_3ops_lea,
.sm3,
.sm4,
.tuning_fast_imm_vector_shift,
.uintr,
.usermsr,
.vaes,
.vpclmulqdq,
.vzeroupper,
.waitpkg,
.widekl,
.x87,
.xsavec,
.xsaveopt,
.xsaves,
}),
};
pub const cooperlake = CpuModel{
.name = "cooperlake",
.llvm_name = "cooperlake",
@ -1934,6 +2337,7 @@ pub const cpu = struct {
.crc32,
.cx16,
.ermsb,
.evex512,
.false_deps_popcnt,
.fast_15bytenop,
.fast_gather,
@ -2039,6 +2443,7 @@ pub const cpu = struct {
.cx16,
.enqcmd,
.ermsb,
.evex512,
.false_deps_getmant,
.false_deps_mulc,
.false_deps_mullq,
@ -2195,6 +2600,66 @@ pub const cpu = struct {
.xsaves,
}),
};
/// CPU model entry for `gracemont`.
///
/// `name` and `llvm_name` are the same string, `"gracemont"`. The
/// `features` value is built by passing the `Feature` tags below to
/// `featureSet`; the tags are listed in alphabetical order and include
/// both `idivl_to_divb` and `idivq_to_divl`.
pub const gracemont = CpuModel{
.name = "gracemont",
.llvm_name = "gracemont",
.features = featureSet(&[_]Feature{
.@"64bit",
.adx,
.avxvnni,
.bmi,
.bmi2,
.cldemote,
.clflushopt,
.clwb,
.cmov,
.crc32,
.cx16,
.f16c,
.false_deps_popcnt,
.fast_15bytenop,
.fast_scalar_fsqrt,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fma,
.fsgsbase,
.fxsr,
.gfni,
.hreset,
.idivl_to_divb,
.idivq_to_divl,
.invpcid,
.lzcnt,
.macrofusion,
.mmx,
.movbe,
.movdir64b,
.movdiri,
.nopl,
.pconfig,
.pku,
.popcnt,
.prfchw,
.ptwrite,
.rdpid,
.rdrnd,
.rdseed,
.sahf,
.serialize,
.sha,
.shstk,
.slow_3ops_lea,
.vaes,
.vpclmulqdq,
.vzeroupper,
.waitpkg,
.widekl,
.x87,
.xsavec,
.xsaveopt,
.xsaves,
}),
};
pub const grandridge = CpuModel{
.name = "grandridge",
.llvm_name = "grandridge",
@ -2235,7 +2700,6 @@ pub const cpu = struct {
.popcnt,
.prfchw,
.ptwrite,
.raoint,
.rdpid,
.rdrnd,
.rdseed,
@ -2289,6 +2753,7 @@ pub const cpu = struct {
.cx16,
.enqcmd,
.ermsb,
.evex512,
.false_deps_getmant,
.false_deps_mulc,
.false_deps_mullq,
@ -2376,6 +2841,7 @@ pub const cpu = struct {
.cx16,
.enqcmd,
.ermsb,
.evex512,
.false_deps_getmant,
.false_deps_mulc,
.false_deps_mullq,
@ -2537,6 +3003,7 @@ pub const cpu = struct {
.crc32,
.cx16,
.ermsb,
.evex512,
.fast_15bytenop,
.fast_gather,
.fast_scalar_fsqrt,
@ -2601,6 +3068,7 @@ pub const cpu = struct {
.crc32,
.cx16,
.ermsb,
.evex512,
.fast_15bytenop,
.fast_gather,
.fast_scalar_fsqrt,
@ -2760,6 +3228,7 @@ pub const cpu = struct {
.cmov,
.crc32,
.cx16,
.evex512,
.fast_gather,
.fast_movbe,
.fsgsbase,
@ -2801,6 +3270,7 @@ pub const cpu = struct {
.cmov,
.crc32,
.cx16,
.evex512,
.fast_gather,
.fast_movbe,
.fsgsbase,
@ -2836,6 +3306,85 @@ pub const cpu = struct {
.vzeroupper,
}),
};
/// CPU model entry for `lunarlake`.
///
/// `name` and `llvm_name` are the same string, `"lunarlake"`. The
/// `features` value is built by passing the `Feature` tags below to
/// `featureSet`; the tags are listed in alphabetical order.
pub const lunarlake = CpuModel{
.name = "lunarlake",
.llvm_name = "lunarlake",
.features = featureSet(&[_]Feature{
.@"64bit",
.adx,
.allow_light_256_bit,
.avxifma,
.avxneconvert,
.avxvnni,
.avxvnniint16,
.avxvnniint8,
.bmi,
.bmi2,
.cldemote,
.clflushopt,
.clwb,
.cmov,
.cmpccxadd,
.crc32,
.cx16,
.enqcmd,
.f16c,
.false_deps_perm,
.false_deps_popcnt,
.fast_15bytenop,
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fma,
.fsgsbase,
.fxsr,
.gfni,
.hreset,
.idivq_to_divl,
.invpcid,
.lzcnt,
.macrofusion,
.mmx,
.movbe,
.movdir64b,
.movdiri,
.no_bypass_delay_blend,
.no_bypass_delay_mov,
.no_bypass_delay_shuffle,
.nopl,
.pconfig,
.pku,
.popcnt,
.prefer_movmsk_over_vtest,
.prfchw,
.ptwrite,
.rdpid,
.rdrnd,
.rdseed,
.sahf,
.serialize,
.sha,
.sha512,
.shstk,
.slow_3ops_lea,
.sm3,
.sm4,
.tuning_fast_imm_vector_shift,
.uintr,
.vaes,
.vpclmulqdq,
.vzeroupper,
.waitpkg,
.widekl,
.x87,
.xsavec,
.xsaveopt,
.xsaves,
}),
};
pub const meteorlake = CpuModel{
.name = "meteorlake",
.llvm_name = "meteorlake",
@ -2979,6 +3528,86 @@ pub const cpu = struct {
.x87,
}),
};
/// CPU model entry for `pantherlake`.
///
/// `name` and `llvm_name` are the same string, `"pantherlake"`. The
/// `features` value is built by passing the `Feature` tags below to
/// `featureSet`; the tags are listed in alphabetical order and include
/// `prefetchi`.
pub const pantherlake = CpuModel{
.name = "pantherlake",
.llvm_name = "pantherlake",
.features = featureSet(&[_]Feature{
.@"64bit",
.adx,
.allow_light_256_bit,
.avxifma,
.avxneconvert,
.avxvnni,
.avxvnniint16,
.avxvnniint8,
.bmi,
.bmi2,
.cldemote,
.clflushopt,
.clwb,
.cmov,
.cmpccxadd,
.crc32,
.cx16,
.enqcmd,
.f16c,
.false_deps_perm,
.false_deps_popcnt,
.fast_15bytenop,
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fma,
.fsgsbase,
.fxsr,
.gfni,
.hreset,
.idivq_to_divl,
.invpcid,
.lzcnt,
.macrofusion,
.mmx,
.movbe,
.movdir64b,
.movdiri,
.no_bypass_delay_blend,
.no_bypass_delay_mov,
.no_bypass_delay_shuffle,
.nopl,
.pconfig,
.pku,
.popcnt,
.prefer_movmsk_over_vtest,
.prefetchi,
.prfchw,
.ptwrite,
.rdpid,
.rdrnd,
.rdseed,
.sahf,
.serialize,
.sha,
.sha512,
.shstk,
.slow_3ops_lea,
.sm3,
.sm4,
.tuning_fast_imm_vector_shift,
.uintr,
.vaes,
.vpclmulqdq,
.vzeroupper,
.waitpkg,
.widekl,
.x87,
.xsavec,
.xsaveopt,
.xsaves,
}),
};
pub const penryn = CpuModel{
.name = "penryn",
.llvm_name = "penryn",
@ -3211,6 +3840,7 @@ pub const cpu = struct {
.crc32,
.cx16,
.ermsb,
.evex512,
.fast_15bytenop,
.fast_gather,
.fast_scalar_fsqrt,
@ -3309,6 +3939,7 @@ pub const cpu = struct {
.cx16,
.enqcmd,
.ermsb,
.evex512,
.false_deps_getmant,
.false_deps_mulc,
.false_deps_mullq,
@ -3479,6 +4110,7 @@ pub const cpu = struct {
.crc32,
.cx16,
.ermsb,
.evex512,
.false_deps_popcnt,
.fast_15bytenop,
.fast_gather,
@ -3589,6 +4221,7 @@ pub const cpu = struct {
.crc32,
.cx16,
.ermsb,
.evex512,
.false_deps_popcnt,
.fast_15bytenop,
.fast_gather,
@ -3684,6 +4317,7 @@ pub const cpu = struct {
.crc32,
.cx16,
.ermsb,
.evex512,
.fast_15bytenop,
.fast_gather,
.fast_scalar_fsqrt,
@ -3904,6 +4538,7 @@ pub const cpu = struct {
.cmov,
.crc32,
.cx16,
.evex512,
.false_deps_popcnt,
.fast_15bytenop,
.fast_gather,
@ -4056,7 +4691,6 @@ pub const cpu = struct {
.@"64bit",
.adx,
.allow_light_256_bit,
.avx2,
.bmi,
.bmi2,
.branchfusion,
@ -4134,6 +4768,7 @@ pub const cpu = struct {
.cmov,
.crc32,
.cx16,
.evex512,
.fast_15bytenop,
.fast_bextr,
.fast_lzcnt,

View file

@ -183,6 +183,10 @@ const llvm_targets = [_]LlvmTarget{
.llvm_name = "apple-a16",
.flatten = true,
},
.{
.llvm_name = "apple-a17",
.flatten = true,
},
.{
.llvm_name = "apple-a7-sysreg",
.flatten = true,
@ -207,6 +211,10 @@ const llvm_targets = [_]LlvmTarget{
.llvm_name = "cortex-x3",
.flatten = true,
},
.{
.llvm_name = "cortex-x4",
.flatten = true,
},
.{
.llvm_name = "falkor",
.flatten = true,
@ -257,6 +265,10 @@ const llvm_targets = [_]LlvmTarget{
.llvm_name = "ampere1",
.flatten = true,
},
.{
.llvm_name = "ampere1b",
.flatten = true,
},
},
.extra_cpus = &.{
.{
@ -681,6 +693,10 @@ const llvm_targets = [_]LlvmTarget{
.llvm_name = "armv9.4-a",
.zig_name = "v9_4a",
},
.{
.llvm_name = "armv9.5-a",
.zig_name = "v9_5a",
},
.{
.llvm_name = "armv9-a",
.zig_name = "v9a",