From 109ec729247443200c2ba614f6acb0b991e44b4d Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 25 Apr 2024 16:54:33 -0700 Subject: [PATCH] update CPU features to LLVM 18 release/18.x branch, commit 78b99c73ee4b96fe9ce0e294d4632326afb2db42 --- lib/std/Target/aarch64.zig | 457 +++++++++++++++++++++--- lib/std/Target/amdgpu.zig | 270 +++++++++++++- lib/std/Target/arm.zig | 42 ++- lib/std/Target/bpf.zig | 7 + lib/std/Target/loongarch.zig | 18 + lib/std/Target/nvptx.zig | 26 ++ lib/std/Target/powerpc.zig | 6 + lib/std/Target/riscv.zig | 616 +++++++++++++++++++++++++------- lib/std/Target/s390x.zig | 6 + lib/std/Target/sparc.zig | 174 ++++++++- lib/std/Target/ve.zig | 4 +- lib/std/Target/wasm.zig | 6 + lib/std/Target/x86.zig | 645 +++++++++++++++++++++++++++++++++- tools/update_cpu_features.zig | 16 + 14 files changed, 2103 insertions(+), 190 deletions(-) diff --git a/lib/std/Target/aarch64.zig b/lib/std/Target/aarch64.zig index 011cb20aef..ff402d50b3 100644 --- a/lib/std/Target/aarch64.zig +++ b/lib/std/Target/aarch64.zig @@ -6,15 +6,19 @@ const CpuModel = std.Target.Cpu.Model; pub const Feature = enum { a510, + a520, a65, a710, + a720, a76, a78, a78c, + addr_lsl_fast, aes, aggressive_fma, alternate_sextload_cvt_f32_pattern, altnzcv, + alu_lsl_fast, am, amvs, arith_bcc_fusion, @@ -43,12 +47,14 @@ pub const Feature = enum { complxnum, contextidr_el2, cortex_r82, + cpa, crc, crypto, cssc, - custom_cheap_as_move, d128, disable_latency_sched_heuristic, + disable_ldp, + disable_stp, dit, dotprod, ecv, @@ -59,13 +65,19 @@ pub const Feature = enum { exynos_cheap_as_move, f32mm, f64mm, + faminmax, fgt, fix_cortex_a53_835769, flagm, fmv, force_32bit_jump_tables, fp16fml, + fp8, + fp8dot2, + fp8dot4, + fp8fma, fp_armv8, + fpmr, fptoint, fullfp16, fuse_address, @@ -85,12 +97,13 @@ pub const Feature = enum { i8mm, ite, jsconv, + ldp_aligned_only, lor, ls64, lse, lse128, lse2, - lsl_fast, + lut, mec, mops, mpam, @@ -106,6 +119,7 @@ pub const Feature = enum { pan, pan_rwv, 
pauth, + pauth_lr, perfmon, predictable_select_expensive, predres, @@ -156,12 +170,21 @@ pub const Feature = enum { sme2p1, sme_f16f16, sme_f64f64, + sme_f8f16, + sme_f8f32, + sme_fa64, sme_i16i64, + sme_lutv2, spe, spe_eef, specres2, specrestrict, ssbs, + ssve_fp8dot2, + ssve_fp8dot4, + ssve_fp8fma, + store_pair_suppress, + stp_aligned_only, strict_align, sve, sve2, @@ -173,6 +196,7 @@ pub const Feature = enum { tagged_globals, the, tlb_rmi, + tlbiw, tme, tpidr_el1, tpidr_el2, @@ -200,6 +224,7 @@ pub const Feature = enum { v9_2a, v9_3a, v9_4a, + v9_5a, v9a, vh, wfxt, @@ -229,6 +254,15 @@ pub const all_features = blk: { .use_postra_scheduler, }), }; + result[@intFromEnum(Feature.a520)] = .{ + .llvm_name = "a520", + .description = "Cortex-A520 ARM processors", + .dependencies = featureSet(&[_]Feature{ + .fuse_adrp_add, + .fuse_aes, + .use_postra_scheduler, + }), + }; result[@intFromEnum(Feature.a65)] = .{ .llvm_name = "a65", .description = "Cortex-A65 ARM processors", @@ -245,11 +279,26 @@ pub const all_features = blk: { .llvm_name = "a710", .description = "Cortex-A710 ARM processors", .dependencies = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, + .cmp_bcc_fusion, + .enable_select_opt, + .fuse_adrp_add, + .fuse_aes, + .predictable_select_expensive, + .use_postra_scheduler, + }), + }; + result[@intFromEnum(Feature.a720)] = .{ + .llvm_name = "a720", + .description = "Cortex-A720 ARM processors", + .dependencies = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .cmp_bcc_fusion, .enable_select_opt, .fuse_adrp_add, .fuse_aes, - .lsl_fast, .predictable_select_expensive, .use_postra_scheduler, }), @@ -258,10 +307,11 @@ pub const all_features = blk: { .llvm_name = "a76", .description = "Cortex-A76 ARM processors", .dependencies = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .enable_select_opt, .fuse_adrp_add, .fuse_aes, - .lsl_fast, .predictable_select_expensive, }), }; @@ -269,11 +319,12 @@ pub const all_features = blk: { .llvm_name = 
"a78", .description = "Cortex-A78 ARM processors", .dependencies = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .cmp_bcc_fusion, .enable_select_opt, .fuse_adrp_add, .fuse_aes, - .lsl_fast, .predictable_select_expensive, .use_postra_scheduler, }), @@ -282,15 +333,21 @@ pub const all_features = blk: { .llvm_name = "a78c", .description = "Cortex-A78C ARM processors", .dependencies = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .cmp_bcc_fusion, .enable_select_opt, .fuse_adrp_add, .fuse_aes, - .lsl_fast, .predictable_select_expensive, .use_postra_scheduler, }), }; + result[@intFromEnum(Feature.addr_lsl_fast)] = .{ + .llvm_name = "addr-lsl-fast", + .description = "Address operands with logical shift of up to 3 places are cheap", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.aes)] = .{ .llvm_name = "aes", .description = "Enable AES support (FEAT_AES, FEAT_PMULL)", @@ -313,6 +370,11 @@ pub const all_features = blk: { .description = "Enable alternative NZCV format for floating point comparisons (FEAT_FlagM2)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.alu_lsl_fast)] = .{ + .llvm_name = "alu-lsl-fast", + .description = "Add/Sub operations with lsl shift <= 4 are cheap", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.am)] = .{ .llvm_name = "am", .description = "Enable v8.4-A Activity Monitors extension (FEAT_AMUv1)", @@ -343,7 +405,9 @@ pub const all_features = blk: { result[@intFromEnum(Feature.b16b16)] = .{ .llvm_name = "b16b16", .description = "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", - .dependencies = featureSet(&[_]Feature{}), + .dependencies = featureSet(&[_]Feature{ + .bf16, + }), }; result[@intFromEnum(Feature.balance_fp_ops)] = .{ .llvm_name = "balance-fp-ops", @@ -459,6 +523,11 @@ pub const all_features = blk: { .use_postra_scheduler, }), }; + result[@intFromEnum(Feature.cpa)] = .{ + .llvm_name = 
"cpa", + .description = "Enable Armv9.5-A Checked Pointer Arithmetic (FEAT_CPA)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.crc)] = .{ .llvm_name = "crc", .description = "Enable ARMv8 CRC-32 checksum instructions (FEAT_CRC32)", @@ -477,11 +546,6 @@ pub const all_features = blk: { .description = "Enable Common Short Sequence Compression (CSSC) instructions (FEAT_CSSC)", .dependencies = featureSet(&[_]Feature{}), }; - result[@intFromEnum(Feature.custom_cheap_as_move)] = .{ - .llvm_name = "custom-cheap-as-move", - .description = "Use custom handling of cheap instructions", - .dependencies = featureSet(&[_]Feature{}), - }; result[@intFromEnum(Feature.d128)] = .{ .llvm_name = "d128", .description = "Enable Armv9.4-A 128-bit Page Table Descriptors, System Registers and Instructions (FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128)", @@ -494,6 +558,16 @@ pub const all_features = blk: { .description = "Disable latency scheduling heuristic", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.disable_ldp)] = .{ + .llvm_name = "disable-ldp", + .description = "Do not emit ldp", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.disable_stp)] = .{ + .llvm_name = "disable-stp", + .description = "Do not emit stp", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.dit)] = .{ .llvm_name = "dit", .description = "Enable v8.4-A Data Independent Timing instructions (FEAT_DIT)", @@ -534,9 +608,7 @@ pub const all_features = blk: { result[@intFromEnum(Feature.exynos_cheap_as_move)] = .{ .llvm_name = "exynos-cheap-as-move", .description = "Use Exynos specific handling of cheap instructions", - .dependencies = featureSet(&[_]Feature{ - .custom_cheap_as_move, - }), + .dependencies = featureSet(&[_]Feature{}), }; result[@intFromEnum(Feature.f32mm)] = .{ .llvm_name = "f32mm", @@ -552,6 +624,11 @@ pub const all_features = blk: { .sve, }), }; + 
result[@intFromEnum(Feature.faminmax)] = .{ + .llvm_name = "faminmax", + .description = "Enable FAMIN and FAMAX instructions (FEAT_FAMINMAX)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.fgt)] = .{ .llvm_name = "fgt", .description = "Enable fine grained virtualization traps extension (FEAT_FGT)", @@ -584,11 +661,36 @@ pub const all_features = blk: { .fullfp16, }), }; + result[@intFromEnum(Feature.fp8)] = .{ + .llvm_name = "fp8", + .description = "Enable FP8 instructions (FEAT_FP8)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.fp8dot2)] = .{ + .llvm_name = "fp8dot2", + .description = "Enable fp8 2-way dot instructions (FEAT_FP8DOT2)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.fp8dot4)] = .{ + .llvm_name = "fp8dot4", + .description = "Enable fp8 4-way dot instructions (FEAT_FP8DOT4)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.fp8fma)] = .{ + .llvm_name = "fp8fma", + .description = "Enable fp8 multiply-add instructions (FEAT_FP8FMA)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.fp_armv8)] = .{ .llvm_name = "fp-armv8", .description = "Enable ARMv8 FP (FEAT_FP)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.fpmr)] = .{ + .llvm_name = "fpmr", + .description = "Enable FPMR Register (FEAT_FPMR)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.fptoint)] = .{ .llvm_name = "fptoint", .description = "Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int (FEAT_FRINTTS)", @@ -692,6 +794,11 @@ pub const all_features = blk: { .fp_armv8, }), }; + result[@intFromEnum(Feature.ldp_aligned_only)] = .{ + .llvm_name = "ldp-aligned-only", + .description = "In order to emit ldp, first check if the load will be aligned to 2 * element_size", + .dependencies = 
featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.lor)] = .{ .llvm_name = "lor", .description = "Enables ARM v8.1 Limited Ordering Regions extension (FEAT_LOR)", @@ -719,9 +826,9 @@ pub const all_features = blk: { .description = "Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules (FEAT_LSE2)", .dependencies = featureSet(&[_]Feature{}), }; - result[@intFromEnum(Feature.lsl_fast)] = .{ - .llvm_name = "lsl-fast", - .description = "CPU has a fastpath logical shift of up to 3 places", + result[@intFromEnum(Feature.lut)] = .{ + .llvm_name = "lut", + .description = "Enable Lookup Table instructions (FEAT_LUT)", .dependencies = featureSet(&[_]Feature{}), }; result[@intFromEnum(Feature.mec)] = .{ @@ -805,6 +912,11 @@ pub const all_features = blk: { .description = "Enable v8.3-A Pointer Authentication extension (FEAT_PAuth)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.pauth_lr)] = .{ + .llvm_name = "pauth-lr", + .description = "Enable Armv9.5-A PAC enhancements (FEAT_PAuth_LR)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.perfmon)] = .{ .llvm_name = "perfmon", .description = "Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension (FEAT_PMUv3)", @@ -1076,6 +1188,30 @@ pub const all_features = blk: { .sme, }), }; + result[@intFromEnum(Feature.sme_f8f16)] = .{ + .llvm_name = "sme-f8f16", + .description = "Enable Scalable Matrix Extension (SME) F8F16 instructions(FEAT_SME_F8F16)", + .dependencies = featureSet(&[_]Feature{ + .fp8, + .sme2, + }), + }; + result[@intFromEnum(Feature.sme_f8f32)] = .{ + .llvm_name = "sme-f8f32", + .description = "Enable Scalable Matrix Extension (SME) F8F32 instructions (FEAT_SME_F8F32)", + .dependencies = featureSet(&[_]Feature{ + .fp8, + .sme2, + }), + }; + result[@intFromEnum(Feature.sme_fa64)] = .{ + .llvm_name = "sme-fa64", + .description = "Enable the full A64 instruction set in streaming SVE mode (FEAT_SME_FA64)", + .dependencies = 
featureSet(&[_]Feature{ + .sme, + .sve2, + }), + }; result[@intFromEnum(Feature.sme_i16i64)] = .{ .llvm_name = "sme-i16i64", .description = "Enable Scalable Matrix Extension (SME) I16I64 instructions (FEAT_SME_I16I64)", @@ -1083,6 +1219,11 @@ pub const all_features = blk: { .sme, }), }; + result[@intFromEnum(Feature.sme_lutv2)] = .{ + .llvm_name = "sme-lutv2", + .description = "Enable Scalable Matrix Extension (SME) LUTv2 instructions (FEAT_SME_LUTv2)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.spe)] = .{ .llvm_name = "spe", .description = "Enable Statistical Profiling extension (FEAT_SPE)", @@ -1110,6 +1251,37 @@ pub const all_features = blk: { .description = "Enable Speculative Store Bypass Safe bit (FEAT_SSBS, FEAT_SSBS2)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.ssve_fp8dot2)] = .{ + .llvm_name = "ssve-fp8dot2", + .description = "Enable SVE2 fp8 2-way dot product instructions (FEAT_SSVE_FP8DOT2)", + .dependencies = featureSet(&[_]Feature{ + .sme2, + }), + }; + result[@intFromEnum(Feature.ssve_fp8dot4)] = .{ + .llvm_name = "ssve-fp8dot4", + .description = "Enable SVE2 fp8 4-way dot product instructions (FEAT_SSVE_FP8DOT4)", + .dependencies = featureSet(&[_]Feature{ + .sme2, + }), + }; + result[@intFromEnum(Feature.ssve_fp8fma)] = .{ + .llvm_name = "ssve-fp8fma", + .description = "Enable SVE2 fp8 multiply-add instructions (FEAT_SSVE_FP8FMA)", + .dependencies = featureSet(&[_]Feature{ + .sme2, + }), + }; + result[@intFromEnum(Feature.store_pair_suppress)] = .{ + .llvm_name = "store-pair-suppress", + .description = "Enable Store Pair Suppression heuristics", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.stp_aligned_only)] = .{ + .llvm_name = "stp-aligned-only", + .description = "In order to emit stp, first check if the store will be aligned to 2 * element_size", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.strict_align)] = .{ 
.llvm_name = "strict-align", .description = "Disallow all unaligned memory access", @@ -1183,6 +1355,11 @@ pub const all_features = blk: { .description = "Enable v8.4-A TLB Range and Maintenance Instructions (FEAT_TLBIOS, FEAT_TLBIRANGE)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.tlbiw)] = .{ + .llvm_name = "tlbiw", + .description = "Enable ARMv9.5-A TLBI VMALL for Dirty State (FEAT_TLBIW)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.tme)] = .{ .llvm_name = "tme", .description = "Enable Transactional Memory Extension (FEAT_TME)", @@ -1424,6 +1601,14 @@ pub const all_features = blk: { .v9_3a, }), }; + result[@intFromEnum(Feature.v9_5a)] = .{ + .llvm_name = "v9.5a", + .description = "Support ARM v9.5a instructions", + .dependencies = featureSet(&[_]Feature{ + .cpa, + .v9_4a, + }), + }; result[@intFromEnum(Feature.v9a)] = .{ .llvm_name = "v9a", .description = "Support ARM v9a instructions", @@ -1491,6 +1676,7 @@ pub const cpu = struct { .perfmon, .predictable_select_expensive, .sha2, + .store_pair_suppress, .sve, .use_postra_scheduler, .v8_2a, @@ -1500,17 +1686,21 @@ pub const cpu = struct { .name = "ampere1", .llvm_name = "ampere1", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, .aes, .aggressive_fma, + .alu_lsl_fast, .arith_bcc_fusion, .cmp_bcc_fusion, .fuse_address, .fuse_aes, .fuse_literals, - .lsl_fast, + .ldp_aligned_only, .perfmon, .rand, .sha3, + .store_pair_suppress, + .stp_aligned_only, .use_postra_scheduler, .v8_6a, }), @@ -1519,23 +1709,57 @@ pub const cpu = struct { .name = "ampere1a", .llvm_name = "ampere1a", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, .aes, .aggressive_fma, + .alu_lsl_fast, .arith_bcc_fusion, .cmp_bcc_fusion, .fuse_address, .fuse_aes, .fuse_literals, - .lsl_fast, + .ldp_aligned_only, .mte, .perfmon, .rand, .sha3, .sm4, + .store_pair_suppress, + .stp_aligned_only, .use_postra_scheduler, .v8_6a, }), }; + pub const ampere1b = CpuModel{ + .name = 
"ampere1b", + .llvm_name = "ampere1b", + .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .aes, + .aggressive_fma, + .alu_lsl_fast, + .arith_bcc_fusion, + .cmp_bcc_fusion, + .cssc, + .enable_select_opt, + .fullfp16, + .fuse_address, + .fuse_adrp_add, + .fuse_aes, + .fuse_literals, + .ldp_aligned_only, + .mte, + .perfmon, + .predictable_select_expensive, + .rand, + .sha3, + .sm4, + .store_pair_suppress, + .stp_aligned_only, + .use_postra_scheduler, + .v8_7a, + }), + }; pub const apple_a10 = CpuModel{ .name = "apple_a10", .llvm_name = "apple-a10", @@ -1552,6 +1776,7 @@ pub const cpu = struct { .pan, .perfmon, .rdm, + .store_pair_suppress, .v8a, .vh, .zcm, @@ -1571,6 +1796,7 @@ pub const cpu = struct { .fuse_aes, .fuse_crypto_eor, .perfmon, + .store_pair_suppress, .v8_2a, .zcm, .zcz, @@ -1589,6 +1815,7 @@ pub const cpu = struct { .fuse_aes, .fuse_crypto_eor, .perfmon, + .store_pair_suppress, .v8_3a, .zcm, .zcz, @@ -1608,6 +1835,7 @@ pub const cpu = struct { .fuse_crypto_eor, .perfmon, .sha3, + .store_pair_suppress, .v8_4a, .zcm, .zcz, @@ -1640,6 +1868,7 @@ pub const cpu = struct { .sha3, .specrestrict, .ssbs, + .store_pair_suppress, .v8_4a, .zcm, .zcz, @@ -1663,6 +1892,7 @@ pub const cpu = struct { .fuse_literals, .perfmon, .sha3, + .store_pair_suppress, .v8_6a, .zcm, .zcz, @@ -1687,6 +1917,32 @@ pub const cpu = struct { .hcx, .perfmon, .sha3, + .store_pair_suppress, + .v8_6a, + .zcm, + .zcz, + }), + }; + pub const apple_a17 = CpuModel{ + .name = "apple_a17", + .llvm_name = "apple-a17", + .features = featureSet(&[_]Feature{ + .alternate_sextload_cvt_f32_pattern, + .arith_bcc_fusion, + .arith_cbz_fusion, + .crypto, + .disable_latency_sched_heuristic, + .fp16fml, + .fuse_address, + .fuse_aes, + .fuse_arith_logic, + .fuse_crypto_eor, + .fuse_csel, + .fuse_literals, + .hcx, + .perfmon, + .sha3, + .store_pair_suppress, .v8_6a, .zcm, .zcz, @@ -1704,6 +1960,7 @@ pub const cpu = struct { .fuse_aes, .fuse_crypto_eor, .perfmon, + .store_pair_suppress, .v8a, .zcm, .zcz, 
@@ -1722,6 +1979,7 @@ pub const cpu = struct { .fuse_aes, .fuse_crypto_eor, .perfmon, + .store_pair_suppress, .v8a, .zcm, .zcz, @@ -1740,6 +1998,7 @@ pub const cpu = struct { .fuse_aes, .fuse_crypto_eor, .perfmon, + .store_pair_suppress, .v8a, .zcm, .zcz, @@ -1765,6 +2024,7 @@ pub const cpu = struct { .hcx, .perfmon, .sha3, + .store_pair_suppress, .v8_6a, .zcm, .zcz, @@ -1797,6 +2057,7 @@ pub const cpu = struct { .sha3, .specrestrict, .ssbs, + .store_pair_suppress, .v8_4a, .zcm, .zcz, @@ -1820,6 +2081,32 @@ pub const cpu = struct { .fuse_literals, .perfmon, .sha3, + .store_pair_suppress, + .v8_6a, + .zcm, + .zcz, + }), + }; + pub const apple_m3 = CpuModel{ + .name = "apple_m3", + .llvm_name = "apple-m3", + .features = featureSet(&[_]Feature{ + .alternate_sextload_cvt_f32_pattern, + .arith_bcc_fusion, + .arith_cbz_fusion, + .crypto, + .disable_latency_sched_heuristic, + .fp16fml, + .fuse_address, + .fuse_aes, + .fuse_arith_logic, + .fuse_crypto_eor, + .fuse_csel, + .fuse_literals, + .hcx, + .perfmon, + .sha3, + .store_pair_suppress, .v8_6a, .zcm, .zcz, @@ -1838,6 +2125,7 @@ pub const cpu = struct { .fuse_aes, .fuse_crypto_eor, .perfmon, + .store_pair_suppress, .v8_3a, .zcm, .zcz, @@ -1856,6 +2144,7 @@ pub const cpu = struct { .fuse_aes, .fuse_crypto_eor, .perfmon, + .store_pair_suppress, .v8_3a, .zcm, .zcz, @@ -1905,6 +2194,19 @@ pub const cpu = struct { .v9a, }), }; + pub const cortex_a520 = CpuModel{ + .name = "cortex_a520", + .llvm_name = "cortex-a520", + .features = featureSet(&[_]Feature{ + .a520, + .ete, + .fp16fml, + .mte, + .perfmon, + .sve2_bitperm, + .v9_2a, + }), + }; pub const cortex_a53 = CpuModel{ .name = "cortex_a53", .llvm_name = "cortex-a53", @@ -1912,7 +2214,6 @@ pub const cpu = struct { .balance_fp_ops, .crc, .crypto, - .custom_cheap_as_move, .fuse_adrp_add, .fuse_aes, .perfmon, @@ -1943,7 +2244,6 @@ pub const cpu = struct { .balance_fp_ops, .crc, .crypto, - .custom_cheap_as_move, .enable_select_opt, .fuse_adrp_add, .fuse_aes, @@ -2001,6 +2301,8 
@@ pub const cpu = struct { .name = "cortex_a715", .llvm_name = "cortex-a715", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .bf16, .cmp_bcc_fusion, .enable_select_opt, @@ -2009,7 +2311,6 @@ pub const cpu = struct { .fuse_adrp_add, .fuse_aes, .i8mm, - .lsl_fast, .mte, .perfmon, .predictable_select_expensive, @@ -2034,6 +2335,21 @@ pub const cpu = struct { .v8a, }), }; + pub const cortex_a720 = CpuModel{ + .name = "cortex_a720", + .llvm_name = "cortex-a720", + .features = featureSet(&[_]Feature{ + .a720, + .ete, + .fp16fml, + .mte, + .perfmon, + .spe, + .spe_eef, + .sve2_bitperm, + .v9_2a, + }), + }; pub const cortex_a73 = CpuModel{ .name = "cortex_a73", .llvm_name = "cortex-a73", @@ -2096,6 +2412,8 @@ pub const cpu = struct { .name = "cortex_a77", .llvm_name = "cortex-a77", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .cmp_bcc_fusion, .crypto, .dotprod, @@ -2103,7 +2421,6 @@ pub const cpu = struct { .fullfp16, .fuse_adrp_add, .fuse_aes, - .lsl_fast, .perfmon, .predictable_select_expensive, .rcpc, @@ -2134,7 +2451,7 @@ pub const cpu = struct { .crypto, .dotprod, .flagm, - .fp16fml, + .fullfp16, .pauth, .perfmon, .rcpc, @@ -2160,6 +2477,8 @@ pub const cpu = struct { .name = "cortex_x1", .llvm_name = "cortex-x1", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .cmp_bcc_fusion, .crypto, .dotprod, @@ -2167,7 +2486,6 @@ pub const cpu = struct { .fullfp16, .fuse_adrp_add, .fuse_aes, - .lsl_fast, .perfmon, .predictable_select_expensive, .rcpc, @@ -2181,6 +2499,8 @@ pub const cpu = struct { .name = "cortex_x1c", .llvm_name = "cortex-x1c", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .cmp_bcc_fusion, .crypto, .dotprod, @@ -2190,7 +2510,6 @@ pub const cpu = struct { .fuse_adrp_add, .fuse_aes, .lse2, - .lsl_fast, .pauth, .perfmon, .predictable_select_expensive, @@ -2205,6 +2524,8 @@ pub const cpu = struct { .name = "cortex_x2", .llvm_name = "cortex-x2", .features = 
featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .bf16, .cmp_bcc_fusion, .enable_select_opt, @@ -2213,7 +2534,6 @@ pub const cpu = struct { .fuse_adrp_add, .fuse_aes, .i8mm, - .lsl_fast, .mte, .perfmon, .predictable_select_expensive, @@ -2226,6 +2546,8 @@ pub const cpu = struct { .name = "cortex_x3", .llvm_name = "cortex-x3", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .bf16, .enable_select_opt, .ete, @@ -2233,7 +2555,6 @@ pub const cpu = struct { .fuse_adrp_add, .fuse_aes, .i8mm, - .lsl_fast, .mte, .perfmon, .predictable_select_expensive, @@ -2243,6 +2564,27 @@ pub const cpu = struct { .v9a, }), }; + pub const cortex_x4 = CpuModel{ + .name = "cortex_x4", + .llvm_name = "cortex-x4", + .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, + .enable_select_opt, + .ete, + .fp16fml, + .fuse_adrp_add, + .fuse_aes, + .mte, + .perfmon, + .predictable_select_expensive, + .spe, + .spe_eef, + .sve2_bitperm, + .use_postra_scheduler, + .v9_2a, + }), + }; pub const cyclone = CpuModel{ .name = "cyclone", .llvm_name = "cyclone", @@ -2255,6 +2597,7 @@ pub const cpu = struct { .fuse_aes, .fuse_crypto_eor, .perfmon, + .store_pair_suppress, .v8a, .zcm, .zcz, @@ -2308,6 +2651,8 @@ pub const cpu = struct { .name = "exynos_m3", .llvm_name = "exynos-m3", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .crc, .crypto, .exynos_cheap_as_move, @@ -2317,9 +2662,9 @@ pub const cpu = struct { .fuse_aes, .fuse_csel, .fuse_literals, - .lsl_fast, .perfmon, .predictable_select_expensive, + .store_pair_suppress, .use_postra_scheduler, .v8a, }), @@ -2328,6 +2673,8 @@ pub const cpu = struct { .name = "exynos_m4", .llvm_name = "exynos-m4", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .arith_bcc_fusion, .arith_cbz_fusion, .crypto, @@ -2341,8 +2688,8 @@ pub const cpu = struct { .fuse_arith_logic, .fuse_csel, .fuse_literals, - .lsl_fast, .perfmon, + .store_pair_suppress, .use_postra_scheduler, .v8_2a, 
.zcz, @@ -2352,6 +2699,8 @@ pub const cpu = struct { .name = "exynos_m5", .llvm_name = "exynos-m5", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .arith_bcc_fusion, .arith_cbz_fusion, .crypto, @@ -2365,8 +2714,8 @@ pub const cpu = struct { .fuse_arith_logic, .fuse_csel, .fuse_literals, - .lsl_fast, .perfmon, + .store_pair_suppress, .use_postra_scheduler, .v8_2a, .zcz, @@ -2376,14 +2725,15 @@ pub const cpu = struct { .name = "falkor", .llvm_name = "falkor", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .crc, .crypto, - .custom_cheap_as_move, - .lsl_fast, .perfmon, .predictable_select_expensive, .rdm, .slow_strqro_store, + .store_pair_suppress, .use_postra_scheduler, .v8a, .zcz, @@ -2405,12 +2755,13 @@ pub const cpu = struct { .name = "kryo", .llvm_name = "kryo", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .crc, .crypto, - .custom_cheap_as_move, - .lsl_fast, .perfmon, .predictable_select_expensive, + .store_pair_suppress, .use_postra_scheduler, .v8a, .zcz, @@ -2420,6 +2771,8 @@ pub const cpu = struct { .name = "neoverse_512tvb", .llvm_name = "neoverse-512tvb", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .bf16, .ccdp, .crypto, @@ -2428,7 +2781,6 @@ pub const cpu = struct { .fuse_adrp_add, .fuse_aes, .i8mm, - .lsl_fast, .perfmon, .predictable_select_expensive, .rand, @@ -2459,13 +2811,14 @@ pub const cpu = struct { .name = "neoverse_n1", .llvm_name = "neoverse-n1", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .crypto, .dotprod, .enable_select_opt, .fullfp16, .fuse_adrp_add, .fuse_aes, - .lsl_fast, .perfmon, .predictable_select_expensive, .rcpc, @@ -2479,26 +2832,28 @@ pub const cpu = struct { .name = "neoverse_n2", .llvm_name = "neoverse-n2", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .bf16, - .crypto, .enable_select_opt, .ete, .fuse_adrp_add, .fuse_aes, .i8mm, - .lsl_fast, .mte, .perfmon, 
.predictable_select_expensive, .sve2_bitperm, .use_postra_scheduler, - .v8_5a, + .v9a, }), }; pub const neoverse_v1 = CpuModel{ .name = "neoverse_v1", .llvm_name = "neoverse-v1", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .bf16, .ccdp, .crypto, @@ -2507,7 +2862,6 @@ pub const cpu = struct { .fuse_adrp_add, .fuse_aes, .i8mm, - .lsl_fast, .no_sve_fp_ld1r, .perfmon, .predictable_select_expensive, @@ -2523,13 +2877,15 @@ pub const cpu = struct { .name = "neoverse_v2", .llvm_name = "neoverse-v2", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .bf16, .enable_select_opt, .ete, .fp16fml, + .fuse_adrp_add, .fuse_aes, .i8mm, - .lsl_fast, .mte, .perfmon, .predictable_select_expensive, @@ -2544,12 +2900,13 @@ pub const cpu = struct { .name = "saphira", .llvm_name = "saphira", .features = featureSet(&[_]Feature{ + .addr_lsl_fast, + .alu_lsl_fast, .crypto, - .custom_cheap_as_move, - .lsl_fast, .perfmon, .predictable_select_expensive, .spe, + .store_pair_suppress, .use_postra_scheduler, .v8_4a, .zcz, @@ -2563,6 +2920,7 @@ pub const cpu = struct { .crypto, .perfmon, .predictable_select_expensive, + .store_pair_suppress, .use_postra_scheduler, .v8a, }), @@ -2575,6 +2933,7 @@ pub const cpu = struct { .arith_bcc_fusion, .crypto, .predictable_select_expensive, + .store_pair_suppress, .use_postra_scheduler, .v8_1a, }), @@ -2589,6 +2948,7 @@ pub const cpu = struct { .crypto, .perfmon, .predictable_select_expensive, + .store_pair_suppress, .strict_align, .use_postra_scheduler, .v8_3a, @@ -2602,6 +2962,7 @@ pub const cpu = struct { .crypto, .perfmon, .predictable_select_expensive, + .store_pair_suppress, .use_postra_scheduler, .v8a, }), @@ -2614,6 +2975,7 @@ pub const cpu = struct { .crypto, .perfmon, .predictable_select_expensive, + .store_pair_suppress, .use_postra_scheduler, .v8a, }), @@ -2626,6 +2988,7 @@ pub const cpu = struct { .crypto, .perfmon, .predictable_select_expensive, + .store_pair_suppress, .use_postra_scheduler, .v8a, 
}), @@ -2634,13 +2997,15 @@ pub const cpu = struct { .name = "tsv110", .llvm_name = "tsv110", .features = featureSet(&[_]Feature{ + .complxnum, .crypto, - .custom_cheap_as_move, .dotprod, .fp16fml, .fuse_aes, + .jsconv, .perfmon, .spe, + .store_pair_suppress, .use_postra_scheduler, .v8_2a, }), diff --git a/lib/std/Target/amdgpu.zig b/lib/std/Target/amdgpu.zig index 012f652088..5980fea1dc 100644 --- a/lib/std/Target/amdgpu.zig +++ b/lib/std/Target/amdgpu.zig @@ -13,6 +13,7 @@ pub const Feature = enum { architected_sgprs, atomic_buffer_global_pk_add_f16_insts, atomic_buffer_global_pk_add_f16_no_rtn_insts, + atomic_csub_no_rtn_insts, atomic_ds_pk_add_16_insts, atomic_fadd_no_rtn_insts, atomic_fadd_rtn_insts, @@ -22,6 +23,8 @@ pub const Feature = enum { back_off_barrier, ci_insts, cumode, + default_component_broadcast, + default_component_zero, dl_insts, dot10_insts, dot1_insts, @@ -36,6 +39,7 @@ pub const Feature = enum { dpp, dpp8, dpp_64bit, + dpp_src1_sgpr, ds128, ds_src2_insts, extended_image_insts, @@ -54,10 +58,12 @@ pub const Feature = enum { fmaf, force_store_sc0_sc1, fp64, + fp8_conversion_insts, fp8_insts, full_rate_64_ops, g16, gcn3_encoding, + gds, get_wave_id_inst, gfx10, gfx10_3_insts, @@ -67,12 +73,15 @@ pub const Feature = enum { gfx11, gfx11_full_vgprs, gfx11_insts, + gfx12, + gfx12_insts, gfx7_gfx8_gfx9_insts, gfx8_insts, gfx9, gfx90a_insts, gfx940_insts, gfx9_insts, + gws, half_rate_64_ops, image_gather4_d16_bug, image_insts, @@ -80,6 +89,7 @@ pub const Feature = enum { inst_fwd_prefetch_bug, int_clamp_insts, inv_2pi_inline_imm, + kernarg_preload, lds_branch_vmem_war_hazard, lds_misaligned_bug, ldsbankcount16, @@ -97,6 +107,7 @@ pub const Feature = enum { mfma_inline_literal_bug, mimg_r128, movrel, + msaa_load_dst_sel_bug, negative_scratch_offset_bug, negative_unaligned_scratch_offset_bug, no_data_dep_hazard, @@ -111,10 +122,15 @@ pub const Feature = enum { pk_fmac_f16_inst, promote_alloca, prt_strict_null, + pseudo_scalar_trans, r128_a16, + 
real_true16, + restricted_soffset, s_memrealtime, s_memtime_inst, + salu_float, scalar_atomics, + scalar_dwordx3_loads, scalar_flat_scratch_insts, scalar_stores, sdwa, @@ -125,6 +141,7 @@ pub const Feature = enum { sdwa_sdst, sea_islands, sgpr_init_bug, + shader_cycles_hi_lo_registers, shader_cycles_register, si_scheduler, smem_to_vector_write_hazard, @@ -146,6 +163,7 @@ pub const Feature = enum { vcmpx_exec_war_hazard, vcmpx_permlane_hazard, vgpr_index_mode, + vgpr_singleuse_hint, vmem_to_scalar_write_hazard, volcanic_islands, vop3_literal, @@ -212,6 +230,11 @@ pub const all_features = blk: { .flat_global_insts, }), }; + result[@intFromEnum(Feature.atomic_csub_no_rtn_insts)] = .{ + .llvm_name = "atomic-csub-no-rtn-insts", + .description = "Has buffer_atomic_csub and global_atomic_csub instructions that don't return original value", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.atomic_ds_pk_add_16_insts)] = .{ .llvm_name = "atomic-ds-pk-add-16-insts", .description = "Has ds_pk_add_bf16, ds_pk_add_f16, ds_pk_add_rtn_bf16, ds_pk_add_rtn_f16 instructions", @@ -263,6 +286,16 @@ pub const all_features = blk: { .description = "Enable CU wavefront execution mode", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.default_component_broadcast)] = .{ + .llvm_name = "default-component-broadcast", + .description = "BUFFER/IMAGE store instructions set unspecified components to x component (GFX12)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.default_component_zero)] = .{ + .llvm_name = "default-component-zero", + .description = "BUFFER/IMAGE store instructions set unspecified components to zero (before GFX12)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.dl_insts)] = .{ .llvm_name = "dl-insts", .description = "Has v_fmac_f32 and v_xnor_b32 instructions", @@ -330,7 +363,12 @@ pub const all_features = blk: { }; result[@intFromEnum(Feature.dpp_64bit)] = 
.{ .llvm_name = "dpp-64bit", - .description = "Support DPP (Data Parallel Primitives) extension", + .description = "Support DPP (Data Parallel Primitives) extension in DP ALU", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dpp_src1_sgpr)] = .{ + .llvm_name = "dpp-src1-sgpr", + .description = "Support SGPR for Src1 of DPP instructions", .dependencies = featureSet(&[_]Feature{}), }; result[@intFromEnum(Feature.ds128)] = .{ @@ -423,6 +461,11 @@ pub const all_features = blk: { .description = "Enable double precision operations", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.fp8_conversion_insts)] = .{ + .llvm_name = "fp8-conversion-insts", + .description = "Has fp8 and bf8 conversion instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.fp8_insts)] = .{ .llvm_name = "fp8-insts", .description = "Has fp8 and bf8 instructions", @@ -443,6 +486,11 @@ pub const all_features = blk: { .description = "Encoding format for VI", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.gds)] = .{ + .llvm_name = "gds", + .description = "Has Global Data Share", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.get_wave_id_inst)] = .{ .llvm_name = "get-wave-id-inst", .description = "Has s_get_waveid_in_workgroup instruction", @@ -457,6 +505,7 @@ pub const all_features = blk: { .add_no_carry_insts, .aperture_regs, .ci_insts, + .default_component_zero, .dpp, .dpp8, .extended_image_insts, @@ -469,9 +518,11 @@ pub const all_features = blk: { .fma_mix_insts, .fp64, .g16, + .gds, .gfx10_insts, .gfx8_insts, .gfx9_insts, + .gws, .image_insts, .int_clamp_insts, .inv_2pi_inline_imm, @@ -523,6 +574,7 @@ pub const all_features = blk: { .add_no_carry_insts, .aperture_regs, .ci_insts, + .default_component_zero, .dpp, .dpp8, .extended_image_insts, @@ -535,6 +587,7 @@ pub const all_features = blk: { .fma_mix_insts, .fp64, .g16, + .gds, .gfx10_3_insts, 
.gfx10_a_encoding, .gfx10_b_encoding, @@ -542,6 +595,7 @@ pub const all_features = blk: { .gfx11_insts, .gfx8_insts, .gfx9_insts, + .gws, .int_clamp_insts, .inv_2pi_inline_imm, .localmemorysize65536, @@ -569,6 +623,57 @@ pub const all_features = blk: { .description = "Additional instructions for GFX11+", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.gfx12)] = .{ + .llvm_name = "gfx12", + .description = "GFX12 GPU generation", + .dependencies = featureSet(&[_]Feature{ + .@"16_bit_insts", + .a16, + .add_no_carry_insts, + .aperture_regs, + .ci_insts, + .default_component_broadcast, + .dpp, + .dpp8, + .fast_denormal_f32, + .fast_fmaf, + .flat_address_space, + .flat_global_insts, + .flat_inst_offsets, + .flat_scratch_insts, + .fma_mix_insts, + .fp64, + .g16, + .gfx10_3_insts, + .gfx10_a_encoding, + .gfx10_b_encoding, + .gfx10_insts, + .gfx11_insts, + .gfx12_insts, + .gfx8_insts, + .gfx9_insts, + .int_clamp_insts, + .inv_2pi_inline_imm, + .localmemorysize65536, + .mimg_r128, + .movrel, + .no_data_dep_hazard, + .no_sdst_cmpx, + .pk_fmac_f16_inst, + .true16, + .unaligned_buffer_access, + .unaligned_ds_access, + .vop3_literal, + .vop3p, + .vopd, + .vscnt, + }), + }; + result[@intFromEnum(Feature.gfx12_insts)] = .{ + .llvm_name = "gfx12-insts", + .description = "Additional instructions for GFX12+", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.gfx7_gfx8_gfx9_insts)] = .{ .llvm_name = "gfx7-gfx8-gfx9-insts", .description = "Instructions shared in GFX7, GFX8, GFX9", @@ -588,6 +693,7 @@ pub const all_features = blk: { .add_no_carry_insts, .aperture_regs, .ci_insts, + .default_component_zero, .dpp, .fast_denormal_f32, .fast_fmaf, @@ -600,6 +706,7 @@ pub const all_features = blk: { .gfx7_gfx8_gfx9_insts, .gfx8_insts, .gfx9_insts, + .gws, .int_clamp_insts, .inv_2pi_inline_imm, .localmemorysize65536, @@ -637,6 +744,11 @@ pub const all_features = blk: { .description = "Additional instructions for GFX9+", .dependencies = 
featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.gws)] = .{ + .llvm_name = "gws", + .description = "Has Global Wave Sync", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.half_rate_64_ops)] = .{ .llvm_name = "half-rate-64-ops", .description = "Most fp64 instructions are half rate instead of quarter", @@ -672,6 +784,11 @@ pub const all_features = blk: { .description = "Has 1 / (2 * pi) as inline immediate", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.kernarg_preload)] = .{ + .llvm_name = "kernarg-preload", + .description = "Hardware supports preloading of kernel arguments in user SGPRs.", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.lds_branch_vmem_war_hazard)] = .{ .llvm_name = "lds-branch-vmem-war-hazard", .description = "Switching between LDS and VMEM-tex not waiting VM_VSRC=0", @@ -757,6 +874,11 @@ pub const all_features = blk: { .description = "Has v_movrel*_b32 instructions", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.msaa_load_dst_sel_bug)] = .{ + .llvm_name = "msaa-load-dst-sel-bug", + .description = "MSAA loads not honoring dst_sel bug", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.negative_scratch_offset_bug)] = .{ .llvm_name = "negative-scratch-offset-bug", .description = "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9", @@ -827,11 +949,26 @@ pub const all_features = blk: { .description = "Enable zeroing of result registers for sparse texture fetches", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.pseudo_scalar_trans)] = .{ + .llvm_name = "pseudo-scalar-trans", + .description = "Has Pseudo Scalar Transcendental instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.r128_a16)] = .{ .llvm_name = "r128-a16", .description = "Support gfx9-style A16 for 16-bit 
coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.real_true16)] = .{ + .llvm_name = "real-true16", + .description = "Use true 16-bit registers", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.restricted_soffset)] = .{ + .llvm_name = "restricted-soffset", + .description = "Has restricted SOffset (immediate not supported).", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.s_memrealtime)] = .{ .llvm_name = "s-memrealtime", .description = "Has s_memrealtime instruction", @@ -842,11 +979,21 @@ pub const all_features = blk: { .description = "Has s_memtime instruction", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.salu_float)] = .{ + .llvm_name = "salu-float", + .description = "Has SALU floating point instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.scalar_atomics)] = .{ .llvm_name = "scalar-atomics", .description = "Has atomic scalar memory instructions", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.scalar_dwordx3_loads)] = .{ + .llvm_name = "scalar-dwordx3-loads", + .description = "Has 96-bit scalar load instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.scalar_flat_scratch_insts)] = .{ .llvm_name = "scalar-flat-scratch-insts", .description = "Have s_scratch_* flat memory instructions", @@ -892,11 +1039,14 @@ pub const all_features = blk: { .description = "SEA_ISLANDS GPU generation", .dependencies = featureSet(&[_]Feature{ .ci_insts, + .default_component_zero, .ds_src2_insts, .extended_image_insts, .flat_address_space, .fp64, + .gds, .gfx7_gfx8_gfx9_insts, + .gws, .image_insts, .localmemorysize65536, .mad_mac_f32_insts, @@ -913,6 +1063,11 @@ pub const all_features = blk: { .description = "VI SGPR initialization bug requiring a fixed SGPR allocation 
size", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.shader_cycles_hi_lo_registers)] = .{ + .llvm_name = "shader-cycles-hi-lo-registers", + .description = "Has SHADER_CYCLES_HI/LO hardware registers", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.shader_cycles_register)] = .{ .llvm_name = "shader-cycles-register", .description = "Has SHADER_CYCLES hardware register", @@ -932,9 +1087,12 @@ pub const all_features = blk: { .llvm_name = "southern-islands", .description = "SOUTHERN_ISLANDS GPU generation", .dependencies = featureSet(&[_]Feature{ + .default_component_zero, .ds_src2_insts, .extended_image_insts, .fp64, + .gds, + .gws, .image_insts, .ldsbankcount32, .localmemorysize32768, @@ -1031,6 +1189,11 @@ pub const all_features = blk: { .description = "Has VGPR mode register indexing", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.vgpr_singleuse_hint)] = .{ + .llvm_name = "vgpr-singleuse-hint", + .description = "Has single-use VGPR hint instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.vmem_to_scalar_write_hazard)] = .{ .llvm_name = "vmem-to-scalar-write-hazard", .description = "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution.", @@ -1042,6 +1205,7 @@ pub const all_features = blk: { .dependencies = featureSet(&[_]Feature{ .@"16_bit_insts", .ci_insts, + .default_component_zero, .dpp, .ds_src2_insts, .extended_image_insts, @@ -1049,8 +1213,10 @@ pub const all_features = blk: { .flat_address_space, .fp64, .gcn3_encoding, + .gds, .gfx7_gfx8_gfx9_insts, .gfx8_insts, + .gws, .image_insts, .int_clamp_insts, .inv_2pi_inline_imm, @@ -1157,6 +1323,8 @@ pub const cpu = struct { .name = "generic", .llvm_name = "generic", .features = featureSet(&[_]Feature{ + .gds, + .gws, .wavefrontsize64, }), }; @@ -1165,6 +1333,8 @@ pub const cpu = struct { .llvm_name = "generic-hsa", .features = 
featureSet(&[_]Feature{ .flat_address_space, + .gds, + .gws, .wavefrontsize64, }), }; @@ -1478,6 +1648,7 @@ pub const cpu = struct { .image_insts, .ldsbankcount32, .mad_intra_fwd_bug, + .msaa_load_dst_sel_bug, .nsa_encoding, .packed_tid, .partial_nsa_encoding, @@ -1507,6 +1678,7 @@ pub const cpu = struct { .image_insts, .ldsbankcount32, .mad_intra_fwd_bug, + .msaa_load_dst_sel_bug, .nsa_encoding, .packed_tid, .partial_nsa_encoding, @@ -1534,6 +1706,7 @@ pub const cpu = struct { .image_insts, .ldsbankcount32, .mad_intra_fwd_bug, + .msaa_load_dst_sel_bug, .nsa_encoding, .packed_tid, .partial_nsa_encoding, @@ -1562,6 +1735,7 @@ pub const cpu = struct { .image_insts, .ldsbankcount32, .mad_intra_fwd_bug, + .msaa_load_dst_sel_bug, .nsa_encoding, .packed_tid, .partial_nsa_encoding, @@ -1584,6 +1758,7 @@ pub const cpu = struct { .dot7_insts, .dot8_insts, .dot9_insts, + .dpp_src1_sgpr, .flat_atomic_fadd_f32_inst, .gfx11, .image_insts, @@ -1592,8 +1767,10 @@ pub const cpu = struct { .nsa_encoding, .packed_tid, .partial_nsa_encoding, + .salu_float, .shader_cycles_register, .vcmpx_permlane_hazard, + .vgpr_singleuse_hint, .wavefrontsize32, }), }; @@ -1610,6 +1787,7 @@ pub const cpu = struct { .dot7_insts, .dot8_insts, .dot9_insts, + .dpp_src1_sgpr, .flat_atomic_fadd_f32_inst, .gfx11, .gfx11_full_vgprs, @@ -1619,8 +1797,84 @@ pub const cpu = struct { .nsa_encoding, .packed_tid, .partial_nsa_encoding, + .salu_float, .shader_cycles_register, .vcmpx_permlane_hazard, + .vgpr_singleuse_hint, + .wavefrontsize32, + }), + }; + pub const gfx1200 = CpuModel{ + .name = "gfx1200", + .llvm_name = "gfx1200", + .features = featureSet(&[_]Feature{ + .architected_flat_scratch, + .architected_sgprs, + .atomic_buffer_global_pk_add_f16_insts, + .atomic_ds_pk_add_16_insts, + .atomic_fadd_no_rtn_insts, + .atomic_fadd_rtn_insts, + .atomic_flat_pk_add_16_insts, + .atomic_global_pk_add_bf16_inst, + .dl_insts, + .dot10_insts, + .dot7_insts, + .dot8_insts, + .dot9_insts, + .dpp_src1_sgpr, + 
.extended_image_insts, + .flat_atomic_fadd_f32_inst, + .fp8_conversion_insts, + .gfx12, + .image_insts, + .ldsbankcount32, + .nsa_encoding, + .packed_tid, + .partial_nsa_encoding, + .pseudo_scalar_trans, + .restricted_soffset, + .salu_float, + .scalar_dwordx3_loads, + .shader_cycles_hi_lo_registers, + .vcmpx_permlane_hazard, + .vgpr_singleuse_hint, + .wavefrontsize32, + }), + }; + pub const gfx1201 = CpuModel{ + .name = "gfx1201", + .llvm_name = "gfx1201", + .features = featureSet(&[_]Feature{ + .architected_flat_scratch, + .architected_sgprs, + .atomic_buffer_global_pk_add_f16_insts, + .atomic_ds_pk_add_16_insts, + .atomic_fadd_no_rtn_insts, + .atomic_fadd_rtn_insts, + .atomic_flat_pk_add_16_insts, + .atomic_global_pk_add_bf16_inst, + .dl_insts, + .dot10_insts, + .dot7_insts, + .dot8_insts, + .dot9_insts, + .dpp_src1_sgpr, + .extended_image_insts, + .flat_atomic_fadd_f32_inst, + .fp8_conversion_insts, + .gfx12, + .image_insts, + .ldsbankcount32, + .nsa_encoding, + .packed_tid, + .partial_nsa_encoding, + .pseudo_scalar_trans, + .restricted_soffset, + .salu_float, + .scalar_dwordx3_loads, + .shader_cycles_hi_lo_registers, + .vcmpx_permlane_hazard, + .vgpr_singleuse_hint, .wavefrontsize32, }), }; @@ -1756,6 +2010,7 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .ds_src2_insts, .extended_image_insts, + .gds, .gfx9, .image_gather4_d16_bug, .image_insts, @@ -1770,6 +2025,7 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .ds_src2_insts, .extended_image_insts, + .gds, .gfx9, .image_gather4_d16_bug, .image_insts, @@ -1785,6 +2041,7 @@ pub const cpu = struct { .ds_src2_insts, .extended_image_insts, .fma_mix_insts, + .gds, .gfx9, .image_gather4_d16_bug, .image_insts, @@ -1804,6 +2061,7 @@ pub const cpu = struct { .ds_src2_insts, .extended_image_insts, .fma_mix_insts, + .gds, .gfx9, .half_rate_64_ops, .image_gather4_d16_bug, @@ -1831,6 +2089,7 @@ pub const cpu = struct { .ds_src2_insts, .extended_image_insts, .fma_mix_insts, + .gds, .gfx9, 
.half_rate_64_ops, .image_gather4_d16_bug, @@ -1849,6 +2108,7 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .ds_src2_insts, .extended_image_insts, + .gds, .gfx9, .image_gather4_d16_bug, .image_insts, @@ -1881,6 +2141,7 @@ pub const cpu = struct { .gfx9, .gfx90a_insts, .image_insts, + .kernarg_preload, .ldsbankcount32, .mad_mac_f32_insts, .mai_insts, @@ -1896,6 +2157,7 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .ds_src2_insts, .extended_image_insts, + .gds, .gfx9, .image_gather4_d16_bug, .image_insts, @@ -1930,11 +2192,13 @@ pub const cpu = struct { .fma_mix_insts, .fmacf64_inst, .force_store_sc0_sc1, + .fp8_conversion_insts, .fp8_insts, .full_rate_64_ops, .gfx9, .gfx90a_insts, .gfx940_insts, + .kernarg_preload, .ldsbankcount32, .mai_insts, .packed_fp32_ops, @@ -1969,11 +2233,13 @@ pub const cpu = struct { .fma_mix_insts, .fmacf64_inst, .force_store_sc0_sc1, + .fp8_conversion_insts, .fp8_insts, .full_rate_64_ops, .gfx9, .gfx90a_insts, .gfx940_insts, + .kernarg_preload, .ldsbankcount32, .mai_insts, .packed_fp32_ops, @@ -2007,11 +2273,13 @@ pub const cpu = struct { .flat_atomic_fadd_f32_inst, .fma_mix_insts, .fmacf64_inst, + .fp8_conversion_insts, .fp8_insts, .full_rate_64_ops, .gfx9, .gfx90a_insts, .gfx940_insts, + .kernarg_preload, .ldsbankcount32, .mai_insts, .packed_fp32_ops, diff --git a/lib/std/Target/arm.zig b/lib/std/Target/arm.zig index 4cc5d5559a..49e94ddb82 100644 --- a/lib/std/Target/arm.zig +++ b/lib/std/Target/arm.zig @@ -186,6 +186,8 @@ pub const Feature = enum { v9_2a, v9_3a, v9_4a, + has_v9_5a, + v9_5a, v9a, vfp2, vfp2sp, @@ -1572,6 +1574,29 @@ pub const all_features = blk: { .virtualization, }), }; + result[@intFromEnum(Feature.v9_5a)] = .{ + .llvm_name = "armv9.5-a", + .description = "ARMv95a architecture", + .dependencies = featureSet(&[_]Feature{ + .aclass, + .crc, + .db, + .dsp, + .fp_armv8, + .mp, + .ras, + .trustzone, + .has_v9_5a, + .virtualization, + }), + }; + result[@intFromEnum(Feature.has_v9_5a)] = .{ +
.llvm_name = "v9.5a", + .description = "Support ARM v9.5a instructions", + .dependencies = featureSet(&[_]Feature{ + .has_v9_4a, + }), + }; result[@intFromEnum(Feature.v9a)] = .{ .llvm_name = "armv9-a", .description = "ARMv9a architecture", @@ -2238,6 +2263,21 @@ pub const cpu = struct { .v7em, }), }; + pub const cortex_m52 = CpuModel{ + .name = "cortex_m52", + .llvm_name = "cortex-m52", + .features = featureSet(&[_]Feature{ + .fp_armv8d16, + .loop_align, + .mve1beat, + .mve_fp, + .no_branch_predictor, + .pacbti, + .slowfpvmlx, + .use_misched, + .v8_1m_main, + }), + }; pub const cortex_m55 = CpuModel{ .name = "cortex_m55", .llvm_name = "cortex-m55", @@ -2499,7 +2539,7 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .bf16, .i8mm, - .v8_5a, + .v9a, }), }; pub const neoverse_v1 = CpuModel{ diff --git a/lib/std/Target/bpf.zig b/lib/std/Target/bpf.zig index 40dc2d1c7e..f1d96505b3 100644 --- a/lib/std/Target/bpf.zig +++ b/lib/std/Target/bpf.zig @@ -70,4 +70,11 @@ pub const cpu = struct { .alu32, }), }; + pub const v4 = CpuModel{ + .name = "v4", + .llvm_name = "v4", + .features = featureSet(&[_]Feature{ + .alu32, + }), + }; }; diff --git a/lib/std/Target/loongarch.zig b/lib/std/Target/loongarch.zig index 65dc33d784..5a4cdeeeb7 100644 --- a/lib/std/Target/loongarch.zig +++ b/lib/std/Target/loongarch.zig @@ -7,8 +7,10 @@ const CpuModel = std.Target.Cpu.Model; pub const Feature = enum { @"32bit", @"64bit", + auto_vec, d, f, + frecipe, la_global_with_abs, la_global_with_pcrel, la_local_with_abs, @@ -16,6 +18,7 @@ pub const Feature = enum { lbt, lsx, lvz, + relax, ual, }; @@ -38,6 +41,11 @@ pub const all_features = blk: { .description = "LA64 Basic Integer and Privilege Instruction Set", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.auto_vec)] = .{ + .llvm_name = "auto-vec", + .description = "Experimental auto vectorization", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.d)] = .{ .llvm_name = "d", 
.description = "'D' (Double-Precision Floating-Point)", @@ -50,6 +58,11 @@ pub const all_features = blk: { .description = "'F' (Single-Precision Floating-Point)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.frecipe)] = .{ + .llvm_name = "frecipe", + .description = "Support frecipe.{s/d} and frsqrte.{s/d} instructions.", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.la_global_with_abs)] = .{ .llvm_name = "la-global-with-abs", .description = "Expand la.global as la.abs", @@ -89,6 +102,11 @@ pub const all_features = blk: { .description = "'LVZ' (Loongson Virtualization Extension)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.relax)] = .{ + .llvm_name = "relax", + .description = "Enable Linker relaxation", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.ual)] = .{ .llvm_name = "ual", .description = "Allow memory accesses to be unaligned", diff --git a/lib/std/Target/nvptx.zig b/lib/std/Target/nvptx.zig index 99a281604b..9cc953a8d4 100644 --- a/lib/std/Target/nvptx.zig +++ b/lib/std/Target/nvptx.zig @@ -27,6 +27,8 @@ pub const Feature = enum { ptx78, ptx80, ptx81, + ptx82, + ptx83, sm_20, sm_21, sm_30, @@ -47,6 +49,7 @@ pub const Feature = enum { sm_87, sm_89, sm_90, + sm_90a, }; pub const featureSet = CpuFeature.feature_set_fns(Feature).featureSet; @@ -168,6 +171,16 @@ pub const all_features = blk: { .description = "Use PTX version 81", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.ptx82)] = .{ + .llvm_name = "ptx82", + .description = "Use PTX version 82", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.ptx83)] = .{ + .llvm_name = "ptx83", + .description = "Use PTX version 83", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.sm_20)] = .{ .llvm_name = "sm_20", .description = "Target SM 20", @@ -268,6 +281,11 @@ pub const all_features = blk: { .description 
= "Target SM 90", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.sm_90a)] = .{ + .llvm_name = "sm_90a", + .description = "Target SM 90a", + .dependencies = featureSet(&[_]Feature{}), + }; const ti = @typeInfo(Feature); for (&result, 0..) |*elem, i| { elem.index = i; @@ -436,4 +454,12 @@ pub const cpu = struct { .sm_90, }), }; + pub const sm_90a = CpuModel{ + .name = "sm_90a", + .llvm_name = "sm_90a", + .features = featureSet(&[_]Feature{ + .ptx80, + .sm_90a, + }), + }; }; diff --git a/lib/std/Target/powerpc.zig b/lib/std/Target/powerpc.zig index c350c166ba..b2b3c4988e 100644 --- a/lib/std/Target/powerpc.zig +++ b/lib/std/Target/powerpc.zig @@ -8,6 +8,7 @@ pub const Feature = enum { @"64bit", @"64bitregs", aix, + aix_small_local_exec_tls, allow_unaligned_fp_access, altivec, booke, @@ -112,6 +113,11 @@ pub const all_features = blk: { .description = "AIX OS", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.aix_small_local_exec_tls)] = .{ + .llvm_name = "aix-small-local-exec-tls", + .description = "Produce a TOC-free local-exec TLS sequence for this function for 64-bit AIX", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.allow_unaligned_fp_access)] = .{ .llvm_name = "allow-unaligned-fp-access", .description = "CPU does not trap on unaligned FP access", diff --git a/lib/std/Target/riscv.zig b/lib/std/Target/riscv.zig index 66c02ae6d0..158468b7fb 100644 --- a/lib/std/Target/riscv.zig +++ b/lib/std/Target/riscv.zig @@ -8,38 +8,28 @@ pub const Feature = enum { @"32bit", @"64bit", a, + auipc_addi_fusion, c, + conditional_cmv_fusion, d, dlen_factor_2, e, - experimental_smaia, - experimental_ssaia, + experimental, experimental_zacas, - experimental_zfa, + experimental_zcmop, experimental_zfbfmin, - experimental_zicond, - experimental_zihintntl, + experimental_zicfilp, + experimental_zicfiss, + experimental_zimop, experimental_ztso, - experimental_zvbb, - experimental_zvbc, 
experimental_zvfbfmin, experimental_zvfbfwma, - experimental_zvkg, - experimental_zvkn, - experimental_zvknc, - experimental_zvkned, - experimental_zvkng, - experimental_zvknha, - experimental_zvknhb, - experimental_zvks, - experimental_zvksc, - experimental_zvksed, - experimental_zvksg, - experimental_zvksh, - experimental_zvkt, f, + fast_unaligned_access, forced_atomics, h, + i, + ld_add_fusion, lui_addi_fusion, m, no_default_unroll, @@ -79,18 +69,30 @@ pub const Feature = enum { reserve_x9, save_restore, seq_cst_trailing_fence, + shifted_zextw_fusion, short_forward_branch_opt, + smaia, + smepmp, + ssaia, svinval, svnapot, svpbmt, tagged_globals, - unaligned_scalar_mem, - unaligned_vector_mem, + use_postra_scheduler, v, + ventana_veyron, + xcvalu, + xcvbi, xcvbitmanip, + xcvelw, xcvmac, - xsfcie, + xcvmem, + xcvsimd, xsfvcp, + xsfvfnrclipxfqf, + xsfvfwmaccqqq, + xsfvqmaccdod, + xsfvqmaccqoq, xtheadba, xtheadbb, xtheadbs, @@ -103,6 +105,8 @@ pub const Feature = enum { xtheadsync, xtheadvdot, xventanacondops, + za128rs, + za64rs, zawrs, zba, zbb, @@ -119,17 +123,27 @@ pub const Feature = enum { zcmp, zcmt, zdinx, + zexth_fusion, + zextw_fusion, + zfa, zfh, zfhmin, zfinx, zhinx, zhinxmin, + zic64b, zicbom, zicbop, zicboz, + ziccamoa, + ziccif, + zicclsm, + ziccrse, zicntr, + zicond, zicsr, zifencei, + zihintntl, zihintpause, zihpm, zk, @@ -143,12 +157,29 @@ pub const Feature = enum { zksh, zkt, zmmul, + zvbb, + zvbc, zve32f, zve32x, zve64d, zve64f, zve64x, zvfh, + zvfhmin, + zvkb, + zvkg, + zvkn, + zvknc, + zvkned, + zvkng, + zvknha, + zvknhb, + zvks, + zvksc, + zvksed, + zvksg, + zvksh, + zvkt, zvl1024b, zvl128b, zvl16384b, @@ -187,11 +218,21 @@ pub const all_features = blk: { .description = "'A' (Atomic Instructions)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.auipc_addi_fusion)] = .{ + .llvm_name = "auipc-addi-fusion", + .description = "Enable AUIPC+ADDI macrofusion", + .dependencies = featureSet(&[_]Feature{}), + }; 
result[@intFromEnum(Feature.c)] = .{ .llvm_name = "c", .description = "'C' (Compressed Instructions)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.conditional_cmv_fusion)] = .{ + .llvm_name = "conditional-cmv-fusion", + .description = "Enable branch+c.mv fusion", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.d)] = .{ .llvm_name = "d", .description = "'D' (Double-Precision Floating-Point)", @@ -209,14 +250,9 @@ pub const all_features = blk: { .description = "Implements RV{32,64}E (provides 16 rather than 32 GPRs)", .dependencies = featureSet(&[_]Feature{}), }; - result[@intFromEnum(Feature.experimental_smaia)] = .{ - .llvm_name = "experimental-smaia", - .description = "'Smaia' (Smaia encompasses all added CSRs and all modifications to interrupt response behavior that the AIA specifies for a hart, over all privilege levels.)", - .dependencies = featureSet(&[_]Feature{}), - }; - result[@intFromEnum(Feature.experimental_ssaia)] = .{ - .llvm_name = "experimental-ssaia", - .description = "'Ssaia' (Ssaia is essentially the same as Smaia except excluding the machine-level CSRs and behavior not directly visible to supervisor level.)", + result[@intFromEnum(Feature.experimental)] = .{ + .llvm_name = "experimental", + .description = "Experimental intrinsics", .dependencies = featureSet(&[_]Feature{}), }; result[@intFromEnum(Feature.experimental_zacas)] = .{ @@ -224,11 +260,11 @@ pub const all_features = blk: { .description = "'Zacas' (Atomic Compare-And-Swap Instructions)", .dependencies = featureSet(&[_]Feature{}), }; - result[@intFromEnum(Feature.experimental_zfa)] = .{ - .llvm_name = "experimental-zfa", - .description = "'Zfa' (Additional Floating-Point)", + result[@intFromEnum(Feature.experimental_zcmop)] = .{ + .llvm_name = "experimental-zcmop", + .description = "'Zcmop' (Compressed May-Be-Operations)", .dependencies = featureSet(&[_]Feature{ - .f, + .zca, }), }; 
result[@intFromEnum(Feature.experimental_zfbfmin)] = .{ @@ -238,14 +274,22 @@ pub const all_features = blk: { .f, }), }; - result[@intFromEnum(Feature.experimental_zicond)] = .{ - .llvm_name = "experimental-zicond", - .description = "'Zicond' (Integer Conditional Operations)", + result[@intFromEnum(Feature.experimental_zicfilp)] = .{ + .llvm_name = "experimental-zicfilp", + .description = "'Zicfilp' (Landing pad)", .dependencies = featureSet(&[_]Feature{}), }; - result[@intFromEnum(Feature.experimental_zihintntl)] = .{ - .llvm_name = "experimental-zihintntl", - .description = "'Zihintntl' (Non-Temporal Locality Hints)", + result[@intFromEnum(Feature.experimental_zicfiss)] = .{ + .llvm_name = "experimental-zicfiss", + .description = "'Zicfiss' (Shadow stack)", + .dependencies = featureSet(&[_]Feature{ + .experimental_zimop, + .zicsr, + }), + }; + result[@intFromEnum(Feature.experimental_zimop)] = .{ + .llvm_name = "experimental-zimop", + .description = "'Zimop' (May-Be-Operations)", .dependencies = featureSet(&[_]Feature{}), }; result[@intFromEnum(Feature.experimental_ztso)] = .{ @@ -253,16 +297,6 @@ pub const all_features = blk: { .description = "'Ztso' (Memory Model - Total Store Order)", .dependencies = featureSet(&[_]Feature{}), }; - result[@intFromEnum(Feature.experimental_zvbb)] = .{ - .llvm_name = "experimental-zvbb", - .description = "'Zvbb' (Vector Bit-manipulation used in Cryptography)", - .dependencies = featureSet(&[_]Feature{}), - }; - result[@intFromEnum(Feature.experimental_zvbc)] = .{ - .llvm_name = "experimental-zvbc", - .description = "'Zvbc' (Vector Carryless Multiplication)", - .dependencies = featureSet(&[_]Feature{}), - }; result[@intFromEnum(Feature.experimental_zvfbfmin)] = .{ .llvm_name = "experimental-zvfbfmin", .description = "'Zvbfmin' (Vector BF16 Converts)", @@ -274,76 +308,10 @@ pub const all_features = blk: { .llvm_name = "experimental-zvfbfwma", .description = "'Zvfbfwma' (Vector BF16 widening mul-add)", .dependencies = 
featureSet(&[_]Feature{ - .zve32f, + .experimental_zfbfmin, + .experimental_zvfbfmin, }), }; - result[@intFromEnum(Feature.experimental_zvkg)] = .{ - .llvm_name = "experimental-zvkg", - .description = "'Zvkg' (Vector GCM instructions for Cryptography)", - .dependencies = featureSet(&[_]Feature{}), - }; - result[@intFromEnum(Feature.experimental_zvkn)] = .{ - .llvm_name = "experimental-zvkn", - .description = "This extension is shorthand for the following set of other extensions: Zvkned, Zvknhb, Zvbb, Zvbc, and Zvkt.", - .dependencies = featureSet(&[_]Feature{}), - }; - result[@intFromEnum(Feature.experimental_zvknc)] = .{ - .llvm_name = "experimental-zvknc", - .description = "This extension is shorthand for the following set of other extensions: Zvkn and Zvbc.", - .dependencies = featureSet(&[_]Feature{}), - }; - result[@intFromEnum(Feature.experimental_zvkned)] = .{ - .llvm_name = "experimental-zvkned", - .description = "'Zvkned' (Vector AES Encryption & Decryption (Single Round))", - .dependencies = featureSet(&[_]Feature{}), - }; - result[@intFromEnum(Feature.experimental_zvkng)] = .{ - .llvm_name = "experimental-zvkng", - .description = "This extension is shorthand for the following set of other extensions: Zvkn and Zvkg.", - .dependencies = featureSet(&[_]Feature{}), - }; - result[@intFromEnum(Feature.experimental_zvknha)] = .{ - .llvm_name = "experimental-zvknha", - .description = "'Zvknha' (Vector SHA-2 (SHA-256 only))", - .dependencies = featureSet(&[_]Feature{}), - }; - result[@intFromEnum(Feature.experimental_zvknhb)] = .{ - .llvm_name = "experimental-zvknhb", - .description = "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))", - .dependencies = featureSet(&[_]Feature{ - .experimental_zvknha, - }), - }; - result[@intFromEnum(Feature.experimental_zvks)] = .{ - .llvm_name = "experimental-zvks", - .description = "This extension is shorthand for the following set of other extensions: Zvksed, Zvksh, Zvbb, Zvbc, and Zvkt.", - .dependencies = 
featureSet(&[_]Feature{}), - }; - result[@intFromEnum(Feature.experimental_zvksc)] = .{ - .llvm_name = "experimental-zvksc", - .description = "This extension is shorthand for the following set of other extensions: Zvks and Zvbc.", - .dependencies = featureSet(&[_]Feature{}), - }; - result[@intFromEnum(Feature.experimental_zvksed)] = .{ - .llvm_name = "experimental-zvksed", - .description = "'Zvksed' (SM4 Block Cipher Instructions)", - .dependencies = featureSet(&[_]Feature{}), - }; - result[@intFromEnum(Feature.experimental_zvksg)] = .{ - .llvm_name = "experimental-zvksg", - .description = "This extension is shorthand for the following set of other extensions: Zvks and Zvkg.", - .dependencies = featureSet(&[_]Feature{}), - }; - result[@intFromEnum(Feature.experimental_zvksh)] = .{ - .llvm_name = "experimental-zvksh", - .description = "'Zvksh' (SM3 Hash Function Instructions)", - .dependencies = featureSet(&[_]Feature{}), - }; - result[@intFromEnum(Feature.experimental_zvkt)] = .{ - .llvm_name = "experimental-zvkt", - .description = "'Zvkt' (Vector Data-Independent Execution Latency)", - .dependencies = featureSet(&[_]Feature{}), - }; result[@intFromEnum(Feature.f)] = .{ .llvm_name = "f", .description = "'F' (Single-Precision Floating-Point)", @@ -351,6 +319,11 @@ pub const all_features = blk: { .zicsr, }), }; + result[@intFromEnum(Feature.fast_unaligned_access)] = .{ + .llvm_name = "fast-unaligned-access", + .description = "Has reasonably performant unaligned loads and stores (both scalar and vector)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.forced_atomics)] = .{ .llvm_name = "forced-atomics", .description = "Assume that lock-free native-width atomics are available", @@ -361,9 +334,19 @@ pub const all_features = blk: { .description = "'H' (Hypervisor)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.i)] = .{ + .llvm_name = "i", + .description = "'I' (Base Integer Instruction Set)", + .dependencies = 
featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.ld_add_fusion)] = .{ + .llvm_name = "ld-add-fusion", + .description = "Enable LD+ADD macrofusion", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.lui_addi_fusion)] = .{ .llvm_name = "lui-addi-fusion", - .description = "Enable LUI+ADDI macrofusion", + .description = "Enable LUI+ADDI macro fusion", .dependencies = featureSet(&[_]Feature{}), }; result[@intFromEnum(Feature.m)] = .{ @@ -556,11 +539,31 @@ pub const all_features = blk: { .description = "Enable trailing fence for seq-cst store.", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.shifted_zextw_fusion)] = .{ + .llvm_name = "shifted-zextw-fusion", + .description = "Enable SLLI+SRLI to be fused when computing (shifted) word zero extension", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.short_forward_branch_opt)] = .{ .llvm_name = "short-forward-branch-opt", .description = "Enable short forward branch optimization", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.smaia)] = .{ + .llvm_name = "smaia", + .description = "'Smaia' (Advanced Interrupt Architecture Machine Level)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.smepmp)] = .{ + .llvm_name = "smepmp", + .description = "'Smepmp' (Enhanced Physical Memory Protection)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.ssaia)] = .{ + .llvm_name = "ssaia", + .description = "'Ssaia' (Advanced Interrupt Architecture Supervisor Level)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.svinval)] = .{ .llvm_name = "svinval", .description = "'Svinval' (Fine-Grained Address-Translation Cache Invalidation)", @@ -581,14 +584,9 @@ pub const all_features = blk: { .description = "Use an instruction sequence for taking the address of a global that allows a memory tag in the upper address bits", 
.dependencies = featureSet(&[_]Feature{}), }; - result[@intFromEnum(Feature.unaligned_scalar_mem)] = .{ - .llvm_name = "unaligned-scalar-mem", - .description = "Has reasonably performant unaligned scalar loads and stores", - .dependencies = featureSet(&[_]Feature{}), - }; - result[@intFromEnum(Feature.unaligned_vector_mem)] = .{ - .llvm_name = "unaligned-vector-mem", - .description = "Has reasonably performant unaligned vector loads and stores", + result[@intFromEnum(Feature.use_postra_scheduler)] = .{ + .llvm_name = "use-postra-scheduler", + .description = "Schedule again after register allocation", .dependencies = featureSet(&[_]Feature{}), }; result[@intFromEnum(Feature.v)] = .{ @@ -599,19 +597,44 @@ pub const all_features = blk: { .zvl128b, }), }; + result[@intFromEnum(Feature.ventana_veyron)] = .{ + .llvm_name = "ventana-veyron", + .description = "Ventana Veyron-Series processors", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.xcvalu)] = .{ + .llvm_name = "xcvalu", + .description = "'XCValu' (CORE-V ALU Operations)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.xcvbi)] = .{ + .llvm_name = "xcvbi", + .description = "'XCVbi' (CORE-V Immediate Branching)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.xcvbitmanip)] = .{ .llvm_name = "xcvbitmanip", .description = "'XCVbitmanip' (CORE-V Bit Manipulation)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.xcvelw)] = .{ + .llvm_name = "xcvelw", + .description = "'XCVelw' (CORE-V Event Load Word)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.xcvmac)] = .{ .llvm_name = "xcvmac", .description = "'XCVmac' (CORE-V Multiply-Accumulate)", .dependencies = featureSet(&[_]Feature{}), }; - result[@intFromEnum(Feature.xsfcie)] = .{ - .llvm_name = "xsfcie", - .description = "'XSfcie' (SiFive Custom Instruction Extension SCIE.)", + result[@intFromEnum(Feature.xcvmem)] = 
.{ + .llvm_name = "xcvmem", + .description = "'XCVmem' (CORE-V Post-incrementing Load & Store)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.xcvsimd)] = .{ + .llvm_name = "xcvsimd", + .description = "'XCVsimd' (CORE-V SIMD ALU)", .dependencies = featureSet(&[_]Feature{}), }; result[@intFromEnum(Feature.xsfvcp)] = .{ @@ -621,6 +644,34 @@ pub const all_features = blk: { .zve32x, }), }; + result[@intFromEnum(Feature.xsfvfnrclipxfqf)] = .{ + .llvm_name = "xsfvfnrclipxfqf", + .description = "'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions)", + .dependencies = featureSet(&[_]Feature{ + .zve32f, + }), + }; + result[@intFromEnum(Feature.xsfvfwmaccqqq)] = .{ + .llvm_name = "xsfvfwmaccqqq", + .description = "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4))", + .dependencies = featureSet(&[_]Feature{ + .experimental_zvfbfmin, + }), + }; + result[@intFromEnum(Feature.xsfvqmaccdod)] = .{ + .llvm_name = "xsfvqmaccdod", + .description = "'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2))", + .dependencies = featureSet(&[_]Feature{ + .zve32x, + }), + }; + result[@intFromEnum(Feature.xsfvqmaccqoq)] = .{ + .llvm_name = "xsfvqmaccqoq", + .description = "'XSfvqmaccqoq' (SiFive Int8 Matrix Multiplication Instructions (4-by-8 and 8-by-4))", + .dependencies = featureSet(&[_]Feature{ + .zve32x, + }), + }; result[@intFromEnum(Feature.xtheadba)] = .{ .llvm_name = "xtheadba", .description = "'xtheadba' (T-Head address calculation instructions)", @@ -685,6 +736,16 @@ pub const all_features = blk: { .description = "'XVentanaCondOps' (Ventana Conditional Ops)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.za128rs)] = .{ + .llvm_name = "za128rs", + .description = "'Za128rs' (Reservation Set Size of at Most 128 Bytes)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.za64rs)] = .{ + .llvm_name = "za64rs", + .description = 
"'Za64rs' (Reservation Set Size of at Most 64 Bytes)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.zawrs)] = .{ .llvm_name = "zawrs", .description = "'Zawrs' (Wait on Reservation Set)", @@ -782,11 +843,28 @@ pub const all_features = blk: { .zfinx, }), }; + result[@intFromEnum(Feature.zexth_fusion)] = .{ + .llvm_name = "zexth-fusion", + .description = "Enable SLLI+SRLI to be fused to zero extension of halfword", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.zextw_fusion)] = .{ + .llvm_name = "zextw-fusion", + .description = "Enable SLLI+SRLI to be fused to zero extension of word", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.zfa)] = .{ + .llvm_name = "zfa", + .description = "'Zfa' (Additional Floating-Point)", + .dependencies = featureSet(&[_]Feature{ + .f, + }), + }; result[@intFromEnum(Feature.zfh)] = .{ .llvm_name = "zfh", .description = "'Zfh' (Half-Precision Floating-Point)", .dependencies = featureSet(&[_]Feature{ - .f, + .zfhmin, }), }; result[@intFromEnum(Feature.zfhmin)] = .{ @@ -807,7 +885,7 @@ pub const all_features = blk: { .llvm_name = "zhinx", .description = "'Zhinx' (Half Float in Integer)", .dependencies = featureSet(&[_]Feature{ - .zfinx, + .zhinxmin, }), }; result[@intFromEnum(Feature.zhinxmin)] = .{ @@ -817,6 +895,11 @@ pub const all_features = blk: { .zfinx, }), }; + result[@intFromEnum(Feature.zic64b)] = .{ + .llvm_name = "zic64b", + .description = "'Zic64b' (Cache Block Size Is 64 Bytes)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.zicbom)] = .{ .llvm_name = "zicbom", .description = "'Zicbom' (Cache-Block Management Instructions)", @@ -832,6 +915,26 @@ pub const all_features = blk: { .description = "'Zicboz' (Cache-Block Zero Instructions)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.ziccamoa)] = .{ + .llvm_name = "ziccamoa", + .description = "'Ziccamoa' (Main Memory Supports 
All Atomics in A)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.ziccif)] = .{ + .llvm_name = "ziccif", + .description = "'Ziccif' (Main Memory Supports Instruction Fetch with Atomicity Requirement)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.zicclsm)] = .{ + .llvm_name = "zicclsm", + .description = "'Zicclsm' (Main Memory Supports Misaligned Loads/Stores)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.ziccrse)] = .{ + .llvm_name = "ziccrse", + .description = "'Ziccrse' (Main Memory Supports Forward Progress on LR/SC Sequences)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.zicntr)] = .{ .llvm_name = "zicntr", .description = "'Zicntr' (Base Counters and Timers)", @@ -839,6 +942,11 @@ pub const all_features = blk: { .zicsr, }), }; + result[@intFromEnum(Feature.zicond)] = .{ + .llvm_name = "zicond", + .description = "'Zicond' (Integer Conditional Operations)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.zicsr)] = .{ .llvm_name = "zicsr", .description = "'zicsr' (CSRs)", @@ -849,6 +957,11 @@ pub const all_features = blk: { .description = "'Zifencei' (fence.i)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.zihintntl)] = .{ + .llvm_name = "zihintntl", + .description = "'Zihintntl' (Non-Temporal Locality Hints)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.zihintpause)] = .{ .llvm_name = "zihintpause", .description = "'Zihintpause' (Pause Hint)", @@ -933,6 +1046,18 @@ pub const all_features = blk: { .description = "'Zmmul' (Integer Multiplication)", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.zvbb)] = .{ + .llvm_name = "zvbb", + .description = "'Zvbb' (Vector basic bit-manipulation instructions)", + .dependencies = featureSet(&[_]Feature{ + .zvkb, + }), + }; + result[@intFromEnum(Feature.zvbc)] = .{ + 
.llvm_name = "zvbc", + .description = "'Zvbc' (Vector Carryless Multiplication)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.zve32f)] = .{ .llvm_name = "zve32f", .description = "'Zve32f' (Vector Extensions for Embedded Processors with maximal 32 EEW and F extension)", @@ -978,9 +1103,110 @@ pub const all_features = blk: { .description = "'Zvfh' (Vector Half-Precision Floating-Point)", .dependencies = featureSet(&[_]Feature{ .zfhmin, + .zvfhmin, + }), + }; + result[@intFromEnum(Feature.zvfhmin)] = .{ + .llvm_name = "zvfhmin", + .description = "'Zvfhmin' (Vector Half-Precision Floating-Point Minimal)", + .dependencies = featureSet(&[_]Feature{ .zve32f, }), }; + result[@intFromEnum(Feature.zvkb)] = .{ + .llvm_name = "zvkb", + .description = "'Zvkb' (Vector Bit-manipulation used in Cryptography)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.zvkg)] = .{ + .llvm_name = "zvkg", + .description = "'Zvkg' (Vector GCM instructions for Cryptography)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.zvkn)] = .{ + .llvm_name = "zvkn", + .description = "'Zvkn' (shorthand for 'Zvkned', 'Zvknhb', 'Zvkb', and 'Zvkt')", + .dependencies = featureSet(&[_]Feature{ + .zvkb, + .zvkned, + .zvknhb, + .zvkt, + }), + }; + result[@intFromEnum(Feature.zvknc)] = .{ + .llvm_name = "zvknc", + .description = "'Zvknc' (shorthand for 'Zvknc' and 'Zvbc')", + .dependencies = featureSet(&[_]Feature{ + .zvbc, + .zvkn, + }), + }; + result[@intFromEnum(Feature.zvkned)] = .{ + .llvm_name = "zvkned", + .description = "'Zvkned' (Vector AES Encryption & Decryption (Single Round))", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.zvkng)] = .{ + .llvm_name = "zvkng", + .description = "'zvkng' (shorthand for 'Zvkn' and 'Zvkg')", + .dependencies = featureSet(&[_]Feature{ + .zvkg, + .zvkn, + }), + }; + result[@intFromEnum(Feature.zvknha)] = .{ + .llvm_name = "zvknha", + .description 
= "'Zvknha' (Vector SHA-2 (SHA-256 only))", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.zvknhb)] = .{ + .llvm_name = "zvknhb", + .description = "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))", + .dependencies = featureSet(&[_]Feature{ + .zve64x, + }), + }; + result[@intFromEnum(Feature.zvks)] = .{ + .llvm_name = "zvks", + .description = "'Zvks' (shorthand for 'Zvksed', 'Zvksh', 'Zvkb', and 'Zvkt')", + .dependencies = featureSet(&[_]Feature{ + .zvkb, + .zvksed, + .zvksh, + .zvkt, + }), + }; + result[@intFromEnum(Feature.zvksc)] = .{ + .llvm_name = "zvksc", + .description = "'Zvksc' (shorthand for 'Zvks' and 'Zvbc')", + .dependencies = featureSet(&[_]Feature{ + .zvbc, + .zvks, + }), + }; + result[@intFromEnum(Feature.zvksed)] = .{ + .llvm_name = "zvksed", + .description = "'Zvksed' (SM4 Block Cipher Instructions)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.zvksg)] = .{ + .llvm_name = "zvksg", + .description = "'Zvksg' (shorthand for 'Zvks' and 'Zvkg')", + .dependencies = featureSet(&[_]Feature{ + .zvkg, + .zvks, + }), + }; + result[@intFromEnum(Feature.zvksh)] = .{ + .llvm_name = "zvksh", + .description = "'Zvksh' (SM3 Hash Function Instructions)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.zvkt)] = .{ + .llvm_name = "zvkt", + .description = "'Zvkt' (Vector Data-Independent Execution Latency)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.zvl1024b)] = .{ .llvm_name = "zvl1024b", .description = "'Zvl' (Minimum Vector Length) 1024", @@ -1217,6 +1443,77 @@ pub const cpu = struct { .zifencei, }), }; + pub const sifive_p450 = CpuModel{ + .name = "sifive_p450", + .llvm_name = "sifive-p450", + .features = featureSet(&[_]Feature{ + .@"64bit", + .a, + .auipc_addi_fusion, + .c, + .conditional_cmv_fusion, + .d, + .fast_unaligned_access, + .lui_addi_fusion, + .m, + .no_default_unroll, + .za64rs, + .zba, + .zbb, + .zbs, + .zfhmin, + 
.zic64b, + .zicbom, + .zicbop, + .zicboz, + .ziccamoa, + .ziccif, + .zicclsm, + .ziccrse, + .zifencei, + .zihintntl, + .zihintpause, + .zihpm, + }), + }; + pub const sifive_p670 = CpuModel{ + .name = "sifive_p670", + .llvm_name = "sifive-p670", + .features = featureSet(&[_]Feature{ + .@"64bit", + .a, + .auipc_addi_fusion, + .c, + .conditional_cmv_fusion, + .fast_unaligned_access, + .lui_addi_fusion, + .m, + .no_default_unroll, + .v, + .za64rs, + .zba, + .zbb, + .zbs, + .zfhmin, + .zic64b, + .zicbom, + .zicbop, + .zicboz, + .ziccamoa, + .ziccif, + .zicclsm, + .ziccrse, + .zifencei, + .zihintntl, + .zihintpause, + .zihpm, + .zvbb, + .zvknc, + .zvkng, + .zvksc, + .zvksg, + }), + }; pub const sifive_s21 = CpuModel{ .name = "sifive_s21", .llvm_name = "sifive-s21", @@ -1264,7 +1561,6 @@ pub const cpu = struct { .m, .no_default_unroll, .short_forward_branch_opt, - .xsfcie, .zifencei, .zihintpause, }), @@ -1338,4 +1634,56 @@ pub const cpu = struct { .zifencei, }), }; + pub const veyron_v1 = CpuModel{ + .name = "veyron_v1", + .llvm_name = "veyron-v1", + .features = featureSet(&[_]Feature{ + .@"64bit", + .a, + .auipc_addi_fusion, + .c, + .d, + .ld_add_fusion, + .lui_addi_fusion, + .m, + .shifted_zextw_fusion, + .ventana_veyron, + .xventanacondops, + .zba, + .zbb, + .zbc, + .zbs, + .zexth_fusion, + .zextw_fusion, + .zicbom, + .zicbop, + .zicboz, + .zicntr, + .zifencei, + .zihintpause, + .zihpm, + }), + }; + pub const xiangshan_nanhu = CpuModel{ + .name = "xiangshan_nanhu", + .llvm_name = "xiangshan-nanhu", + .features = featureSet(&[_]Feature{ + .@"64bit", + .a, + .c, + .d, + .m, + .svinval, + .zba, + .zbb, + .zbc, + .zbs, + .zicbom, + .zicboz, + .zifencei, + .zkn, + .zksed, + .zksh, + }), + }; }; diff --git a/lib/std/Target/s390x.zig b/lib/std/Target/s390x.zig index b642847258..c6974b9213 100644 --- a/lib/std/Target/s390x.zig +++ b/lib/std/Target/s390x.zig @@ -5,6 +5,7 @@ const CpuFeature = std.Target.Cpu.Feature; const CpuModel = std.Target.Cpu.Model; pub const Feature = 
enum { + backchain, bear_enhancement, deflate_conversion, dfp_packed_conversion, @@ -57,6 +58,11 @@ pub const all_features = blk: { const len = @typeInfo(Feature).Enum.fields.len; std.debug.assert(len <= CpuFeature.Set.needed_bit_count); var result: [len]CpuFeature = undefined; + result[@intFromEnum(Feature.backchain)] = .{ + .llvm_name = "backchain", + .description = "Store the address of the caller's frame into the callee's stack frame", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.bear_enhancement)] = .{ .llvm_name = "bear-enhancement", .description = "Assume that the BEAR-enhancement facility is installed", diff --git a/lib/std/Target/sparc.zig b/lib/std/Target/sparc.zig index 87bd95697c..70b55089d2 100644 --- a/lib/std/Target/sparc.zig +++ b/lib/std/Target/sparc.zig @@ -18,6 +18,34 @@ pub const Feature = enum { no_fmuls, no_fsmuld, popc, + reserve_g1, + reserve_g2, + reserve_g3, + reserve_g4, + reserve_g5, + reserve_g6, + reserve_g7, + reserve_i0, + reserve_i1, + reserve_i2, + reserve_i3, + reserve_i4, + reserve_i5, + reserve_l0, + reserve_l1, + reserve_l2, + reserve_l3, + reserve_l4, + reserve_l5, + reserve_l6, + reserve_l7, + reserve_o0, + reserve_o1, + reserve_o2, + reserve_o3, + reserve_o4, + reserve_o5, + slow_rdpc, soft_float, soft_mul_div, v9, @@ -100,6 +128,148 @@ pub const all_features = blk: { .description = "Use the popc (population count) instruction", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.reserve_g1)] = .{ + .llvm_name = "reserve-g1", + .description = "Reserve G1, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_g2)] = .{ + .llvm_name = "reserve-g2", + .description = "Reserve G2, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_g3)] = .{ + .llvm_name = "reserve-g3", + .description = "Reserve G3, making it unavailable as a GPR", + 
.dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_g4)] = .{ + .llvm_name = "reserve-g4", + .description = "Reserve G4, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_g5)] = .{ + .llvm_name = "reserve-g5", + .description = "Reserve G5, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_g6)] = .{ + .llvm_name = "reserve-g6", + .description = "Reserve G6, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_g7)] = .{ + .llvm_name = "reserve-g7", + .description = "Reserve G7, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_i0)] = .{ + .llvm_name = "reserve-i0", + .description = "Reserve I0, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_i1)] = .{ + .llvm_name = "reserve-i1", + .description = "Reserve I1, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_i2)] = .{ + .llvm_name = "reserve-i2", + .description = "Reserve I2, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_i3)] = .{ + .llvm_name = "reserve-i3", + .description = "Reserve I3, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_i4)] = .{ + .llvm_name = "reserve-i4", + .description = "Reserve I4, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_i5)] = .{ + .llvm_name = "reserve-i5", + .description = "Reserve I5, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_l0)] = .{ + 
.llvm_name = "reserve-l0", + .description = "Reserve L0, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_l1)] = .{ + .llvm_name = "reserve-l1", + .description = "Reserve L1, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_l2)] = .{ + .llvm_name = "reserve-l2", + .description = "Reserve L2, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_l3)] = .{ + .llvm_name = "reserve-l3", + .description = "Reserve L3, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_l4)] = .{ + .llvm_name = "reserve-l4", + .description = "Reserve L4, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_l5)] = .{ + .llvm_name = "reserve-l5", + .description = "Reserve L5, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_l6)] = .{ + .llvm_name = "reserve-l6", + .description = "Reserve L6, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_l7)] = .{ + .llvm_name = "reserve-l7", + .description = "Reserve L7, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_o0)] = .{ + .llvm_name = "reserve-o0", + .description = "Reserve O0, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_o1)] = .{ + .llvm_name = "reserve-o1", + .description = "Reserve O1, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_o2)] = .{ + .llvm_name = "reserve-o2", + .description = "Reserve O2, making it unavailable as a GPR", + 
.dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_o3)] = .{ + .llvm_name = "reserve-o3", + .description = "Reserve O3, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_o4)] = .{ + .llvm_name = "reserve-o4", + .description = "Reserve O4, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.reserve_o5)] = .{ + .llvm_name = "reserve-o5", + .description = "Reserve O5, making it unavailable as a GPR", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.slow_rdpc)] = .{ + .llvm_name = "slow-rdpc", + .description = "rd %pc, %XX is slow", + .dependencies = featureSet(&[_]Feature{ + .v9, + }), + }; result[@intFromEnum(Feature.soft_float)] = .{ .llvm_name = "soft-float", .description = "Use software emulation for floating point", @@ -407,7 +577,7 @@ pub const cpu = struct { .llvm_name = "ultrasparc", .features = featureSet(&[_]Feature{ .deprecated_v8, - .v9, + .slow_rdpc, .vis, }), }; @@ -416,7 +586,7 @@ pub const cpu = struct { .llvm_name = "ultrasparc3", .features = featureSet(&[_]Feature{ .deprecated_v8, - .v9, + .slow_rdpc, .vis, .vis2, }), diff --git a/lib/std/Target/ve.zig b/lib/std/Target/ve.zig index 09ee056ef9..6aa5c3f61c 100644 --- a/lib/std/Target/ve.zig +++ b/lib/std/Target/ve.zig @@ -34,6 +34,8 @@ pub const cpu = struct { pub const generic = CpuModel{ .name = "generic", .llvm_name = "generic", - .features = featureSet(&[_]Feature{}), + .features = featureSet(&[_]Feature{ + .vpu, + }), }; }; diff --git a/lib/std/Target/wasm.zig b/lib/std/Target/wasm.zig index a06d37cf7d..efc25babe9 100644 --- a/lib/std/Target/wasm.zig +++ b/lib/std/Target/wasm.zig @@ -9,6 +9,7 @@ pub const Feature = enum { bulk_memory, exception_handling, extended_const, + multimemory, multivalue, mutable_globals, nontrapping_fptoint, @@ -48,6 +49,11 @@ pub const all_features = blk: { .description = 
"Enable extended const expressions", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.multimemory)] = .{ + .llvm_name = "multimemory", + .description = "Enable multiple memories", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.multivalue)] = .{ .llvm_name = "multivalue", .description = "Enable multivalue blocks, instructions, and functions", diff --git a/lib/std/Target/x86.zig b/lib/std/Target/x86.zig index 43ec04f41c..a95b37d070 100644 --- a/lib/std/Target/x86.zig +++ b/lib/std/Target/x86.zig @@ -19,6 +19,8 @@ pub const Feature = enum { amx_int8, amx_tile, avx, + avx10_1_256, + avx10_1_512, avx2, avx512bf16, avx512bitalg, @@ -44,6 +46,8 @@ pub const Feature = enum { bmi, bmi2, branchfusion, + ccmp, + cf, cldemote, clflushopt, clwb, @@ -53,8 +57,10 @@ pub const Feature = enum { crc32, cx16, cx8, + egpr, enqcmd, ermsb, + evex512, f16c, false_deps_getmant, false_deps_lzcnt_tzcnt, @@ -104,6 +110,7 @@ pub const Feature = enum { movdir64b, movdiri, mwaitx, + ndd, no_bypass_delay, no_bypass_delay_blend, no_bypass_delay_mov, @@ -114,14 +121,18 @@ pub const Feature = enum { pconfig, pku, popcnt, + ppx, prefer_128_bit, prefer_256_bit, prefer_mask_registers, prefer_movmsk_over_vtest, + prefer_no_gather, + prefer_no_scatter, prefetchi, prefetchwt1, prfchw, ptwrite, + push2pop2, raoint, rdpid, rdpru, @@ -167,6 +178,7 @@ pub const Feature = enum { uintr, use_glm_div_sqrt_costs, use_slm_arith_costs, + usermsr, vaes, vpclmulqdq, vzeroupper, @@ -276,6 +288,31 @@ pub const all_features = blk: { .sse4_2, }), }; + result[@intFromEnum(Feature.avx10_1_256)] = .{ + .llvm_name = "avx10.1-256", + .description = "Support AVX10.1 up to 256-bit instruction", + .dependencies = featureSet(&[_]Feature{ + .avx512bf16, + .avx512bitalg, + .avx512cd, + .avx512fp16, + .avx512ifma, + .avx512vbmi, + .avx512vbmi2, + .avx512vnni, + .avx512vpopcntdq, + .vaes, + .vpclmulqdq, + }), + }; + result[@intFromEnum(Feature.avx10_1_512)] = .{ + .llvm_name = 
"avx10.1-512", + .description = "Support AVX10.1 up to 512-bit instruction", + .dependencies = featureSet(&[_]Feature{ + .avx10_1_256, + .evex512, + }), + }; result[@intFromEnum(Feature.avx2)] = .{ .llvm_name = "avx2", .description = "Enable AVX2 instructions", @@ -449,6 +486,16 @@ pub const all_features = blk: { .description = "CMP/TEST can be fused with conditional branches", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.ccmp)] = .{ + .llvm_name = "ccmp", + .description = "Support conditional cmp & test instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.cf)] = .{ + .llvm_name = "cf", + .description = "Support conditional faulting", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.cldemote)] = .{ .llvm_name = "cldemote", .description = "Enable Cache Line Demote", @@ -496,6 +543,11 @@ pub const all_features = blk: { .description = "Support CMPXCHG8B instructions", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.egpr)] = .{ + .llvm_name = "egpr", + .description = "Support extended general purpose register", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.enqcmd)] = .{ .llvm_name = "enqcmd", .description = "Has ENQCMD instructions", @@ -506,6 +558,11 @@ pub const all_features = blk: { .description = "REP MOVS/STOS are fast", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.evex512)] = .{ + .llvm_name = "evex512", + .description = "Support ZMM and 64-bit mask instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.f16c)] = .{ .llvm_name = "f16c", .description = "Support 16-bit floating point conversion instructions", @@ -762,6 +819,11 @@ pub const all_features = blk: { .description = "Enable MONITORX/MWAITX timer functionality", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.ndd)] = .{ + .llvm_name = "ndd", + 
.description = "Support non-destructive destination", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.no_bypass_delay)] = .{ .llvm_name = "no-bypass-delay", .description = "Has no bypass delay when using the 'wrong' domain", @@ -814,6 +876,11 @@ pub const all_features = blk: { .description = "Support POPCNT instruction", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.ppx)] = .{ + .llvm_name = "ppx", + .description = "Support Push-Pop Acceleration", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.prefer_128_bit)] = .{ .llvm_name = "prefer-128-bit", .description = "Prefer 128-bit AVX instructions", @@ -834,6 +901,16 @@ pub const all_features = blk: { .description = "Prefer movmsk over vtest instruction", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.prefer_no_gather)] = .{ + .llvm_name = "prefer-no-gather", + .description = "Prefer no gather instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.prefer_no_scatter)] = .{ + .llvm_name = "prefer-no-scatter", + .description = "Prefer no scatter instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.prefetchi)] = .{ .llvm_name = "prefetchi", .description = "Prefetch instruction with T0 or T1 Hint", @@ -854,6 +931,11 @@ pub const all_features = blk: { .description = "Support ptwrite instruction", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.push2pop2)] = .{ + .llvm_name = "push2pop2", + .description = "Support PUSH2/POP2 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.raoint)] = .{ .llvm_name = "raoint", .description = "Support RAO-INT instructions", @@ -947,7 +1029,7 @@ pub const all_features = blk: { .llvm_name = "sha512", .description = "Support SHA512 instructions", .dependencies = featureSet(&[_]Feature{ - .avx, + .avx2, }), }; 
result[@intFromEnum(Feature.shstk)] = .{ @@ -1011,7 +1093,7 @@ pub const all_features = blk: { .llvm_name = "sm4", .description = "Support SM4 instructions", .dependencies = featureSet(&[_]Feature{ - .avx, + .avx2, }), }; result[@intFromEnum(Feature.soft_float)] = .{ @@ -1106,12 +1188,17 @@ pub const all_features = blk: { .description = "Use Silvermont specific arithmetic costs", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.usermsr)] = .{ + .llvm_name = "usermsr", + .description = "Support USERMSR instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.vaes)] = .{ .llvm_name = "vaes", .description = "Promote selected AES instructions to AVX512/AVX registers", .dependencies = featureSet(&[_]Feature{ .aes, - .avx, + .avx2, }), }; result[@intFromEnum(Feature.vpclmulqdq)] = .{ @@ -1282,6 +1369,239 @@ pub const cpu = struct { .x87, }), }; + pub const arrowlake = CpuModel{ + .name = "arrowlake", + .llvm_name = "arrowlake", + .features = featureSet(&[_]Feature{ + .@"64bit", + .adx, + .allow_light_256_bit, + .avxifma, + .avxneconvert, + .avxvnni, + .avxvnniint8, + .bmi, + .bmi2, + .cldemote, + .clflushopt, + .clwb, + .cmov, + .cmpccxadd, + .crc32, + .cx16, + .enqcmd, + .f16c, + .false_deps_perm, + .false_deps_popcnt, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, + .fast_vector_fsqrt, + .fma, + .fsgsbase, + .fxsr, + .gfni, + .hreset, + .idivq_to_divl, + .invpcid, + .lzcnt, + .macrofusion, + .mmx, + .movbe, + .movdir64b, + .movdiri, + .no_bypass_delay_blend, + .no_bypass_delay_mov, + .no_bypass_delay_shuffle, + .nopl, + .pconfig, + .pku, + .popcnt, + .prefer_movmsk_over_vtest, + .prfchw, + .ptwrite, + .rdpid, + .rdrnd, + .rdseed, + .sahf, + .serialize, + .sha, + .shstk, + .slow_3ops_lea, + .tuning_fast_imm_vector_shift, + .uintr, + .vaes, + .vpclmulqdq, + .vzeroupper, + .waitpkg, + .widekl, + .x87, + 
.xsavec, + .xsaveopt, + .xsaves, + }), + }; + pub const arrowlake_s = CpuModel{ + .name = "arrowlake_s", + .llvm_name = "arrowlake-s", + .features = featureSet(&[_]Feature{ + .@"64bit", + .adx, + .allow_light_256_bit, + .avxifma, + .avxneconvert, + .avxvnni, + .avxvnniint16, + .avxvnniint8, + .bmi, + .bmi2, + .cldemote, + .clflushopt, + .clwb, + .cmov, + .cmpccxadd, + .crc32, + .cx16, + .enqcmd, + .f16c, + .false_deps_perm, + .false_deps_popcnt, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, + .fast_vector_fsqrt, + .fma, + .fsgsbase, + .fxsr, + .gfni, + .hreset, + .idivq_to_divl, + .invpcid, + .lzcnt, + .macrofusion, + .mmx, + .movbe, + .movdir64b, + .movdiri, + .no_bypass_delay_blend, + .no_bypass_delay_mov, + .no_bypass_delay_shuffle, + .nopl, + .pconfig, + .pku, + .popcnt, + .prefer_movmsk_over_vtest, + .prfchw, + .ptwrite, + .rdpid, + .rdrnd, + .rdseed, + .sahf, + .serialize, + .sha, + .sha512, + .shstk, + .slow_3ops_lea, + .sm3, + .sm4, + .tuning_fast_imm_vector_shift, + .uintr, + .vaes, + .vpclmulqdq, + .vzeroupper, + .waitpkg, + .widekl, + .x87, + .xsavec, + .xsaveopt, + .xsaves, + }), + }; + pub const arrowlake_s = CpuModel{ + .name = "arrowlake_s", + .llvm_name = "arrowlake_s", + .features = featureSet(&[_]Feature{ + .@"64bit", + .adx, + .allow_light_256_bit, + .avxifma, + .avxneconvert, + .avxvnni, + .avxvnniint16, + .avxvnniint8, + .bmi, + .bmi2, + .cldemote, + .clflushopt, + .clwb, + .cmov, + .cmpccxadd, + .crc32, + .cx16, + .enqcmd, + .f16c, + .false_deps_perm, + .false_deps_popcnt, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, + .fast_vector_fsqrt, + .fma, + .fsgsbase, + .fxsr, + .gfni, + .hreset, + .idivq_to_divl, + .invpcid, + .lzcnt, + .macrofusion, + .mmx, + .movbe, + .movdir64b, + .movdiri, + .no_bypass_delay_blend, + .no_bypass_delay_mov, + 
.no_bypass_delay_shuffle, + .nopl, + .pconfig, + .pku, + .popcnt, + .prefer_movmsk_over_vtest, + .prfchw, + .ptwrite, + .rdpid, + .rdrnd, + .rdseed, + .sahf, + .serialize, + .sha, + .sha512, + .shstk, + .slow_3ops_lea, + .sm3, + .sm4, + .tuning_fast_imm_vector_shift, + .uintr, + .vaes, + .vpclmulqdq, + .vzeroupper, + .waitpkg, + .widekl, + .x87, + .xsavec, + .xsaveopt, + .xsaves, + }), + }; pub const athlon = CpuModel{ .name = "athlon", .llvm_name = "athlon", @@ -1817,6 +2137,7 @@ pub const cpu = struct { .crc32, .cx16, .ermsb, + .evex512, .fast_15bytenop, .fast_gather, .fast_scalar_fsqrt, @@ -1875,6 +2196,7 @@ pub const cpu = struct { .crc32, .cx16, .ermsb, + .evex512, .false_deps_popcnt, .fast_15bytenop, .fast_gather, @@ -1913,6 +2235,87 @@ pub const cpu = struct { .xsaves, }), }; + pub const clearwaterforest = CpuModel{ + .name = "clearwaterforest", + .llvm_name = "clearwaterforest", + .features = featureSet(&[_]Feature{ + .@"64bit", + .adx, + .allow_light_256_bit, + .avxifma, + .avxneconvert, + .avxvnni, + .avxvnniint16, + .avxvnniint8, + .bmi, + .bmi2, + .cldemote, + .clflushopt, + .clwb, + .cmov, + .cmpccxadd, + .crc32, + .cx16, + .enqcmd, + .f16c, + .false_deps_perm, + .false_deps_popcnt, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, + .fast_vector_fsqrt, + .fma, + .fsgsbase, + .fxsr, + .gfni, + .hreset, + .idivq_to_divl, + .invpcid, + .lzcnt, + .macrofusion, + .mmx, + .movbe, + .movdir64b, + .movdiri, + .no_bypass_delay_blend, + .no_bypass_delay_mov, + .no_bypass_delay_shuffle, + .nopl, + .pconfig, + .pku, + .popcnt, + .prefer_movmsk_over_vtest, + .prefetchi, + .prfchw, + .ptwrite, + .rdpid, + .rdrnd, + .rdseed, + .sahf, + .serialize, + .sha, + .sha512, + .shstk, + .slow_3ops_lea, + .sm3, + .sm4, + .tuning_fast_imm_vector_shift, + .uintr, + .usermsr, + .vaes, + .vpclmulqdq, + .vzeroupper, + .waitpkg, + .widekl, + .x87, + .xsavec, + .xsaveopt, + .xsaves, + 
}), + }; pub const cooperlake = CpuModel{ .name = "cooperlake", .llvm_name = "cooperlake", @@ -1934,6 +2337,7 @@ pub const cpu = struct { .crc32, .cx16, .ermsb, + .evex512, .false_deps_popcnt, .fast_15bytenop, .fast_gather, @@ -2039,6 +2443,7 @@ pub const cpu = struct { .cx16, .enqcmd, .ermsb, + .evex512, .false_deps_getmant, .false_deps_mulc, .false_deps_mullq, @@ -2195,6 +2600,66 @@ pub const cpu = struct { .xsaves, }), }; + pub const gracemont = CpuModel{ + .name = "gracemont", + .llvm_name = "gracemont", + .features = featureSet(&[_]Feature{ + .@"64bit", + .adx, + .avxvnni, + .bmi, + .bmi2, + .cldemote, + .clflushopt, + .clwb, + .cmov, + .crc32, + .cx16, + .f16c, + .false_deps_popcnt, + .fast_15bytenop, + .fast_scalar_fsqrt, + .fast_variable_perlane_shuffle, + .fast_vector_fsqrt, + .fma, + .fsgsbase, + .fxsr, + .gfni, + .hreset, + .idivl_to_divb, + .idivq_to_divl, + .invpcid, + .lzcnt, + .macrofusion, + .mmx, + .movbe, + .movdir64b, + .movdiri, + .nopl, + .pconfig, + .pku, + .popcnt, + .prfchw, + .ptwrite, + .rdpid, + .rdrnd, + .rdseed, + .sahf, + .serialize, + .sha, + .shstk, + .slow_3ops_lea, + .vaes, + .vpclmulqdq, + .vzeroupper, + .waitpkg, + .widekl, + .x87, + .xsavec, + .xsaveopt, + .xsaves, + }), + }; pub const grandridge = CpuModel{ .name = "grandridge", .llvm_name = "grandridge", @@ -2235,7 +2700,6 @@ pub const cpu = struct { .popcnt, .prfchw, .ptwrite, - .raoint, .rdpid, .rdrnd, .rdseed, @@ -2289,6 +2753,7 @@ pub const cpu = struct { .cx16, .enqcmd, .ermsb, + .evex512, .false_deps_getmant, .false_deps_mulc, .false_deps_mullq, @@ -2376,6 +2841,7 @@ pub const cpu = struct { .cx16, .enqcmd, .ermsb, + .evex512, .false_deps_getmant, .false_deps_mulc, .false_deps_mullq, @@ -2537,6 +3003,7 @@ pub const cpu = struct { .crc32, .cx16, .ermsb, + .evex512, .fast_15bytenop, .fast_gather, .fast_scalar_fsqrt, @@ -2601,6 +3068,7 @@ pub const cpu = struct { .crc32, .cx16, .ermsb, + .evex512, .fast_15bytenop, .fast_gather, .fast_scalar_fsqrt, @@ -2760,6 +3228,7 @@ pub 
const cpu = struct { .cmov, .crc32, .cx16, + .evex512, .fast_gather, .fast_movbe, .fsgsbase, @@ -2801,6 +3270,7 @@ pub const cpu = struct { .cmov, .crc32, .cx16, + .evex512, .fast_gather, .fast_movbe, .fsgsbase, @@ -2836,6 +3306,85 @@ pub const cpu = struct { .vzeroupper, }), }; + pub const lunarlake = CpuModel{ + .name = "lunarlake", + .llvm_name = "lunarlake", + .features = featureSet(&[_]Feature{ + .@"64bit", + .adx, + .allow_light_256_bit, + .avxifma, + .avxneconvert, + .avxvnni, + .avxvnniint16, + .avxvnniint8, + .bmi, + .bmi2, + .cldemote, + .clflushopt, + .clwb, + .cmov, + .cmpccxadd, + .crc32, + .cx16, + .enqcmd, + .f16c, + .false_deps_perm, + .false_deps_popcnt, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, + .fast_vector_fsqrt, + .fma, + .fsgsbase, + .fxsr, + .gfni, + .hreset, + .idivq_to_divl, + .invpcid, + .lzcnt, + .macrofusion, + .mmx, + .movbe, + .movdir64b, + .movdiri, + .no_bypass_delay_blend, + .no_bypass_delay_mov, + .no_bypass_delay_shuffle, + .nopl, + .pconfig, + .pku, + .popcnt, + .prefer_movmsk_over_vtest, + .prfchw, + .ptwrite, + .rdpid, + .rdrnd, + .rdseed, + .sahf, + .serialize, + .sha, + .sha512, + .shstk, + .slow_3ops_lea, + .sm3, + .sm4, + .tuning_fast_imm_vector_shift, + .uintr, + .vaes, + .vpclmulqdq, + .vzeroupper, + .waitpkg, + .widekl, + .x87, + .xsavec, + .xsaveopt, + .xsaves, + }), + }; pub const meteorlake = CpuModel{ .name = "meteorlake", .llvm_name = "meteorlake", @@ -2979,6 +3528,86 @@ pub const cpu = struct { .x87, }), }; + pub const pantherlake = CpuModel{ + .name = "pantherlake", + .llvm_name = "pantherlake", + .features = featureSet(&[_]Feature{ + .@"64bit", + .adx, + .allow_light_256_bit, + .avxifma, + .avxneconvert, + .avxvnni, + .avxvnniint16, + .avxvnniint8, + .bmi, + .bmi2, + .cldemote, + .clflushopt, + .clwb, + .cmov, + .cmpccxadd, + .crc32, + .cx16, + .enqcmd, + .f16c, + .false_deps_perm, + .false_deps_popcnt, + 
.fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, + .fast_vector_fsqrt, + .fma, + .fsgsbase, + .fxsr, + .gfni, + .hreset, + .idivq_to_divl, + .invpcid, + .lzcnt, + .macrofusion, + .mmx, + .movbe, + .movdir64b, + .movdiri, + .no_bypass_delay_blend, + .no_bypass_delay_mov, + .no_bypass_delay_shuffle, + .nopl, + .pconfig, + .pku, + .popcnt, + .prefer_movmsk_over_vtest, + .prefetchi, + .prfchw, + .ptwrite, + .rdpid, + .rdrnd, + .rdseed, + .sahf, + .serialize, + .sha, + .sha512, + .shstk, + .slow_3ops_lea, + .sm3, + .sm4, + .tuning_fast_imm_vector_shift, + .uintr, + .vaes, + .vpclmulqdq, + .vzeroupper, + .waitpkg, + .widekl, + .x87, + .xsavec, + .xsaveopt, + .xsaves, + }), + }; pub const penryn = CpuModel{ .name = "penryn", .llvm_name = "penryn", @@ -3211,6 +3840,7 @@ pub const cpu = struct { .crc32, .cx16, .ermsb, + .evex512, .fast_15bytenop, .fast_gather, .fast_scalar_fsqrt, @@ -3309,6 +3939,7 @@ pub const cpu = struct { .cx16, .enqcmd, .ermsb, + .evex512, .false_deps_getmant, .false_deps_mulc, .false_deps_mullq, @@ -3479,6 +4110,7 @@ pub const cpu = struct { .crc32, .cx16, .ermsb, + .evex512, .false_deps_popcnt, .fast_15bytenop, .fast_gather, @@ -3589,6 +4221,7 @@ pub const cpu = struct { .crc32, .cx16, .ermsb, + .evex512, .false_deps_popcnt, .fast_15bytenop, .fast_gather, @@ -3684,6 +4317,7 @@ pub const cpu = struct { .crc32, .cx16, .ermsb, + .evex512, .fast_15bytenop, .fast_gather, .fast_scalar_fsqrt, @@ -3904,6 +4538,7 @@ pub const cpu = struct { .cmov, .crc32, .cx16, + .evex512, .false_deps_popcnt, .fast_15bytenop, .fast_gather, @@ -4056,7 +4691,6 @@ pub const cpu = struct { .@"64bit", .adx, .allow_light_256_bit, - .avx2, .bmi, .bmi2, .branchfusion, @@ -4134,6 +4768,7 @@ pub const cpu = struct { .cmov, .crc32, .cx16, + .evex512, .fast_15bytenop, .fast_bextr, .fast_lzcnt, diff --git a/tools/update_cpu_features.zig b/tools/update_cpu_features.zig index 
662f28c6f7..10ea65084d 100644 --- a/tools/update_cpu_features.zig +++ b/tools/update_cpu_features.zig @@ -183,6 +183,10 @@ const llvm_targets = [_]LlvmTarget{ .llvm_name = "apple-a16", .flatten = true, }, + .{ + .llvm_name = "apple-a17", + .flatten = true, + }, .{ .llvm_name = "apple-a7-sysreg", .flatten = true, @@ -207,6 +211,10 @@ const llvm_targets = [_]LlvmTarget{ .llvm_name = "cortex-x3", .flatten = true, }, + .{ + .llvm_name = "cortex-x4", + .flatten = true, + }, .{ .llvm_name = "falkor", .flatten = true, @@ -257,6 +265,10 @@ const llvm_targets = [_]LlvmTarget{ .llvm_name = "ampere1", .flatten = true, }, + .{ + .llvm_name = "ampere1b", + .flatten = true, + }, }, .extra_cpus = &.{ .{ @@ -681,6 +693,10 @@ const llvm_targets = [_]LlvmTarget{ .llvm_name = "armv9.4-a", .zig_name = "v9_4a", }, + .{ + .llvm_name = "armv9.5-a", + .zig_name = "v9_5a", + }, .{ .llvm_name = "armv9-a", .zig_name = "v9a",