Skip to content

Commit

Permalink
Add missing features to features_x86
Browse files Browse the repository at this point in the history
  • Loading branch information
gbaraldi committed Jan 6, 2025
1 parent 77f4abc commit 75ac0cf
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 7 deletions.
26 changes: 23 additions & 3 deletions src/features_x86.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#include "processor.h"
#ifdef _CPU_X86_
#define JL_X86_64ONLY_VER(x) UINT32_MAX
#else
Expand Down Expand Up @@ -72,13 +73,14 @@ JL_FEATURE_DEF(movdir64b, 32 * 3 + 28, 0)
JL_FEATURE_DEF(enqcmd, 32 * 3 + 29, 0)

// EAX=7,ECX=0: EDX
// JL_FEATURE_DEF(avx5124vnniw, 32 * 4 + 2, ?????)
// JL_FEATURE_DEF(avx5124fmaps, 32 * 4 + 3, ?????)
JL_FEATURE_DEF(avx512vnniw, 32 * 4 + 2, 0)
JL_FEATURE_DEF(avx512fmaps, 32 * 4 + 3, 0)
JL_FEATURE_DEF(uintr, 32 * 4 + 5, 140000)
JL_FEATURE_DEF(avx512vp2intersect, 32 * 4 + 8, 0)
JL_FEATURE_DEF(serialize, 32 * 4 + 14, 110000)
JL_FEATURE_DEF(tsxldtrk, 32 * 4 + 16, 110000)
JL_FEATURE_DEF(pconfig, 32 * 4 + 18, 0)
// JL_FEATURE_DEF(ibt, 32 * 4 + 20, 0)
JL_FEATURE_DEF_NAME(amx_bf16, 32 * 4 + 22, 110000, "amx-bf16")
JL_FEATURE_DEF(avx512fp16, 32 * 4 + 23, 140000)
JL_FEATURE_DEF_NAME(amx_tile, 32 * 4 + 24, 110000, "amx-tile")
Expand Down Expand Up @@ -110,10 +112,28 @@ JL_FEATURE_DEF(clzero, 32 * 8 + 0, 0)
JL_FEATURE_DEF(wbnoinvd, 32 * 8 + 9, 0)

// EAX=7,ECX=1: EAX
JL_FEATURE_DEF(sha512, 32 * 9 + 0, 170000)
JL_FEATURE_DEF(sm3, 32 * 9 + 1, 170000)
JL_FEATURE_DEF(sm4, 32 * 9 + 2, 170000)
JL_FEATURE_DEF(raoint, 32 * 9 + 3, 170000)
JL_FEATURE_DEF(avxvnni, 32 * 9 + 4, 120000)
JL_FEATURE_DEF(avx512bf16, 32 * 9 + 5, 0)
JL_FEATURE_DEF(cmpccxadd, 32 * 9 + 7, 160000)
JL_FEATURE_DEF(amx_fp16, 32 * 9 + 21, 160000, "amx-fp16")
JL_FEATURE_DEF(hreset, 32 * 9 + 22, 160000)
JL_FEATURE_DEF(avxifma, 32 * 9 * 23, 160000)

// EAX=7,ECX=1: EBX
JL_FEATURE_DEF(avxvnniint8, 32 * 10 + 4, 160000)
JL_FEATURE_DEF(avxneconvert, 32 * 10 + 5, 160000)
JL_FEATURE_DEF(amx_complex, 32 * 10 + 8, 170000, "amx-complex")
JL_FEATURE_DEF(avxvnniint16, 32 * 10 + 10, 170000)
JL_FEATURE_DEF(prefetchi, 32 * 10 + 14, 160000)
JL_FEATURE_DEF(usermsr, 32 * 10 + 15, 170000)
// JL_FEATURE_DEF(avx10, 32 * 10 + 19, 170000) // TODO: What to do about avx10 and it's mess?
// JL_FEATURE_DEF(apxf, 32 * 10 + 21, 190000)

// EAX=0x14,ECX=0: EBX
JL_FEATURE_DEF(ptwrite, 32 * 10 + 4, 0)
JL_FEATURE_DEF(ptwrite, 32 * 11 + 4, 0)

#undef JL_X86_64ONLY_VER
31 changes: 27 additions & 4 deletions src/processor_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ enum class CPU : uint32_t {
amd_znver5,
};

static constexpr size_t feature_sz = 11;
static constexpr size_t feature_sz = 12;
static constexpr FeatureName feature_names[] = {
#define JL_FEATURE_DEF(name, bit, llvmver) {#name, bit, llvmver},
#define JL_FEATURE_DEF_NAME(name, bit, llvmver, str) {str, bit, llvmver},
Expand Down Expand Up @@ -142,6 +142,10 @@ static constexpr FeatureDep deps[] = {
{vpclmulqdq, avx},
{vpclmulqdq, pclmul},
{avxvnni, avx2},
{avxvnniint8, avx2},
{avxvnniint16, avx2},
{avxifma, avx2},
{avxneconvert, avx2},
{avx512f, avx2},
{avx512dq, avx512f},
{avx512ifma, avx512f},
Expand All @@ -160,13 +164,18 @@ static constexpr FeatureDep deps[] = {
{avx512fp16, avx512vl},
{amx_int8, amx_tile},
{amx_bf16, amx_tile},
{amx_fp16, amx_tile},
{amx_complex, amx_tile},
{sse4a, sse3},
{xop, fma4},
{fma4, avx},
{fma4, sse4a},
{xsaveopt, xsave},
{xsavec, xsave},
{xsaves, xsave},
{sha512, avx2},
{sm3, avx},
{sm4, avx2},
};

// We require cx16 on 64bit by default. This can be overwritten with `-cx16`
Expand Down Expand Up @@ -237,7 +246,7 @@ constexpr auto znver2 = znver1 | get_feature_masks(clwb, rdpid, wbnoinvd);
constexpr auto znver3 = znver2 | get_feature_masks(shstk, pku, vaes, vpclmulqdq);
constexpr auto znver4 = znver3 | get_feature_masks(avx512f, avx512cd, avx512dq, avx512bw, avx512vl, avx512ifma, avx512vbmi,
avx512vbmi2, avx512vnni, avx512bitalg, avx512vpopcntdq, avx512bf16, gfni, shstk, xsaves);
constexpr auto znver5 = znver4 | get_feature_masks(avxvnni, movdiri, movdir64b, avx512vp2intersect, /*prefetchi,*/ avxvnni);
constexpr auto znver5 = znver4 | get_feature_masks(avxvnni, movdiri, movdir64b, avx512vp2intersect, prefetchi, avxvnni);

}

Expand Down Expand Up @@ -666,11 +675,12 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
int32_t info7[4];
jl_cpuidex(info7, 7, 1);
features[9] = info7[0];
features[10] = info7[1];
}
if (maxleaf >= 0x14) {
int32_t info14[4];
jl_cpuidex(info14, 0x14, 0);
features[10] = info14[1];
features[11] = info14[1];
}

// Fix up AVX bits to account for OS support and match LLVM model
Expand Down Expand Up @@ -711,7 +721,20 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
else {
cpu = uint32_t(CPU::generic);
}

/* Feature bits to register map
feature[0] = ecx
feature[1] = edx
feature[2] = leaf 7 ebx
feature[3] = leaf 7 ecx
feature[4] = leaf 7 edx
feature[5] = leaf 0x80000001 ecx
feature[6] = leaf 0x80000001 edx
feature[7] = leaf 0xd subleaf 1 eax
feature[8] = leaf 0x80000008 ebx
feature[9] = leaf 7 ebx subleaf 1 eax
feature[10] = leaf 7 ebx subleaf 1 ebx
feature[11] = leaf 0x14 ebx
*/
return std::make_pair(cpu, features);
}

Expand Down

0 comments on commit 75ac0cf

Please sign in to comment.