tools/cpuid: add cache topology detection

This commit is contained in:
Danny Robson 2024-02-21 14:54:15 +10:00
parent 14c9a103ef
commit 48e71ce0dd
1 changed files with 380 additions and 2 deletions

View File

@ -7,12 +7,390 @@
*/
#include "cpuid.hpp"
//#include <iostream>
#include <cruft/util/std.hpp>
#include <cruft/util/debug/assert.hpp>
#include <cruft/util/types/sized.hpp>
#include <cruft/util/cast.hpp>
#include <fmt/core.h>
#include <cstring>
#include <sys/sysinfo.h>
#include <sched.h>
#include <err.h>
#include <unistd.h>
///////////////////////////////////////////////////////////////////////////////
/// Extract the inclusive bit range [hi:lo] from `val`, shifted down so that
/// bit `lo` of the input becomes bit 0 of the result.
///
/// The result is narrowed to the unsigned integer type selected by
/// `cruft::types::sized::bits` for the field width rounded up to a power of
/// two, with a minimum of 8 bits.
///
/// \tparam hi  index of the most significant bit to extract (at most 31)
/// \tparam lo  index of the least significant bit to extract (at most `hi`)
/// \param val  the 32 bit word to extract from
template <u32 hi, u32 lo>
static constexpr
auto
bits (u32 val)
{
    // The range is a compile time property, so reject bad ranges at compile
    // time rather than with a runtime assertion.
    static_assert (hi >= lo, "bit range must be given as <hi, lo> with hi >= lo");
    static_assert (hi < 32,  "bit range must lie within a 32 bit word");

    constexpr u32 len   = hi - lo + 1;
    constexpr u32 rsize = cruft::round_pow2 (len < 8 ? 8 : len);
    using result_t = typename cruft::types::sized::bits<rsize>::uint;

    // Build the mask with unsigned arithmetic. `(1 << len) - 1` operates on a
    // signed int, which overflows for len == 31 and is undefined for
    // len == 32. Here `len` is in [1, 32] so the shift count is in [0, 31].
    constexpr u32 mask = ~u32 {0} >> (32u - len);

    return cruft::cast::narrow<result_t> ((val >> lo) & mask);
}
///////////////////////////////////////////////////////////////////////////////
namespace {
    /// The four 32 bit values written by a single `cpuid` query, named after
    /// the output registers they are bound to in `cpuid ()`.
    struct cpuid_t {
        u32 a;
        u32 b;
        u32 c;
        u32 d;
    };
}
//-----------------------------------------------------------------------------
/// Execute the `cpuid` instruction and return the four output registers.
///
/// \param query  value loaded into EAX (the function/leaf number)
/// \param param  value loaded into ECX (the sub-function index); defaults to 0
/// \return       the resulting EAX, EBX, ECX, and EDX values
static cpuid_t
cpuid (u32 query, u32 param = 0)
{
    ::cpuid_t res {};

    // The statement must be `volatile`: the result depends on which
    // processor the thread is currently running on, which the compiler
    // cannot see. Without it the compiler may treat the asm as a pure
    // function of its inputs and merge or hoist repeated queries, such as
    // the per-processor loop in `main` that re-queries after changing the
    // thread's affinity.
    asm volatile (
        "cpuid"
        : "=a" (res.a), "=b" (res.b), "=c" (res.c), "=d" (res.d)
        : "a" (query), "c" (param)
    );

    return res;
}
///////////////////////////////////////////////////////////////////////////////
namespace {
    /// The three register words that make up a vendor identification string.
    ///
    /// The members are declared in the order b, d, c, which is the order in
    /// which the formatter prints them.
    struct vendor_t {
        u32 b;
        u32 d;
        u32 c;
    };
}
//-----------------------------------------------------------------------------
/// The vendor words for AMD processors.
///
/// Read byte-by-byte in little-endian memory order, the three words spell
/// "Auth", "enti", and "cAMD" respectively.
[[maybe_unused]]
static constexpr vendor_t VENDOR_AMD {
    .b = 0x6874'7541,  // 'htuA'
    .d = 0x6974'6E65,  // 'itne'
    .c = 0x444D'4163,  // 'DMAc'
};
//-----------------------------------------------------------------------------
/// Query the processor's vendor identification words.
///
/// Uses CPUID function 0, which is the architecturally defined source of the
/// vendor string on every x86 vendor. Function 0x8000'0000 only mirrors the
/// vendor string in EBX/ECX/EDX on AMD parts; other vendors leave those
/// registers reserved there. On AMD the result is identical.
///
/// \return  the EBX, EDX, and ECX values of the query, in that order
static vendor_t
vendor (void)
{
    auto const src = cpuid (0x0000'0000, 0);

    return {
        .b = src.b,
        .d = src.d,
        .c = src.c
    };
}
//-----------------------------------------------------------------------------
/// Formats a `vendor_t` as text by emitting the bytes of its `b`, `d`, and
/// `c` words, in that order, as three consecutive 4 character strings.
///
/// No format specification is accepted.
template <>
struct fmt::formatter<::vendor_t> {
    constexpr format_parse_context::iterator
    parse (format_parse_context &ctx)
    {
        return ctx.begin ();
    }


    // `format` is const so that const objects (and const members of other
    // formatted objects) can be formatted; current fmt releases reject
    // non-const `format` for const arguments.
    format_context::iterator
    format (::vendor_t const &val, format_context &ctx) const
    {
        u32 const words[] = { val.b, val.d, val.c };

        // Thread the iterator returned by each `format_to` call through to
        // the next rather than discarding it and re-reading `ctx.out ()`.
        auto out = ctx.out ();

        for (auto const &word: words) {
            // One extra zeroed byte terminates the string for printing.
            char buf[sizeof (word) + 1] = {};
            std::memcpy (buf, &word, sizeof (word));
            out = fmt::format_to (out, "{}", buf);
        }

        return out;
    }
};
///////////////////////////////////////////////////////////////////////////////
namespace {
    /// Parameters of a single cache as decoded from the legacy cache
    /// queries (`l1`, `l2`, `l3`).
    ///
    /// NOTE(review): units are whatever the hardware reports for each
    /// query; presumably sizes are in KiB and line sizes in bytes -- confirm
    /// against the vendor's CPUID documentation.
    struct cache_t {
        u32 size;
        u16 associativity;
        u16 lines_per_tag;
        u16 line_size;
    };


    /// A pair of caches at the same level: `d` then `i`.
    ///
    /// `l1 ()` fills `d` from ECX and `i` from EDX; presumably these are the
    /// data and instruction caches respectively.
    struct splitcache_t {
        cache_t d;
        cache_t i;
    };
}
//-----------------------------------------------------------------------------
/// Decode a packed 32 bit L1 cache descriptor into a `cache_t`.
///
/// The word is split into four byte-wide fields, from most to least
/// significant: size, associativity, lines per tag, and line size.
static cache_t
_l1_from_u32 (u32 const src)
{
    cache_t res {};

    res.size          = bits<31, 24> (src);
    res.associativity = bits<23, 16> (src);
    res.lines_per_tag = bits<15,  8> (src);
    res.line_size     = bits< 7,  0> (src);

    return res;
}
//-----------------------------------------------------------------------------
/// Query the L1 cache descriptors using CPUID function 0x8000'0005.
///
/// \return  the descriptor decoded from ECX as `d`, and from EDX as `i`
static splitcache_t
l1 (void)
{
    auto const regs = cpuid (0x8000'0005);

    splitcache_t res {};
    res.d = _l1_from_u32 (regs.c);
    res.i = _l1_from_u32 (regs.d);
    return res;
}
//-----------------------------------------------------------------------------
/// Query the L2 cache descriptor using ECX of CPUID function 0x8000'0006.
///
/// The associativity member holds the raw 4 bit field as reported; it is
/// not translated into a number of ways here.
static cache_t
l2 (void)
{
    auto const ecx = cpuid (0x8000'0006).c;

    cache_t res {};

    res.size          = bits<31, 16> (ecx);
    res.associativity = bits<15, 12> (ecx);
    res.lines_per_tag = bits<11,  8> (ecx);
    res.line_size     = bits< 7,  0> (ecx);

    return res;
}
//-----------------------------------------------------------------------------
/// Lookup table from the 4 bit L3 associativity field (as read by `l3 ()`)
/// to a { low, high } pair of way counts.
///
/// Entries are listed positionally, one per encoding in ascending order,
/// rather than with C99 array index designators, which are not part of
/// standard C++.
///
/// NOTE(review): the { 0, 0 } rows for encodings 0x0, 0x7, and 0x9
/// presumably correspond to disabled/reserved encodings -- confirm against
/// the vendor's CPUID documentation.
static constexpr std::pair<u16, u16> L3_ASSOCIATIVITY[0xF + 1] = {
    /* 0x0 */ {   0,   0 },
    /* 0x1 */ {   1,   1 },
    /* 0x2 */ {   2,   2 },
    /* 0x3 */ {   3,   3 },
    /* 0x4 */ {   4,   5 },
    /* 0x5 */ {   6,   7 },
    /* 0x6 */ {   8,  15 },
    /* 0x7 */ {   0,   0 },
    /* 0x8 */ {  16,  31 },
    /* 0x9 */ {   0,   0 },
    /* 0xA */ {  32,  47 },
    /* 0xB */ {  48,  63 },
    /* 0xC */ {  64,  95 },
    /* 0xD */ {  96, 127 },
    /* 0xE */ { 128,   0 },  // less than fully
    /* 0xF */ {   0,   0 },  // fully associative
};
//-----------------------------------------------------------------------------
/// Query the L3 cache descriptor using EDX of CPUID function 0x8000'0006.
///
/// The size field (bits 31:18) is scaled by 512, and the associativity
/// field (bits 15:12) is translated through `L3_ASSOCIATIVITY`, keeping the
/// low bound of the range. The raw associativity field is also printed to
/// standard output as a side effect.
static cache_t
l3 (void)
{
    auto const edx = cpuid (0x8000'0006).d;

    auto const l3assoc = bits<15, 12> (edx);
    fmt::print ("l3assoc: {}\n", l3assoc);

    cache_t res {};

    res.size          = bits<31, 18> (edx) * 512u;
    res.associativity = L3_ASSOCIATIVITY[l3assoc].first;
    res.lines_per_tag = bits<11,  8> (edx);
    res.line_size     = bits< 7,  0> (edx);

    return res;
}
//-----------------------------------------------------------------------------
/// Formats a `cache_t` as four "name: value" lines, each terminated by a
/// newline. No format specification is accepted.
template <>
struct fmt::formatter<::cache_t> {
    constexpr format_parse_context::iterator
    parse (format_parse_context &ctx)
    {
        return ctx.begin ();
    }


    // `format` is const so that const objects can be formatted; the
    // `splitcache_t` formatter passes const `cache_t` members to this one.
    format_context::iterator
    format (::cache_t const &val, format_context &ctx) const
    {
        return fmt::format_to (
            ctx.out (),
            "size: {}\nassociativity: {}\nlines_per_tag: {}\nline_size: {}\n",
            val.size,
            val.associativity,
            val.lines_per_tag,
            val.line_size
        );
    }
};
//-----------------------------------------------------------------------------
/// Formats a `splitcache_t` as a "d:" section followed by an "i:" section,
/// each containing the formatted `cache_t`. No format specification is
/// accepted.
template <>
struct fmt::formatter<::splitcache_t> {
    constexpr format_parse_context::iterator
    parse (format_parse_context &ctx)
    {
        return ctx.begin ();
    }


    // `format` is const so that const objects can be formatted.
    format_context::iterator
    format (::splitcache_t const &val, format_context &ctx) const
    {
        return fmt::format_to (ctx.out (), "d:\n{}\ni:\n{}\n", val.d, val.i);
    }
};
///////////////////////////////////////////////////////////////////////////////
namespace {
    /// One entry of the cache topology enumeration, as decoded by
    /// `cache_topology ()`.
    ///
    /// The members hold the raw field values. `size ()` adds one to `ways`,
    /// `partitions`, `line_size`, and `num_sets` before combining them, so
    /// those four are treated as "count minus one" encodings.
    struct cachetopology_t {
        u32  num_sharing_cache;
        bool fully_associative;
        bool self_initialisation;
        u08  level;
        u08  type;   // zero marks an invalid/terminating entry
        u16  ways;
        u16  partitions;
        u16  line_size;
        u32  num_sets;
        bool inclusive;
        bool wbinvd;

        /// The product of (ways + 1), (partitions + 1), (line_size + 1),
        /// and (num_sets + 1).
        ///
        /// A value-initialised (all zero) entry therefore reports 1.
        u32 size (void) const
        {
            // Widen each u16 to u32 before multiplying. Otherwise the
            // operands are promoted to signed int, and the product of the
            // three narrow fields can overflow it, which is undefined
            // behaviour. Unsigned arithmetic wraps instead.
            return (u32 { ways       } + 1u)
                 * (u32 { partitions } + 1u)
                 * (u32 { line_size  } + 1u)
                 * (num_sets + 1u);
        }
    };
}
/// Query one entry of the cache topology using CPUID function 0x8000'001D.
///
/// \param idx  the sub-function index, passed to `cpuid` as its parameter
/// \return     the decoded entry, or a value-initialised (all zero) entry
///             if the type field (bits 4:0 of EAX) is zero
static cachetopology_t
cache_topology (u32 idx)
{
    auto const [eax, ebx, ecx, edx] = cpuid (0x8000'001D, idx);

    // A zero type field means there is nothing to decode for this index.
    if (bits<4, 0> (eax) == 0)
        return {};

    cachetopology_t res {};

    res.num_sharing_cache   = bits<25, 14> (eax);
    res.fully_associative   = bits< 9,  9> (eax) != 0;
    res.self_initialisation = bits< 8,  8> (eax) != 0;
    res.level               = bits< 7,  5> (eax);
    res.type                = bits< 4,  0> (eax);

    res.ways                = bits<31, 22> (ebx);
    res.partitions          = bits<21, 12> (ebx);
    res.line_size           = bits<11,  0> (ebx);

    res.num_sets            = ecx;

    res.inclusive           = bits< 1,  1> (edx) != 0;
    res.wbinvd              = bits< 0,  0> (edx) != 0;

    return res;
}
//-----------------------------------------------------------------------------
/// Formats a `cachetopology_t` as one "name: value" line per member,
/// followed by a final line holding the result of `size ()`. No format
/// specification is accepted.
template <>
struct fmt::formatter<::cachetopology_t> {
    constexpr format_parse_context::iterator
    parse (format_parse_context &ctx)
    {
        return ctx.begin ();
    }


    // `format` is const so that const objects can be formatted; `main`
    // formats a `const` topology entry.
    format_context::iterator
    format (::cachetopology_t const &val, format_context &ctx) const
    {
        return fmt::format_to (ctx.out (),
            "num_sharing_cache: {}\n"
            "fully_associative: {}\n"
            "self_initialisation: {}\n"
            "level: {}\n"
            "type: {}\n"
            "ways: {}\n"
            "partitions: {}\n"
            "line_size: {}\n"
            "num_sets: {}\n"
            "inclusive: {}\n"
            "wbinvd: {}\n"
            "size: {}\n",
            val.num_sharing_cache,
            val.fully_associative,
            val.self_initialisation,
            val.level,
            val.type,
            val.ways,
            val.partitions,
            val.line_size,
            val.num_sets,
            val.inclusive,
            val.wbinvd,
            val.size ()
        );
    }
};
///////////////////////////////////////////////////////////////////////////////
// TODO: Function Bh — Extended Topology Enumeration
#include <iostream>
///////////////////////////////////////////////////////////////////////////////
int
main ()
{
std::cout << cruft::cpu::native {} << '\n';
fmt::print ("vendor: {}\n\n", vendor ());
fmt::print ("l1:\n{}\n", l1 ());
fmt::print ("l2:\n{}\n", l2 ());
fmt::print ("l3:\n{}\n", l3 ());
for (int i = 0; ; ++i) {
auto const cache = cache_topology (i);
if (cache.type == 0)
break;
fmt::print ("cache{}:\n{}\n", i, cache);
}
auto const self = getpid ();
for (int proc = 0, nprocs = get_nprocs (); proc < nprocs; ++proc) {
cpu_set_t set {};
CPU_ZERO (&set);
CPU_SET (proc, &set);
if (sched_setaffinity (self, sizeof (set), &set) == -1)
err (EXIT_FAILURE, "sched_setaffinity");
auto const llc = cache_topology (3);
fmt::print ("proc {}: {}\n", proc, llc.size () / 1024 / 1024);
}
//std::cout << cruft::cpu::native {} << '\n';
};