From 48e71ce0ddeac4a752d204c5b0ee1791dc762cc7 Mon Sep 17 00:00:00 2001
From: Danny Robson
Date: Wed, 21 Feb 2024 14:54:15 +1000
Subject: [PATCH] tools/cpuid: add cache topology detection

---
NOTE(review): reconstructed from a copy whose line breaks were collapsed.
Blank context lines and include targets are best effort; the blob ids on the
index line predate these edits.

 tools/cpuid.cpp | 449 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 447 insertions(+), 2 deletions(-)

diff --git a/tools/cpuid.cpp b/tools/cpuid.cpp
index f4e86439..fb382ced 100644
--- a/tools/cpuid.cpp
+++ b/tools/cpuid.cpp
@@ -7,12 +7,457 @@
  */
 
 #include "cpuid.hpp"
+// NOTE(review): the targets of the angle-bracket includes below were lost
+// when this patch was extracted. The list is reconstructed from the names
+// the code uses; confirm it against the tree before applying.
+//#include <iostream>
+
+#include <cruft/util/cast.hpp>
+#include <cruft/util/maths.hpp>
+#include <cruft/util/std.hpp>
+#include <cruft/util/types/sized.hpp>
+
+#include <fmt/format.h>
+
+#include <cstdlib>
+#include <cstring>
+#include <utility>
+
+#include <err.h>
+#include <sched.h>
+#include <sys/sysinfo.h>
+#include <unistd.h>
+
+
+///////////////////////////////////////////////////////////////////////////////
+/// Extract the inclusive bit range [lo, hi] of `val`, shifted down to bit 0.
+///
+/// The value is returned as the smallest unsigned type of at least 8 bits
+/// that holds `hi - lo + 1` bits, so results can be used directly in
+/// brace-initialisers of narrow fields.
+template <u32 hi, u32 lo>
+static constexpr
+auto
+bits (u32 val)
+{
+    // The range is fixed at compile time, so reject bad ranges there too.
+    static_assert (hi >= lo, "bit range must be given as <hi, lo>");
+    static_assert (hi <  32, "bit range must lie within a u32");
+
+    constexpr u32 len   = hi - lo + 1;
+    constexpr u32 rsize = cruft::round_pow2 (len < 8 ? 8 : len);
+    using result_t = cruft::types::sized::bits::uint<rsize>;
+
+    // Shift an all-ones unsigned value down to build the mask. Unlike
+    // `(1 << len) - 1` this stays well defined for 31 and 32 bit ranges.
+    constexpr u32 mask = ~u32 {0} >> (32 - len);
+
+    return cruft::cast::narrow<result_t> ((val >> lo) & mask);
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+namespace {
+    /// The four registers written by the `cpuid` instruction.
+    struct cpuid_t {
+        u32 a, b, c, d;
+    };
+}
+
+
+//-----------------------------------------------------------------------------
+/// Execute `cpuid` with `query` in EAX and `param` in ECX.
+static cpuid_t
+cpuid (u32 query, u32 param = 0)
+{
+    ::cpuid_t res {};
+
+    // `volatile` is required. An asm statement with outputs may otherwise be
+    // merged with, or hoisted above, an identical earlier query; `main`
+    // relies on repeating the same query after moving to another processor.
+    asm volatile (
+        "cpuid"
+        : "=a" (res.a), "=b" (res.b), "=c" (res.c), "=d" (res.d)
+        : "a" (query), "c" (param)
+    );
+
+    return res;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+namespace {
+    /// The vendor identification registers, in the order that they spell
+    /// the vendor string.
+    struct vendor_t {
+        u32 b, d, c;
+    };
+}
+
+
+//-----------------------------------------------------------------------------
+// The trailing comments list each literal's bytes from high to low. Read in
+// little endian memory order the three values spell 'AuthenticAMD'.
+static constexpr vendor_t VENDOR_AMD [[maybe_unused]] = {
+    .b = 0x6874'7541, // 'htuA'
+    .d = 0x6974'6E65, // 'itne'
+    .c = 0x444D'4163, // 'DMAc'
+};
+
+
+//-----------------------------------------------------------------------------
+/// Query the vendor identification string of the current processor.
+static vendor_t
+vendor (void)
+{
+    // Leaf 0 is where every x86 vendor reports its identification string.
+    // AMD mirrors it in leaf 0x8000'0000, but other vendors need not.
+    auto const src = cpuid (0);
+    return {
+        .b = src.b,
+        .d = src.d,
+        .c = src.c
+    };
+}
+
+
+//-----------------------------------------------------------------------------
+template <>
+struct fmt::formatter<::vendor_t> {
+    constexpr format_parse_context::iterator
+    parse (format_parse_context &ctx)
+    {
+        return ctx.begin ();
+    }
+
+    format_context::iterator
+    format (::vendor_t const &val, format_context &ctx) const
+    {
+        // Thread the iterator through each call rather than assuming that
+        // `ctx.out ()` observes writes made through a copy of it.
+        auto out = ctx.out ();
+
+        for (u32 const reg: { val.b, val.d, val.c }) {
+            // One spare byte keeps the four characters NUL terminated.
+            char buf[sizeof (reg) + 1] = {};
+            std::memcpy (buf, &reg, sizeof (reg));
+            out = fmt::format_to (out, "{}", buf);
+        }
+
+        return out;
+    }
+};
+
+
+///////////////////////////////////////////////////////////////////////////////
+namespace {
+    /// A cache as described by the legacy leaves 0x8000'0005 and
+    /// 0x8000'0006.
+    struct cache_t {
+        u32 size;           // KiB
+        u16 associativity;
+        u16 lines_per_tag;
+        u16 line_size;      // bytes
+    };
+
+    /// The separate data and instruction caches of one level.
+    struct splitcache_t {
+        cache_t d;
+        cache_t i;
+    };
+}
+
+
+//-----------------------------------------------------------------------------
+/// Unpack an L1 descriptor register. From the high byte down it holds the
+/// size, associativity, lines per tag, and line size.
+///
+/// The name carries no leading underscore as such names are reserved at
+/// namespace scope.
+static cache_t
+l1_from_u32 (u32 const src)
+{
+    return cache_t {
+        .size          = bits<31, 24> (src),
+        .associativity = bits<23, 16> (src),
+        .lines_per_tag = bits<15,  8> (src),
+        .line_size     = bits< 7,  0> (src),
+    };
+}
+
+
+//-----------------------------------------------------------------------------
+/// Query the L1 data and instruction caches.
+static splitcache_t
+l1 (void)
+{
+    auto const src = cpuid (0x8000'0005);
+    return {
+        .d = l1_from_u32 (src.c),
+        .i = l1_from_u32 (src.d),
+    };
+}
+
+
+//-----------------------------------------------------------------------------
+// Way counts, as { minimum, maximum }, for each value of the 4 bit
+// associativity encoding used by the L2 and L3 fields of leaf 0x8000'0006.
+static constexpr std::pair<u16, u16> L3_ASSOCIATIVITY[0xF + 1] = {
+    [ 0] = {   0,   0 }, // disabled
+    [ 1] = {   1,   1 },
+    [ 2] = {   2,   2 },
+    [ 3] = {   3,   3 },
+    [ 4] = {   4,   5 },
+    [ 5] = {   6,   7 },
+    [ 6] = {   8,  15 },
+    [ 7] = {   0,   0 }, // reserved
+    [ 8] = {  16,  31 },
+    [ 9] = {   0,   0 }, // described by leaf 0x8000'001D instead
+    [10] = {  32,  47 },
+    [11] = {  48,  63 },
+    [12] = {  64,  95 },
+    [13] = {  96, 127 },
+    [14] = { 128,   0 }, // less than fully
+    [15] = {   0,   0 }, // fully associative
+};
+
+
+//-----------------------------------------------------------------------------
+/// Query the L2 cache.
+static cache_t
+l2 (void)
+{
+    auto const src   = cpuid (0x8000'0006);
+    auto const assoc = bits<15, 12> (src.c);
+
+    return cache_t {
+        .size          = bits<31, 16> (src.c),
+        // The field is an encoding, not a way count, exactly as for L3.
+        .associativity = L3_ASSOCIATIVITY[assoc].first,
+        .lines_per_tag = bits<11,  8> (src.c),
+        .line_size     = bits< 7,  0> (src.c),
+    };
+}
+
+
+//-----------------------------------------------------------------------------
+/// Query the L3 cache.
+static cache_t
+l3 (void)
+{
+    auto const src   = cpuid (0x8000'0006);
+    auto const units = bits<31, 18> (src.d);
+    auto const assoc = bits<15, 12> (src.d);
+
+    return {
+        // The size is reported in units of 512KiB; store it as KiB.
+        .size          = units * 512u,
+        .associativity = L3_ASSOCIATIVITY[assoc].first,
+        .lines_per_tag = bits<11,  8> (src.d),
+        .line_size     = bits< 7,  0> (src.d),
+    };
+}
+
+
+//-----------------------------------------------------------------------------
+template <>
+struct fmt::formatter<::cache_t> {
+    constexpr format_parse_context::iterator
+    parse (format_parse_context &ctx)
+    {
+        return ctx.begin ();
+    }
+
+    format_context::iterator
+    format (::cache_t const &val, format_context &ctx) const
+    {
+        return fmt::format_to (
+            ctx.out (),
+            "size: {}\nassociativity: {}\nlines_per_tag: {}\nline_size: {}\n",
+            val.size,
+            val.associativity,
+            val.lines_per_tag,
+            val.line_size
+        );
+    }
+};
+
+
+//-----------------------------------------------------------------------------
+template <>
+struct fmt::formatter<::splitcache_t> {
+    constexpr format_parse_context::iterator
+    parse (format_parse_context &ctx)
+    {
+        return ctx.begin ();
+    }
+
+    format_context::iterator
+    format (::splitcache_t const &val, format_context &ctx) const
+    {
+        return fmt::format_to (ctx.out (), "d:\n{}\ni:\n{}\n", val.d, val.i);
+    }
+};
+
+
+///////////////////////////////////////////////////////////////////////////////
+namespace {
+    /// One cache as described by leaf 0x8000'001D. Fields hold the raw
+    /// register values; `size` applies the 'minus one' encoding.
+    struct cachetopology_t {
+        u32 num_sharing_cache;
+        bool fully_associative;
+        bool self_initialisation;
+        u08 level;
+        u08 type;
+
+        u16 ways;
+        u16 partitions;
+        u16 line_size;
+
+        u32 num_sets;
+
+        bool inclusive;
+        bool wbinvd;
+
+        /// The capacity of the cache in bytes.
+        u32 size (void) const
+        {
+            // Widen before multiplying: the u16 fields would otherwise
+            // promote to signed int, which large values can overflow.
+            return (u32 {ways}       + 1)
+                 * (u32 {partitions} + 1)
+                 * (u32 {line_size}  + 1)
+                 * (num_sets         + 1);
+        }
+    };
+}
+
+
+//-----------------------------------------------------------------------------
+/// Query the cache descriptor at index `idx` of leaf 0x8000'001D.
+///
+/// Returns a zeroed descriptor, whose `type` of 0 marks it as absent, if the
+/// index is past the last cache or the processor lacks the leaf.
+static cachetopology_t
+cache_topology (u32 idx)
+{
+    constexpr u32 LEAF = 0x8000'001D;
+
+    // Leaf 0x8000'0000 reports the highest extended leaf in EAX. Reading an
+    // unimplemented leaf gives no guarantee of a terminating null entry.
+    if (cpuid (0x8000'0000).a < LEAF)
+        return {};
+
+    auto const src  = cpuid (LEAF, idx);
+    auto const type = bits<4, 0> (src.a);
+    if (type == 0)
+        return {};
+
+    return {
+        .num_sharing_cache   = bits<25, 14> (src.a),
+        .fully_associative   = !!bits< 9,  9> (src.a),
+        .self_initialisation = !!bits< 8,  8> (src.a),
+        .level               = bits< 7,  5> (src.a),
+        .type                = type,
+
+        .ways       = bits<31, 22> (src.b),
+        .partitions = bits<21, 12> (src.b),
+        .line_size  = bits<11,  0> (src.b),
+
+        .num_sets = src.c,
+
+        .inclusive = !!bits<1, 1> (src.d),
+        .wbinvd    = !!bits<0, 0> (src.d),
+    };
+}
+
+
+//-----------------------------------------------------------------------------
+template <>
+struct fmt::formatter<::cachetopology_t> {
+    constexpr format_parse_context::iterator
+    parse (format_parse_context &ctx)
+    {
+        return ctx.begin ();
+    }
+
+    format_context::iterator
+    format (::cachetopology_t const &val, format_context &ctx) const
+    {
+        return fmt::format_to (ctx.out (),
+            "num_sharing_cache: {}\n"
+            "fully_associative: {}\n"
+            "self_initialisation: {}\n"
+            "level: {}\n"
+            "type: {}\n"
+            "ways: {}\n"
+            "partitions: {}\n"
+            "line_size: {}\n"
+            "num_sets: {}\n"
+            "inclusive: {}\n"
+            "wbinvd: {}\n"
+            "size: {}\n",
+
+            val.num_sharing_cache,
+            val.fully_associative,
+            val.self_initialisation,
+            val.level,
+            val.type,
+            val.ways,
+            val.partitions,
+            val.line_size,
+            val.num_sets,
+            val.inclusive,
+            val.wbinvd,
+
+            val.size ()
+        );
+    }
+};
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// TODO: Function Bh — Extended Topology Enumeration
 
-#include <iostream>
 
 
 ///////////////////////////////////////////////////////////////////////////////
 int main ()
 {
-    std::cout << cruft::cpu::native {} << '\n';
+    fmt::print ("vendor: {}\n\n", vendor ());
+    fmt::print ("l1:\n{}\n", l1 ());
+    fmt::print ("l2:\n{}\n", l2 ());
+    fmt::print ("l3:\n{}\n", l3 ());
+
+    // Walk the cache descriptors until the first absent entry.
+    for (u32 i = 0; ; ++i) {
+        auto const cache = cache_topology (i);
+        if (cache.type == 0)
+            break;
+        fmt::print ("cache{}:\n{}\n", i, cache);
+    }
+
+    // Report, in MiB, the cache at this index as seen from each processor.
+    // NOTE(review): presumably index 3 is the last level cache on the
+    // author's hardware; nothing here verifies that.
+    constexpr u32 LLC_INDEX = 3;
+
+    auto const self = getpid ();
+
+    for (int proc = 0, nprocs = get_nprocs (); proc < nprocs; ++proc) {
+        cpu_set_t set {};
+        CPU_ZERO (&set);
+        CPU_SET (proc, &set);
+
+        // Migrate to `proc` so that the query below executes there.
+        if (sched_setaffinity (self, sizeof (set), &set) == -1)
+            err (EXIT_FAILURE, "sched_setaffinity");
+
+        auto const llc = cache_topology (LLC_INDEX);
+        fmt::print ("proc {}: {}\n", proc, llc.size () / 1024 / 1024);
+    }
+
+    //std::cout << cruft::cpu::native {} << '\n';
 };