KleidiAI Coverage Report


Directory: ./
File: test/common/cpu_info.cpp
Date: 2025-10-20 13:18:31
Coverage Exec Excl Total
Lines: 90.0% 45 5 55
Functions: 87.5% 14 0 16
Branches: 62.5% 10 12 28

Line Branch Exec Source
1 //
2 // SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 //
4 // SPDX-License-Identifier: Apache-2.0
5 //
6
7 #include "test/common/cpu_info.hpp"
8
9 #include <array>
10 #include <cstddef>
11 #include <cstdint>
12 #include <tuple>
13
14 #include "kai/kai_common.h"
15
16 #if defined(__aarch64__) && defined(__linux__)
17 #include <sys/auxv.h>
18 #endif // defined(__aarch64__) && defined(__linux__)
19
20 #if defined(__aarch64__) && defined(__APPLE__)
21 #include <sys/sysctl.h>
22 #include <sys/types.h>
23
24 #include <string_view>
25 #endif // defined(__aarch64__) && defined(__APPLE__)
26
27 #if (defined(__aarch64__) && defined(_WIN64)) || defined(_M_ARM64)
28 #include <Windows.h>
29 #include <processthreadsapi.h>
30 #include <sysinfoapi.h>
31 #include <winnt.h>
32 #endif // (defined(__aarch64__) && defined(_WIN64)) || defined(_M_ARM64)
33
34 namespace kai::test {
35
36 namespace {
37
/// CPU features that can be queried.
///
/// The enumerator value is used directly as the index into the per-platform
/// `cpu_caps` table, so the order here must match the order of that table.
enum CpuFeatures {
    /// Advanced SIMD.
    ADVSIMD = 0,
    /// Dot product.
    DOTPROD,
    /// Int8 matrix multiplication.
    I8MM,
    /// Half-precision floating point.
    FP16,
    /// BFloat16.
    BF16,
    /// SVE.
    SVE,
    /// SVE2.
    SVE2,
    /// SME.
    SME,
    /// SME2.
    SME2,
    /// Number of features. Keep this last; add new CPU capabilities before it.
    LAST_ELEMENT
};
50
51 #if defined(__aarch64__) && defined(__linux__)
/// Fallback definitions for CPU capability bits that are not available in the toolchain headers.
///
/// Each constant is only defined when no macro of the same name already exists.
/// The shifted operand is a `uint64_t` so that it matches the declared type of the
/// constant and the shift never depends on the width of `unsigned long`.
#ifndef HWCAP_ASIMD
constexpr uint64_t HWCAP_ASIMD = uint64_t{1} << 1;
#endif
#ifndef HWCAP_FPHP
constexpr uint64_t HWCAP_FPHP = uint64_t{1} << 9;
#endif
#ifndef HWCAP_ASIMDHP
constexpr uint64_t HWCAP_ASIMDHP = uint64_t{1} << 10;
#endif
#ifndef HWCAP_ASIMDDP
constexpr uint64_t HWCAP_ASIMDDP = uint64_t{1} << 20;
#endif
#ifndef HWCAP_SVE
constexpr uint64_t HWCAP_SVE = uint64_t{1} << 22;
#endif
#ifndef HWCAP2_SVE2
constexpr uint64_t HWCAP2_SVE2 = uint64_t{1} << 1;
#endif
#ifndef HWCAP2_I8MM
constexpr uint64_t HWCAP2_I8MM = uint64_t{1} << 13;
#endif
#ifndef HWCAP2_BF16
constexpr uint64_t HWCAP2_BF16 = uint64_t{1} << 14;
#endif
#ifndef HWCAP2_SME
constexpr uint64_t HWCAP2_SME = uint64_t{1} << 23;
#endif
#ifndef HWCAP2_SME2
constexpr uint64_t HWCAP2_SME2 = uint64_t{1} << 37;
#endif
83
84 const std::array<std::tuple<CpuFeatures, uint64_t, uint64_t>, CpuFeatures::LAST_ELEMENT> cpu_caps{{
85 {CpuFeatures::ADVSIMD, AT_HWCAP, HWCAP_ASIMD}, //
86 {CpuFeatures::DOTPROD, AT_HWCAP, HWCAP_ASIMDDP}, //
87 {CpuFeatures::I8MM, AT_HWCAP2, HWCAP2_I8MM}, //
88 {CpuFeatures::FP16, AT_HWCAP, HWCAP_FPHP | HWCAP_ASIMDHP}, //
89 {CpuFeatures::BF16, AT_HWCAP2, HWCAP2_BF16}, //
90 {CpuFeatures::SVE, AT_HWCAP, HWCAP_SVE}, //
91 {CpuFeatures::SVE2, AT_HWCAP2, HWCAP2_SVE2}, //
92 {CpuFeatures::SME, AT_HWCAP2, HWCAP2_SME}, //
93 {CpuFeatures::SME2, AT_HWCAP2, HWCAP2_SME2}, //
94 }};
95
96 bool get_cap_support(CpuFeatures feature) {
97 KAI_ASSERT(feature < cpu_caps.size());
98
99 auto [cpu_feature, cap_id, cap_bits] = cpu_caps[static_cast<int>(feature)];
100 // Make sure CPU feature is correctly initialized
101 KAI_ASSERT(feature == cpu_feature);
102
103 const uint64_t hwcaps = getauxval(cap_id);
104
105 return (hwcaps & cap_bits) == cap_bits;
106 }
107 #elif defined(__aarch64__) && defined(__APPLE__)
108 const std::array<std::tuple<CpuFeatures, std::string_view>, CpuFeatures::LAST_ELEMENT> cpu_caps{{
109 {CpuFeatures::ADVSIMD, "hw.optional.arm64"}, // Advanced SIMD is always present on arm64
110 {CpuFeatures::DOTPROD, "hw.optional.arm.FEAT_DotProd"},
111 {CpuFeatures::I8MM, "hw.optional.arm.FEAT_I8MM"},
112 {CpuFeatures::FP16, "hw.optional.arm.FEAT_FP16"},
113 {CpuFeatures::BF16, "hw.optional.arm.FEAT_BF16"},
114 {CpuFeatures::SVE, ""}, // not supported
115 {CpuFeatures::SVE2, ""}, // not supported
116 {CpuFeatures::SME, "hw.optional.arm.FEAT_SME"},
117 {CpuFeatures::SME2, "hw.optional.arm.FEAT_SME2"},
118 }};
119
120 18 bool get_cap_support(CpuFeatures feature) {
121 KAI_ASSERT(feature < CpuFeatures::LAST_ELEMENT);
122
123 64 auto [cpu_feature, cap_name] = cpu_caps[static_cast<int>(feature)];
124 KAI_ASSERT(feature == cpu_feature);
125
126 18 uint32_t value{};
127
128
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 14 times.
18 if (cap_name.length() > 0) {
129 14 size_t size = sizeof(value);
130
131 KAI_ASSERT(sysctlbyname(cap_name.data(), nullptr, &size, nullptr, 0) == 0);
132 KAI_ASSERT(size == sizeof(value));
133
134 28 [[maybe_unused]] int status = sysctlbyname(cap_name.data(), &value, &size, nullptr, 0);
135 KAI_ASSERT(status == 0);
136 14 }
137
138 36 return value == 1;
139 18 }
140 #elif (defined(__aarch64__) && defined(_WIN64)) || defined(_M_ARM64)
141 // Some system registers are provided in HARDWARE\DESCRIPTION\System\CentralProcessor\* registry.
142 //
143 // The registry name is encoded as
144 // CP {op0 & 1, op1, CRn, CRm, op2}
145 //
146 // These can be used to detect architectural features that are unable to detect reliably
147 // using IsProcessorFeaturePresent. It must not be used to detect architectural features
148 // that require operating system support such as SVE and SME.
149 const char* ID_AA64PFR0_EL1 = "CP 4020";
150 const char* ID_AA64ISAR1_EL1 = "CP 4031";
151
152 const std::array<std::tuple<CpuFeatures, DWORD, const char*, uint64_t>, CpuFeatures::LAST_ELEMENT> cpu_caps{{
153 {CpuFeatures::ADVSIMD, PF_ARM_NEON_INSTRUCTIONS_AVAILABLE, nullptr, 0},
154 {CpuFeatures::DOTPROD, PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE, nullptr, 0},
155 {CpuFeatures::I8MM, 0, ID_AA64ISAR1_EL1, 0x00f0000000000000ULL},
156 {CpuFeatures::FP16, 0, ID_AA64PFR0_EL1, 0x00000000000f0000ULL},
157 {CpuFeatures::BF16, 0, ID_AA64ISAR1_EL1, 0x0000f00000000000ULL},
158 {CpuFeatures::SVE, 46, nullptr, 0},
159 {CpuFeatures::SVE2, 47, nullptr, 0},
160 {CpuFeatures::SME, 0, nullptr, 0},
161 {CpuFeatures::SME2, 0, nullptr, 0},
162 }};
163
164 uint64_t read_sysreg(const char* name) {
165 uint64_t value = 0;
166 DWORD size = sizeof(value);
167
168 const LSTATUS status = RegGetValueA(
169 HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", name, RRF_RT_REG_QWORD, nullptr,
170 &value, &size);
171
172 KAI_ASSERT(status == ERROR_SUCCESS);
173
174 return value;
175 }
176
177 bool get_cap_support(CpuFeatures feature) {
178 KAI_ASSERT(feature < CpuFeatures::LAST_ELEMENT);
179 auto [cpu_feature, cap_id, reg_name, reg_mask] = cpu_caps[static_cast<int>(feature)];
180
181 if (cap_id != 0) {
182 return IsProcessorFeaturePresent(cap_id);
183 }
184
185 if (reg_name != nullptr) {
186 const uint64_t value = read_sysreg(reg_name);
187 const bool is_aarch64 = IsProcessorFeaturePresent(PF_ARM_V8_INSTRUCTIONS_AVAILABLE);
188 const bool has_feature = (value & reg_mask) != 0;
189
190 return is_aarch64 && has_feature;
191 }
192
193 return false;
194 }
195 #elif defined(__aarch64__)
196 #error Please add a way how to check implemented CPU features
197 #else
198 bool get_cap_support(CpuFeatures feature) {
199 KAI_UNUSED(feature);
200 return false;
201 }
202 #endif
203
204 /// Information about the CPU that is executing the program.
205 struct CpuInfo {
206 4 CpuInfo() :
207 2 has_advsimd(get_cap_support(CpuFeatures::ADVSIMD)),
208 2 has_dotprod(get_cap_support(CpuFeatures::DOTPROD)),
209 2 has_i8mm(get_cap_support(CpuFeatures::I8MM)),
210 2 has_fp16(get_cap_support(CpuFeatures::FP16)),
211 2 has_bf16(get_cap_support(CpuFeatures::BF16)),
212 2 has_sve(get_cap_support(CpuFeatures::SVE)),
213 2 has_sve2(get_cap_support(CpuFeatures::SVE2)),
214 2 has_sme(get_cap_support(CpuFeatures::SME)),
215 4 has_sme2(get_cap_support(CpuFeatures::SME2)) {
216 4 }
217
218 /// Gets the singleton @ref CpuInfo object.
219 76601 static const CpuInfo& current() {
220
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 76599 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
76601 static const CpuInfo cpu_info{};
221 76601 return cpu_info;
222 }
223
224 const bool has_advsimd{}; ///< AdvSIMD is supported.
225 const bool has_dotprod{}; ///< DotProd is supported.
226 const bool has_i8mm{}; ///< I8MM is supported.
227 const bool has_fp16{}; ///< FP16 is supported.
228 const bool has_bf16{}; ///< B16 is supported.
229 const bool has_sve{}; ///< SVE is supported.
230 const bool has_sve2{}; ///< SVE2 is supported.
231 const bool has_sme{}; ///< SME is supported.
232 const bool has_sme2{}; ///< SME2 is supported.
233 };
234
235 } // namespace
236
237 /// Helper functions
238 219 bool cpu_has_advsimd() {
239 219 return CpuInfo::current().has_advsimd;
240 }
241
242 14605 bool cpu_has_dotprod() {
243 14605 return CpuInfo::current().has_dotprod;
244 }
245
246 1353 bool cpu_has_dotprod_and_fp16() {
247
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1353 times.
1353 return cpu_has_dotprod() && cpu_has_fp16();
248 }
249
250 6662 bool cpu_has_i8mm() {
251 6662 return CpuInfo::current().has_i8mm;
252 }
253
254 451 bool cpu_has_i8mm_and_fp16() {
255
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 451 times.
451 return cpu_has_i8mm() && cpu_has_fp16();
256 }
257
258 2024 bool cpu_has_fp16() {
259 2024 return CpuInfo::current().has_fp16;
260 }
261
262 1402 bool cpu_has_bf16() {
263 1402 return CpuInfo::current().has_bf16;
264 }
265
266 bool cpu_has_sve() {
267 return CpuInfo::current().has_sve;
268 }
269
270 bool cpu_has_sve2() {
271 return CpuInfo::current().has_sve2;
272 }
273
274 19726 bool cpu_has_sme() {
275 19726 return CpuInfo::current().has_sme;
276 }
277
278 31963 bool cpu_has_sme2() {
279 31963 return CpuInfo::current().has_sme2;
280 }
281
282 506 bool cpu_has_dotprod_and_bf16() {
283
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 506 times.
506 return cpu_has_dotprod() && cpu_has_bf16();
284 }
285
286 506 bool cpu_has_i8mm_and_bf16() {
287
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 506 times.
506 return cpu_has_i8mm() && cpu_has_bf16();
288 }
289
290 } // namespace kai::test
291