KleidiAI Coverage Report


Directory: ./
Coverage: low: ≥ 0% medium: ≥ 75.0% high: ≥ 90.0%
Coverage Exec / Excl / Total
Lines: 100.0% 28 / 0 / 28
Functions: 100.0% 1 / 1 / 2
Branches: 60.9% 28 / 0 / 46

benchmark/imatmul/imatmul_registry.cpp
Line Branch Exec Source
1 //
2 // SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 //
4 // SPDX-License-Identifier: Apache-2.0
5 //
6
7 #include "imatmul_registry.hpp"
8
9 #include <array>
10 #include <cstddef>
11 #include <cstdint>
12 #include <test/common/cpu_info.hpp>
13 #include <test/common/data_type.hpp>
14
15 #include "imatmul_benchmark_logic.hpp"
16 #include "imatmul_interface.hpp"
17
18 #ifdef __GNUC__
19 #pragma GCC diagnostic push
20 #pragma GCC diagnostic ignored "-Wswitch-default"
21 #endif // __GNUC__
22
23 #include <benchmark/benchmark.h>
24
25 #ifdef __GNUC__
26 #pragma GCC diagnostic pop
27 #endif // __GNUC__
28
29 // Micro-kernels to register for benchmarking
30
31 // imatmul_clamp_f16_f16p_f16p
32 #include "kai/ukernels/matmul/imatmul_clamp_f16_f16p_f16p/kai_imatmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa.h"
33 #include "kai/ukernels/matmul/imatmul_clamp_f16_f16p_f16p/kai_imatmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa.h"
34
35 // imatmul_clamp_f32_f32p_f32p
36 #include "kai/ukernels/matmul/imatmul_clamp_f32_f32p_f32p/kai_imatmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme2_mopa.h"
37 #include "kai/ukernels/matmul/imatmul_clamp_f32_f32p_f32p/kai_imatmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa.h"
38
39 // imatmul_clamp_qai8_qai8p_qsi8cxp
40 #include "kai/ukernels/matmul/imatmul_clamp_qai8_qai8p_qsi8cxp/kai_imatmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa.h"
41 #include "kai/ukernels/matmul/imatmul_clamp_qai8_qai8p_qsi8cxp/kai_imatmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa.h"
42
43 namespace kai::benchmark {
44 using DataType = test::DataType;
45
46 // imatmul_clamp_f16_f16p_f16p
47 inline constexpr ImatmulBaseInterface kai_imatmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa_interface{
48 .run_imatmul = kai_run_imatmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa,
49 };
50
51 inline constexpr ImatmulBaseInterface kai_imatmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa_interface{
52 .run_imatmul = kai_run_imatmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa,
53 };
54
55 // imatmul_clamp_f32_f32p_f32p
56 inline constexpr ImatmulBaseInterface kai_imatmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme2_mopa_interface{
57 .run_imatmul = kai_run_imatmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme2_mopa,
58 };
59
60 inline constexpr ImatmulBaseInterface kai_imatmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa_interface{
61 .run_imatmul = kai_run_imatmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa,
62 };
63
64 // imatmul_clamp_qai8_qai8p_qsi8cxp
65 inline constexpr ImatmulStaticQuantInterface
66 kai_imatmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa_interface{
67 .run_imatmul = kai_run_imatmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa,
68 };
69
70 inline constexpr ImatmulStaticQuantInterface
71 kai_imatmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa_interface{
72 .run_imatmul = kai_run_imatmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa,
73 };
74
75 36 inline const std::array imatmul_benchmarks{
76 // imatmul_clamp_f16_f16p_f16p
77
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
216 RegisterBenchmark(
78
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 "kai_imatmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa", kai_benchmark_imatmul<ImatmulBaseInterface>,
79 36 kai_imatmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa_interface, DataType::FP16, test::cpu_has_sme2),
80
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 RegisterBenchmark(
81
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 "kai_imatmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa", kai_benchmark_imatmul<ImatmulBaseInterface>,
82 36 kai_imatmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa_interface, DataType::FP16, test::cpu_has_sme),
83
84 // imatmul_clamp_f32_f32p_f32p
85
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 RegisterBenchmark(
86
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 "kai_imatmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme2_mopa", kai_benchmark_imatmul<ImatmulBaseInterface>,
87 36 kai_imatmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme2_mopa_interface, DataType::FP32, test::cpu_has_sme2),
88
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 RegisterBenchmark(
89
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 "kai_imatmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa", kai_benchmark_imatmul<ImatmulBaseInterface>,
90 36 kai_imatmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa_interface, DataType::FP32, test::cpu_has_sme),
91
92 // imatmul_clamp_qai8_qai8p_qsi8cxp
93
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 RegisterBenchmark(
94
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 "kai_imatmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa",
95 kai_benchmark_imatmul<ImatmulStaticQuantInterface>,
96 36 kai_imatmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa_interface, DataType::QAI8, test::cpu_has_sme),
97
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 RegisterBenchmark(
98
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 "kai_imatmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa",
99 kai_benchmark_imatmul<ImatmulStaticQuantInterface>,
100 36 kai_imatmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa_interface, DataType::QAI8,
101 test::cpu_has_sme2),
102
103 };
104
105 9 void RegisteriMatMulBenchmarks(size_t m, size_t n, size_t k_chunk_count, size_t k_chunk_length) {
106
2/2
✓ Branch 0 taken 54 times.
✓ Branch 1 taken 9 times.
63 for (const auto& benchmark : imatmul_benchmarks) {
107
0/2
✗ Branch 0 not taken.
✗ Branch 1 not taken.
108 benchmark
108
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 36 times.
54 ->Args(
109
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 36 times.
108 {static_cast<int64_t>(m), static_cast<int64_t>(n), static_cast<int64_t>(k_chunk_count),
110 54 static_cast<int64_t>(k_chunk_length)})
111
10/12
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 36 times.
✓ Branch 2 taken 18 times.
✓ Branch 3 taken 36 times.
✓ Branch 4 taken 18 times.
✓ Branch 5 taken 36 times.
✓ Branch 6 taken 18 times.
✓ Branch 7 taken 36 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 54 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 54 times.
54 ->ArgNames({"m", "n", "k_chunk_count", "k_chunk_length"});
112 54 }
113 9 }
114 } // namespace kai::benchmark
115