KleidiAI Coverage Report


Directory: ./
Coverage: low: ≥ 0% medium: ≥ 75.0% high: ≥ 90.0%
Coverage Exec / Excl / Total
Lines: 92.9% 52 / 0 / 56
Functions: 75.0% 3 / 1 / 5
Branches: 64.9% 48 / 0 / 74

benchmark/dwconv/dwconv_registry.cpp
Line Branch Exec Source
1 //
2 // SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 //
4 // SPDX-License-Identifier: Apache-2.0
5 //
6
7 #include "dwconv_registry.hpp"
8
9 #include <array>
10 #include <cstddef>
11 #include <cstdint>
12 #include <optional>
13 #include <test/common/cpu_info.hpp>
14 #include <test/common/data_type.hpp>
15
16 #include "dwconv_benchmark_logic.hpp"
17 #include "dwconv_interface.hpp"
18
19 #ifdef __GNUC__
20 #pragma GCC diagnostic push
21 #pragma GCC diagnostic ignored "-Wswitch-default"
22 #endif // __GNUC__
23
24 #include <benchmark/benchmark.h>
25
26 #ifdef __GNUC__
27 #pragma GCC diagnostic pop
28 #endif // __GNUC__
29
30 // Micro-kernels to register for benchmarking
31 #include "kai/ukernels/dwconv/dwconv_f32_f32_f32p/kai_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla.h"
32 #include "kai/ukernels/dwconv/pack/kai_rhs_dwconv_pack_x32p1vlx1b_x32_x32_sme.h"
33
34 namespace kai::benchmark {
35 using DataType = test::DataType;
36
37 // Build interface + traits + RHS config for the packed FP32 kernel
38 inline constexpr DwConvPackedFloatInterface kai_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla_iface{
39 .run_dwconv = kai_run_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla,
40 };
41
42 struct DwConvBenchmarkCase {
43 ::benchmark::internal::Benchmark* benchmark;
44 const DwConvTraits* traits;
45 };
46
47 inline constexpr DwConvRhsConfig kai_dwconv_packed_fp32_rhs_cfg{
48 .layout = DwConvRhsLayout::Packed,
49 .weights_elem_bits = 32,
50 .bias_elem_bits = 32,
51 .get_packed_rhs_size = kai_rhs_get_dst_size_dwconv_pack_x32p1vlx1b_x32_x32_sme,
52 };
53
54 // Helper function to bundle traits
55 template <
56 typename GetMStep, typename GetFilterHeight, typename GetFilterWidth, typename GetKr, typename GetDstSize,
57 typename GetDstOffset, typename GetSrcOffset>
58 constexpr DwConvTraits BundleTraits(
59 GetMStep get_m_step, GetFilterHeight get_filter_height, GetFilterWidth get_filter_width, GetKr get_kr,
60 GetDstSize get_dst_size, GetDstOffset get_dst_offset, GetSrcOffset get_src_offset) {
61 DwConvTraits traits{};
62 traits.get_m_step = get_m_step;
63 traits.get_filter_height = get_filter_height;
64 traits.get_filter_width = get_filter_width;
65 traits.get_kr = get_kr;
66 traits.get_dst_size = get_dst_size;
67 traits.get_dst_offset = get_dst_offset;
68 traits.get_src_offset = get_src_offset;
69 return traits;
70 }
71
72 // Usage: declare traits for the kernel
73 inline constexpr DwConvTraits kai_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla_traits = BundleTraits(
74 kai_get_m_step_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla,
75 kai_get_filter_height_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla,
76 kai_get_filter_width_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla,
77 kai_get_kr_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla,
78 kai_get_dst_size_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla,
79 kai_get_dst_offset_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla,
80 kai_get_src_offset_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla);
81
82 36 inline std::array<DwConvBenchmarkCase, 1> dwconv_benchmarks{{
83 36 {
84
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 ::benchmark::RegisterBenchmark(
85
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 "kai_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla", kai_benchmark_dwconv,
86
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
37 RunnerFactory{[](const DwConvTraits& tr, DataType sdt, DataType ddt) {
87 1 return std::make_unique<DwConvPackedFloatRunner>(
88 1 kai_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla_iface, tr, sdt, ddt);
89 }},
90 36 kai_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla_traits, DataType::FP32, DataType::FP32,
91 kai_dwconv_packed_fp32_rhs_cfg, test::cpu_has_sme2),
92 36 &kai_dwconv_clamp_f32_f32_f32p1vlx1b_3x3_s1_4xc_sme2_mla_traits,
93 },
94 }};
95
96 9 void RegisterDwConvBenchmarks(const DwConvShape& shape) {
97
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 9 times.
9 if (!supports_unit_stride_and_dilation(shape)) {
98 return;
99 }
100
101
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
18 for (auto& entry : dwconv_benchmarks) {
102 9 const size_t filter_height = entry.traits->get_filter_height();
103 9 const size_t filter_width = entry.traits->get_filter_width();
104
105 9 const auto [out_h, out_w] = compute_dwconv_output_dims(shape, filter_height, filter_width);
106
2/4
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 9 times.
9 if (out_h == 0 || out_w == 0) {
107 continue;
108 }
109
110
0/2
✗ Branch 0 not taken.
✗ Branch 1 not taken.
18 entry.benchmark
111
13/26
✗ Branch 0 not taken.
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 9 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 9 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 9 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 9 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 9 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 9 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 9 times.
✗ Branch 16 not taken.
✓ Branch 17 taken 9 times.
✗ Branch 18 not taken.
✓ Branch 19 taken 9 times.
✗ Branch 20 not taken.
✓ Branch 21 taken 9 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 9 times.
✗ Branch 24 not taken.
✓ Branch 25 taken 6 times.
108 ->Args({
112 9 static_cast<int64_t>(shape.num_channels),
113 9 static_cast<int64_t>(shape.input_height),
114 9 static_cast<int64_t>(shape.input_width),
115 9 static_cast<int64_t>(shape.stride[0]),
116 9 static_cast<int64_t>(shape.stride[1]),
117 9 static_cast<int64_t>(shape.padding[0]),
118 9 static_cast<int64_t>(shape.padding[1]),
119 9 static_cast<int64_t>(shape.padding[2]),
120 9 static_cast<int64_t>(shape.padding[3]),
121 9 static_cast<int64_t>(shape.dilation[0]),
122 9 static_cast<int64_t>(shape.dilation[1]),
123 })
124
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 9 times.
✗ Branch 3 not taken.
9 ->ArgNames({
125
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
9 "channels",
126
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
9 "input_height",
127
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
9 "input_width",
128
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
9 "stride_h",
129
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
9 "stride_w",
130
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
9 "pad_top",
131
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
9 "pad_bottom",
132
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
9 "pad_left",
133
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
9 "pad_right",
134
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
9 "dilation_h",
135
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
9 "dilation_w",
136 });
137 9 }
138 9 }
139
140 3 std::optional<DwConvOutputShape> InferDwConvOutputDims(const DwConvShape& shape) {
141 if (dwconv_benchmarks.empty()) {
142 return std::nullopt;
143 }
144
145
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (!supports_unit_stride_and_dilation(shape)) {
146 return std::nullopt;
147 }
148
149 3 const DwConvTraits* traits = dwconv_benchmarks.front().traits;
150 3 const size_t filter_height = traits->get_filter_height();
151 3 const size_t filter_width = traits->get_filter_width();
152
153 3 const auto dims = compute_dwconv_output_dims(shape, filter_height, filter_width);
154
2/4
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 3 times.
3 if (dims.height == 0 || dims.width == 0) {
155 return std::nullopt;
156 }
157
158 3 return dims;
159 3 }
160
161 } // namespace kai::benchmark
162