Line | Branch | Exec | Source |
---|---|---|---|
1 | // | ||
2 | // SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
3 | // | ||
4 | // SPDX-License-Identifier: Apache-2.0 | ||
5 | // | ||
6 | |||
7 | #include "matmul_registry.hpp" | ||
8 | |||
9 | #include <array> | ||
10 | #include <cstddef> | ||
11 | #include <cstdint> | ||
12 | #include <test/common/cpu_info.hpp> | ||
13 | #include <test/common/data_type.hpp> | ||
14 | |||
15 | #include "matmul_benchmark_logic.hpp" | ||
16 | #include "matmul_interface.hpp" | ||
17 | |||
18 | #ifdef __GNUC__ | ||
19 | #pragma GCC diagnostic push | ||
20 | #pragma GCC diagnostic ignored "-Wswitch-default" | ||
21 | #endif // __GNUC__ | ||
22 | |||
23 | #include <benchmark/benchmark.h> | ||
24 | |||
25 | #ifdef __GNUC__ | ||
26 | #pragma GCC diagnostic pop | ||
27 | #endif // __GNUC__ | ||
28 | |||
29 | // Micro-kernels to register for benchmarking | ||
30 | |||
31 | // matmul_clamp_f16_bf16p_bf16p | ||
32 | #include "kai/ukernels/matmul/matmul_clamp_f16_bf16p_bf16p/kai_matmul_clamp_f16_bf16p8x4_bf16p12x4b_8x12_neon_mmla.h" | ||
33 | |||
34 | // matmul_clamp_f16_f16_f16p | ||
35 | #include "kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla.h" | ||
36 | #include "kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p2vlx2b_1x16vl_sme2_dot.h" | ||
37 | #include "kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p2vlx2b_1x8vl_sme_mla.h" | ||
38 | #include "kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla.h" | ||
39 | #include "kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla_cortexa55.h" | ||
40 | |||
41 | // matmul_clamp_f16_f16p_f16p | ||
42 | #include "kai/ukernels/matmul/matmul_clamp_f16_f16p_f16p/kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa.h" | ||
43 | #include "kai/ukernels/matmul/matmul_clamp_f16_f16p_f16p/kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa.h" | ||
44 | |||
45 | // matmul_clamp_f32_bf16p_bf16p | ||
46 | #include "kai/ukernels/matmul/matmul_clamp_f32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p1x4_bf16p12x4b_1x36_neon_dot.h" | ||
47 | #include "kai/ukernels/matmul/matmul_clamp_f32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p8x4_bf16p12x4b_8x12_neon_mmla.h" | ||
48 | |||
49 | // matmul_clamp_f32_f32_f32p | ||
50 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla.h" | ||
51 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla.h" | ||
52 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla_cortexa55.h" | ||
53 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla.h" | ||
54 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p2vlx1b_1x8vl_sme_mla.h" | ||
55 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.h" | ||
56 | |||
57 | // matmul_clamp_f32_f32p_f32p | ||
58 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa.h" | ||
59 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa.h" | ||
60 | |||
61 | // matmul_clamp_f32_qai8dxp_qsi4c32p | ||
62 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.h" | ||
63 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod.h" | ||
64 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.h" | ||
65 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod.h" | ||
66 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.h" | ||
67 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod.h" | ||
68 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p8x4_4x8_neon_dotprod.h" | ||
69 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm.h" | ||
70 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm.h" | ||
71 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8_neon_i8mm.h" | ||
72 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm.h" | ||
73 | |||
74 | // matmul_clamp_f32_qai8dxp_qsi4cxp | ||
75 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1vlx8_qsi4cxp4vlx8_1vlx4vl_sme2_mopa.h" | ||
76 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot.h" | ||
77 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.h" | ||
78 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.h" | ||
79 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.h" | ||
80 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x4_qsi4cxp8x4_8x8x32_neon_dotprod.h" | ||
81 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod.h" | ||
82 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.h" | ||
83 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.h" | ||
84 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.h" | ||
85 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.h" | ||
86 | |||
87 | // matmul_clamp_f32_qai8dxp_qsi8cxp | ||
88 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa.h" | ||
89 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme_mopa.h" | ||
90 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot.h" | ||
91 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme_dot.h" | ||
92 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.h" | ||
93 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.h" | ||
94 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.h" | ||
95 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm.h" | ||
96 | |||
97 | // matmul_clamp_f32_qsi8d32p_qsi4c32p | ||
98 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.h" | ||
99 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.h" | ||
100 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod.h" | ||
101 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.h" | ||
102 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.h" | ||
103 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm.h" | ||
104 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.h" | ||
105 | |||
106 | // matmul_clamp_fp32_bf16p_bf16p | ||
107 | #include "kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.h" | ||
108 | |||
109 | // matmul_clamp_qai8_qai8_qsi8cxp | ||
110 | #include "kai/ukernels/matmul/matmul_clamp_qai8_qai8_qsi8cxp/kai_matmul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot.h" | ||
111 | |||
112 | // matmul_clamp_qai8_qai8p_qsi8cxp | ||
113 | #include "kai/ukernels/matmul/matmul_clamp_qai8_qai8p_qsi8cxp/kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa.h" | ||
114 | #include "kai/ukernels/matmul/matmul_clamp_qai8_qai8p_qsi8cxp/kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa.h" | ||
115 | |||
116 | // matmul_clamp_f16_qai8dxp_qsi4cxp | ||
117 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi4cxp/kai_matmul_clamp_f16_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.h" | ||
118 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi4cxp/kai_matmul_clamp_f16_qai8dxp1x8_qsi4cxp4x8_1x4_neon_dotprod.h" | ||
119 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi4cxp/kai_matmul_clamp_f16_qai8dxp4x4_qsi4cxp4x4_16x4_neon_dotprod.h" | ||
120 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi4cxp/kai_matmul_clamp_f16_qai8dxp4x8_qsi4cxp4x8_16x4_neon_i8mm.h" | ||
121 | |||
122 | // matmul_clamp_f16_qai8dxp_qsi8cxp | ||
123 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi8cxp/kai_matmul_clamp_f16_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.h" | ||
124 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi8cxp/kai_matmul_clamp_f16_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.h" | ||
125 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi8cxp/kai_matmul_clamp_f16_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.h" | ||
126 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi8cxp/kai_matmul_clamp_f16_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm.h" | ||
127 | |||
128 | // matmul_clamp_f16_qsi8d32p_qai4c32p | ||
129 | #include "kai/ukernels/matmul/matmul_clamp_f16_qsi8d32p_qai4c32p/kai_matmul_clamp_f16_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa.h" | ||
130 | #include "kai/ukernels/matmul/matmul_clamp_f16_qsi8d32p_qai4c32p/kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot.h" | ||
131 | #include "kai/ukernels/matmul/matmul_clamp_f16_qsi8d32p_qai4c32p/kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod.h" | ||
132 | #include "kai/ukernels/matmul/matmul_clamp_f16_qsi8d32p_qai4c32p/kai_matmul_clamp_f16_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod.h" | ||
133 | #include "kai/ukernels/matmul/matmul_clamp_f16_qsi8d32p_qai4c32p/kai_matmul_clamp_f16_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod.h" | ||
134 | #include "kai/ukernels/matmul/matmul_clamp_f16_qsi8d32p_qai4c32p/kai_matmul_clamp_f16_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm.h" | ||
135 | |||
136 | // matmul_clamp_f32_qsi8d32p_qai4c32p | ||
137 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qai4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa.h" | ||
138 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qai4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot.h" | ||
139 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qai4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod.h" | ||
140 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qai4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod.h" | ||
141 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qai4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod.h" | ||
142 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qai4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm.h" | ||
143 | |||
144 | // matmul_clamp_bf16_qai8dxp_qsi4c32p | ||
145 | #include "kai/ukernels/matmul/matmul_clamp_bf16_qai8dxp_qsi4c32p/kai_matmul_clamp_bf16_qai8dxp1x8_qsi4c32p4x8_1x4_neon_dotprod.h" | ||
146 | #include "kai/ukernels/matmul/matmul_clamp_bf16_qai8dxp_qsi4c32p/kai_matmul_clamp_bf16_qai8dxp4x8_qsi4c32p4x8_16x4_neon_i8mm.h" | ||
147 | |||
148 | // matmul_clamp_bf16_qai8dxp_qsi4cxp | ||
149 | #include "kai/ukernels/matmul/matmul_clamp_bf16_qai8dxp_qsi4cxp/kai_matmul_clamp_bf16_qai8dxp1x8_qsi4cxp8x8_1x8_neon_dotprod.h" | ||
150 | #include "kai/ukernels/matmul/matmul_clamp_bf16_qai8dxp_qsi4cxp/kai_matmul_clamp_bf16_qai8dxp4x8_qsi4cxp8x8_8x8_neon_i8mm.h" | ||
151 | |||
152 | namespace kai::benchmark { | ||
153 | using DataType = test::DataType; /* namespace-local shorthand for test::DataType */| ||
154 | |||
155 | // matmul_clamp_f16_bf16p_bf16p: MatMulBaseInterface object whose run_matmul member holds this kernel's kai_run_* function. | ||
156 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_bf16p8x4_bf16p12x4b_8x12_neon_mmla_interface{ | ||
157 | .run_matmul = kai_run_matmul_clamp_f16_bf16p8x4_bf16p12x4b_8x12_neon_mmla, | ||
158 | }; | ||
159 | |||
160 | // matmul_clamp_f16_f16_f16p: one MatMulStridedLhsInterface object per kernel variant, each holding that variant's kai_run_* function in run_matmul. | ||
161 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f16_f16_f16p2vlx2b_1x8vl_sme_mla_interface{ | ||
162 | .run_matmul = kai_run_matmul_clamp_f16_f16_f16p2vlx2b_1x8vl_sme_mla, | ||
163 | }; | ||
164 | |||
165 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f16_f16_f16p2vlx2b_1x16vl_sme2_dot_interface{ | ||
166 | .run_matmul = kai_run_matmul_clamp_f16_f16_f16p2vlx2b_1x16vl_sme2_dot, | ||
167 | }; | ||
168 | |||
169 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla_interface{ | ||
170 | .run_matmul = kai_run_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla, | ||
171 | }; | ||
172 | |||
173 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla_interface{ | ||
174 | .run_matmul = kai_run_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla, | ||
175 | }; | ||
176 | |||
177 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla_cortexa55_interface{ | ||
178 | .run_matmul = kai_run_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla_cortexa55, | ||
179 | }; | ||
180 | |||
181 | // matmul_clamp_f16_f16p_f16p: MatMulBaseInterface objects for the sme2_mopa and sme_mopa kernels, each holding its kai_run_* function in run_matmul. | ||
182 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa_interface{ | ||
183 | .run_matmul = kai_run_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa, | ||
184 | }; | ||
185 | |||
186 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa_interface{ | ||
187 | .run_matmul = kai_run_matmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa, | ||
188 | }; | ||
189 | |||
190 | // matmul_clamp_f32_bf16p_bf16p: MatMulBaseInterface objects for the neon_dot and neon_mmla kernels, each holding its kai_run_* function in run_matmul. | ||
191 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f32_bf16p1x4_bf16p12x4b_1x36_neon_dot_interface{ | ||
192 | .run_matmul = kai_run_matmul_clamp_f32_bf16p1x4_bf16p12x4b_1x36_neon_dot, | ||
193 | }; | ||
194 | |||
195 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f32_bf16p8x4_bf16p12x4b_8x12_neon_mmla_interface{ | ||
196 | .run_matmul = kai_run_matmul_clamp_f32_bf16p8x4_bf16p12x4b_8x12_neon_mmla, | ||
197 | }; | ||
198 | |||
199 | // matmul_clamp_f32_f32_f32p: one MatMulStridedLhsInterface object per kernel variant, each holding that variant's kai_run_* function in run_matmul. | ||
200 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla_interface{ | ||
201 | .run_matmul = kai_run_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla, | ||
202 | }; | ||
203 | |||
204 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f32_f32_f32p2vlx1b_1x8vl_sme_mla_interface{ | ||
205 | .run_matmul = kai_run_matmul_clamp_f32_f32_f32p2vlx1b_1x8vl_sme_mla, | ||
206 | }; | ||
207 | |||
208 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla_interface{ | ||
209 | .run_matmul = kai_run_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla, | ||
210 | }; | ||
211 | |||
212 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla_interface{ | ||
213 | .run_matmul = kai_run_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla, | ||
214 | }; | ||
215 | |||
216 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla_cortexa55_interface{ | ||
217 | .run_matmul = kai_run_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla_cortexa55, | ||
218 | }; | ||
219 | |||
220 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla_interface{ | ||
221 | .run_matmul = kai_run_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla, | ||
222 | }; | ||
223 | |||
224 | // matmul_clamp_f32_f32p_f32p: MatMulBaseInterface objects for the sme2_mopa and sme_mopa kernels, each holding its kai_run_* function in run_matmul. | ||
225 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa_interface{ | ||
226 | .run_matmul = kai_run_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa, | ||
227 | }; | ||
228 | |||
229 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa_interface{ | ||
230 | .run_matmul = kai_run_matmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa, | ||
231 | }; | ||
232 | |||
233 | // matmul_clamp_f32_qai8dxp_qsi4c32p: one MatMulBlockwiseDynamicQuantInterface object per neon_dotprod / neon_i8mm kernel, each holding its kai_run_* function in run_matmul. | ||
234 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
235 | kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod_interface{ | ||
236 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod, | ||
237 | }; | ||
238 | |||
239 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
240 | kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod_interface{ | ||
241 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod, | ||
242 | }; | ||
243 | |||
244 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
245 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod_interface{ | ||
246 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod, | ||
247 | }; | ||
248 | |||
249 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
250 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod_interface{ | ||
251 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod, | ||
252 | }; | ||
253 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
254 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod_interface{ | ||
255 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod, | ||
256 | }; | ||
257 | |||
258 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
259 | kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod_interface{ | ||
260 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod, | ||
261 | }; | ||
262 | |||
263 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
264 | kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p8x4_4x8_neon_dotprod_interface{ | ||
265 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x4_qsi4c32p8x4_4x8_neon_dotprod, | ||
266 | }; | ||
267 | |||
268 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
269 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm_interface{ | ||
270 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm, | ||
271 | }; | ||
272 | |||
273 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
274 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm_interface{ | ||
275 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm, | ||
276 | }; | ||
277 | |||
278 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
279 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8_neon_i8mm_interface{ | ||
280 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8_neon_i8mm, | ||
281 | }; | ||
282 | |||
283 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
284 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm_interface{ | ||
285 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm, | ||
286 | }; | ||
287 | |||
288 | // matmul_clamp_f32_qai8dxp_qsi4cxp: one MatMulFloatInterface object per kernel variant, each holding that variant's kai_run_* function in run_matmul. | ||
289 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1vlx8_qsi4cxp4vlx8_1vlx4vl_sme2_mopa_interface{ | ||
290 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1vlx8_qsi4cxp4vlx8_1vlx4vl_sme2_mopa, | ||
291 | }; | ||
292 | |||
293 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot_interface{ | ||
294 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot, | ||
295 | }; | ||
296 | |||
297 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod_interface{ | ||
298 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod, | ||
299 | }; | ||
300 | |||
301 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod_interface{ | ||
302 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod, | ||
303 | }; | ||
304 | |||
305 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod_interface{ | ||
306 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod, | ||
307 | }; | ||
308 | |||
309 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x4_qsi4cxp8x4_8x8x32_neon_dotprod_interface{ | ||
310 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x4_qsi4cxp8x4_8x8x32_neon_dotprod, | ||
311 | }; | ||
312 | |||
313 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod_interface{ | ||
314 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod, | ||
315 | }; | ||
316 | |||
317 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm_interface{ | ||
318 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm, | ||
319 | }; | ||
320 | |||
321 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm_interface{ | ||
322 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm, | ||
323 | }; | ||
324 | |||
325 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm_interface{ | ||
326 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm, | ||
327 | }; | ||
328 | |||
329 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm_interface{ | ||
330 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm, | ||
331 | }; | ||
332 | |||
333 | // matmul_clamp_f32_qai8dxp_qsi8cxp: one MatMulFloatInterface object per kernel variant, each holding that variant's kai_run_* function in run_matmul. | ||
334 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme_mopa_interface{ | ||
335 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme_mopa, | ||
336 | }; | ||
337 | |||
338 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme_dot_interface{ | ||
339 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme_dot, | ||
340 | }; | ||
341 | |||
342 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod_interface{ | ||
343 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod, | ||
344 | }; | ||
345 | |||
346 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod_interface{ | ||
347 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod, | ||
348 | }; | ||
349 | |||
350 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod_interface{ | ||
351 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod, | ||
352 | }; | ||
353 | |||
354 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm_interface{ | ||
355 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm, | ||
356 | }; | ||
357 | |||
358 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa_interface{ | ||
359 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa, | ||
360 | }; | ||
361 | |||
362 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot_interface{ | ||
363 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot, | ||
364 | }; | ||
365 | |||
366 | // matmul_clamp_f32_qsi8d32p_qsi4c32p: one MatMulBlockwiseDynamicQuantInterface object per kernel variant, each holding that variant's kai_run_* function in run_matmul. | ||
367 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
368 | kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa_interface{ | ||
369 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa, | ||
370 | }; | ||
371 | |||
372 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
373 | kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot_interface{ | ||
374 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot, | ||
375 | }; | ||
376 | |||
377 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
378 | kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod_interface{ | ||
379 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod, | ||
380 | }; | ||
381 | |||
382 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
383 | kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod_interface{ | ||
384 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod, | ||
385 | }; | ||
386 | |||
387 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
388 | kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod_interface{ | ||
389 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod, | ||
390 | }; | ||
391 | |||
392 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
393 | kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm_interface{ | ||
394 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm, | ||
395 | }; | ||
396 | |||
397 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
398 | kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm_interface{ | ||
399 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm, | ||
400 | }; | ||
401 | |||
402 | // matmul_clamp_fp32_bf16p_bf16p: heading follows the include directory name (spelled fp32) although the kernel symbol uses f32; run_matmul holds that kai_run_* function. | ||
403 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa_interface{ | ||
404 | .run_matmul = kai_run_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa, | ||
405 | }; | ||
406 | |||
407 | // matmul_clamp_qai8_qai8_qsi8cxp: MatMulStaticQuantInterface object whose run_matmul member holds this kernel's kai_run_* function. | ||
408 | inline constexpr MatMulStaticQuantInterface kai_matmul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot_interface{ | ||
409 | .run_matmul = kai_run_matmul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot, | ||
410 | }; | ||
411 | |||
412 | // matmul_clamp_qai8_qai8p_qsi8cxp: MatMulStaticQuantInterface objects for the sme2_mopa and sme_mopa kernels, each holding its kai_run_* function in run_matmul. | ||
413 | inline constexpr MatMulStaticQuantInterface kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa_interface{ | ||
414 | .run_matmul = kai_run_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa, | ||
415 | }; | ||
416 | |||
417 | inline constexpr MatMulStaticQuantInterface kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa_interface{ | ||
418 | .run_matmul = kai_run_matmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa, | ||
419 | }; | ||
420 | |||
421 | // matmul_clamp_bf16_qai8dxp_qsi4c32p: MatMulBlockwiseDynamicQuantGenericDstInterface objects for the neon_dotprod and neon_i8mm kernels, each holding its kai_run_* function in run_matmul. | ||
422 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
423 | kai_matmul_clamp_bf16_qai8dxp1x8_qsi4c32p4x8_1x4_neon_dotprod_interface{ | ||
424 | .run_matmul = kai_run_matmul_clamp_bf16_qai8dxp1x8_qsi4c32p4x8_1x4_neon_dotprod, | ||
425 | }; | ||
426 | |||
427 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
428 | kai_matmul_clamp_bf16_qai8dxp4x8_qsi4c32p4x8_16x4_neon_i8mm_interface{ | ||
429 | .run_matmul = kai_run_matmul_clamp_bf16_qai8dxp4x8_qsi4c32p4x8_16x4_neon_i8mm, | ||
430 | }; | ||
431 | |||
432 | // matmul_clamp_bf16_qai8dxp_qsi4cxp: MatMulBaseInterface objects for the neon_dotprod and neon_i8mm kernels, each holding its kai_run_* function in run_matmul. | ||
433 | inline constexpr MatMulBaseInterface kai_matmul_clamp_bf16_qai8dxp1x8_qsi4cxp8x8_1x8_neon_dotprod_interface{ | ||
434 | .run_matmul = kai_run_matmul_clamp_bf16_qai8dxp1x8_qsi4cxp8x8_1x8_neon_dotprod, | ||
435 | }; | ||
436 | |||
437 | inline constexpr MatMulBaseInterface kai_matmul_clamp_bf16_qai8dxp4x8_qsi4cxp8x8_8x8_neon_i8mm_interface{ | ||
438 | .run_matmul = kai_run_matmul_clamp_bf16_qai8dxp4x8_qsi4cxp8x8_8x8_neon_i8mm, | ||
439 | }; | ||
440 | |||
441 | // matmul_clamp_f16_qai8dxp_qsi4cxp: one MatMulBaseInterface object per neon_dotprod / neon_i8mm kernel, each holding its kai_run_* function in run_matmul. | ||
442 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod_interface{ | ||
443 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod, | ||
444 | }; | ||
445 | |||
446 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp1x8_qsi4cxp4x8_1x4_neon_dotprod_interface{ | ||
447 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp1x8_qsi4cxp4x8_1x4_neon_dotprod, | ||
448 | }; | ||
449 | |||
450 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp4x4_qsi4cxp4x4_16x4_neon_dotprod_interface{ | ||
451 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp4x4_qsi4cxp4x4_16x4_neon_dotprod, | ||
452 | }; | ||
453 | |||
454 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp4x8_qsi4cxp4x8_16x4_neon_i8mm_interface{ | ||
455 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp4x8_qsi4cxp4x8_16x4_neon_i8mm, | ||
456 | }; | ||
457 | |||
458 | // matmul_clamp_f16_qai8dxp_qsi8cxp: one MatMulBaseInterface object per neon_dotprod / neon_i8mm kernel, each holding its kai_run_* function in run_matmul. | ||
459 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod_interface{ | ||
460 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod, | ||
461 | }; | ||
462 | |||
463 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod_interface{ | ||
464 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod, | ||
465 | }; | ||
466 | |||
467 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod_interface{ | ||
468 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod, | ||
469 | }; | ||
470 | |||
471 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm_interface{ | ||
472 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm, | ||
473 | }; | ||
474 | |||
475 | // matmul_clamp_f16_qsi8d32p_qai4c32p: one MatMulBlockwiseDynamicQuantGenericDstInterface object per kernel variant, each holding that variant's kai_run_* function in run_matmul. | ||
476 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
477 | kai_matmul_clamp_f16_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa_interface{ | ||
478 | .run_matmul = kai_run_matmul_clamp_f16_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa, | ||
479 | }; | ||
480 | |||
481 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
482 | kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot_interface{ | ||
483 | .run_matmul = kai_run_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot, | ||
484 | }; | ||
485 | |||
486 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
487 | kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod_interface{ | ||
488 | .run_matmul = kai_run_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod, | ||
489 | }; | ||
490 | |||
491 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
492 | kai_matmul_clamp_f16_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod_interface{ | ||
493 | .run_matmul = kai_run_matmul_clamp_f16_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod, | ||
494 | }; | ||
495 | |||
496 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
497 | kai_matmul_clamp_f16_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod_interface{ | ||
498 | .run_matmul = kai_run_matmul_clamp_f16_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod, | ||
499 | }; | ||
500 | |||
501 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
502 | kai_matmul_clamp_f16_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm_interface{ | ||
503 | .run_matmul = kai_run_matmul_clamp_f16_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm, | ||
504 | }; | ||
505 | |||
506 | // matmul_clamp_f32_qsi8d32p_qai4c32p | ||
// Interface descriptors for the six kernels of this family. They use
// MatMulBlockwiseDynamicQuantInterface and are registered in `matmul_benchmarks` with
// DataType::FP32. Only `run_matmul` is initialised here.

// SME2 outer-product kernel (registered as MatMulOp::GEMM).
507 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
508 | kai_matmul_clamp_f32_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa_interface{ | ||
509 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa, | ||
510 | }; | ||
511 | |||
// SME2 dot kernel (registered as MatMulOp::GEMV).
512 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
513 | kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot_interface{ | ||
514 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot, | ||
515 | }; | ||
516 | |||
// NEON dotprod 1x4 kernel over 1x4 / 4x4 packed operands (registered as MatMulOp::GEMV).
517 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
518 | kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod_interface{ | ||
519 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod, | ||
520 | }; | ||
521 | |||
// NEON dotprod 1x4 kernel over 1x8 / 4x8 packed operands (registered as MatMulOp::GEMV).
522 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
523 | kai_matmul_clamp_f32_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod_interface{ | ||
524 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod, | ||
525 | }; | ||
526 | |||
// NEON dotprod 8x4 kernel (registered as MatMulOp::GEMM).
527 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
528 | kai_matmul_clamp_f32_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod_interface{ | ||
529 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod, | ||
530 | }; | ||
531 | |||
// NEON i8mm 8x4 kernel (registered as MatMulOp::GEMM).
532 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
533 | kai_matmul_clamp_f32_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm_interface{ | ||
534 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm, | ||
535 | }; | ||
536 | |||
// Registry of every matmul micro-kernel benchmark.
//
// Each RegisterBenchmark(...) call supplies, in order:
//   1. the benchmark name (identical to the kernel name),
//   2. the kai_benchmark_matmul<Interface> driver instantiation matching the kernel's interface type,
//   3. the interface object holding the kernel's run_matmul entry point,
//   4. the DataType matching the kernel's destination type (the `clamp_<type>` part of its name),
//   5. the MatMulOp kind (GEMM or GEMV),
//   6. a test::cpu_has_* predicate naming the CPU features the kernel depends on
//      (presumably consulted by the benchmark driver to skip unsupported hardware - confirm).
//
// The stored handles are later given their m/n/k/bl arguments by RegisterMatMulBenchmarks().
537 | inline const std::array matmul_benchmarks{ | ||
538 | // matmul_clamp_f16_bf16p_bf16p | ||
539 | RegisterBenchmark( | ||
540 | "kai_matmul_clamp_f16_bf16p8x4_bf16p12x4b_8x12_neon_mmla", kai_benchmark_matmul<MatMulBaseInterface>, | ||
541 | kai_matmul_clamp_f16_bf16p8x4_bf16p12x4b_8x12_neon_mmla_interface, DataType::FP16, MatMulOp::GEMM, | ||
542 | test::cpu_has_bf16), | ||
543 | |||
544 | // matmul_clamp_f16_f16_f16p | ||
545 | RegisterBenchmark( | ||
546 | "kai_matmul_clamp_f16_f16_f16p2vlx2b_1x8vl_sme_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, | ||
547 | kai_matmul_clamp_f16_f16_f16p2vlx2b_1x8vl_sme_mla_interface, DataType::FP16, MatMulOp::GEMV, test::cpu_has_sme), | ||
548 | RegisterBenchmark( | ||
549 | "kai_matmul_clamp_f16_f16_f16p2vlx2b_1x16vl_sme2_dot", kai_benchmark_matmul<MatMulStridedLhsInterface>, | ||
550 | kai_matmul_clamp_f16_f16_f16p2vlx2b_1x16vl_sme2_dot_interface, DataType::FP16, MatMulOp::GEMV, | ||
551 | test::cpu_has_sme2), | ||
552 | RegisterBenchmark( | ||
553 | "kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, | ||
554 | kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla_interface, DataType::FP16, MatMulOp::GEMM, | ||
555 | test::cpu_has_fp16), | ||
556 | RegisterBenchmark( | ||
557 | "kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, | ||
558 | kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla_interface, DataType::FP16, MatMulOp::GEMM, test::cpu_has_fp16), | ||
559 | RegisterBenchmark( | ||
560 | "kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla_cortexa55", kai_benchmark_matmul<MatMulStridedLhsInterface>, | ||
561 | kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla_cortexa55_interface, DataType::FP16, MatMulOp::GEMM, | ||
562 | test::cpu_has_fp16), | ||
563 | |||
564 | // matmul_clamp_f16_f16p_f16p | ||
565 | RegisterBenchmark( | ||
566 | "kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa", kai_benchmark_matmul<MatMulBaseInterface>, | ||
567 | kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa_interface, DataType::FP16, MatMulOp::GEMM, | ||
568 | test::cpu_has_sme2), | ||
569 | RegisterBenchmark( | ||
570 | "kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa", kai_benchmark_matmul<MatMulBaseInterface>, | ||
571 | kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa_interface, DataType::FP16, MatMulOp::GEMM, | ||
572 | test::cpu_has_sme), | ||
573 | |||
574 | // matmul_clamp_f32_bf16p_bf16p | ||
// Both kernels take packed BF16 operands (bf16p), so they are gated on the BF16 extension, in line
// with the f16 BF16 kernel at the top of this list. The dotprod / i8mm predicates describe the
// int8 dot-product and matrix-multiply extensions and do not imply BF16 support.
575 | RegisterBenchmark( | ||
576 | "kai_matmul_clamp_f32_bf16p1x4_bf16p12x4b_1x36_neon_dot", kai_benchmark_matmul<MatMulBaseInterface>, | ||
577 | kai_matmul_clamp_f32_bf16p1x4_bf16p12x4b_1x36_neon_dot_interface, DataType::FP32, MatMulOp::GEMV, | ||
578 | test::cpu_has_bf16), | ||
579 | RegisterBenchmark( | ||
580 | "kai_matmul_clamp_f32_bf16p8x4_bf16p12x4b_8x12_neon_mmla", kai_benchmark_matmul<MatMulBaseInterface>, | ||
581 | kai_matmul_clamp_f32_bf16p8x4_bf16p12x4b_8x12_neon_mmla_interface, DataType::FP32, MatMulOp::GEMM, | ||
582 | test::cpu_has_bf16), | ||
583 | |||
584 | // matmul_clamp_f32_f32_f32p | ||
585 | RegisterBenchmark( | ||
586 | "kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, | ||
587 | kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla_interface, DataType::FP32, MatMulOp::GEMV, | ||
588 | test::cpu_has_sme2), | ||
589 | RegisterBenchmark( | ||
590 | "kai_matmul_clamp_f32_f32_f32p2vlx1b_1x8vl_sme_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, | ||
591 | kai_matmul_clamp_f32_f32_f32p2vlx1b_1x8vl_sme_mla_interface, DataType::FP32, MatMulOp::GEMV, test::cpu_has_sme), | ||
592 | RegisterBenchmark( | ||
593 | "kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, | ||
594 | kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla_interface, DataType::FP32, MatMulOp::GEMM, | ||
595 | test::cpu_has_advsimd), | ||
596 | RegisterBenchmark( | ||
597 | "kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, | ||
598 | kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla_interface, DataType::FP32, MatMulOp::GEMM, | ||
599 | test::cpu_has_advsimd), | ||
600 | RegisterBenchmark( | ||
601 | "kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla_cortexa55", kai_benchmark_matmul<MatMulStridedLhsInterface>, | ||
602 | kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla_cortexa55_interface, DataType::FP32, MatMulOp::GEMM, | ||
603 | test::cpu_has_advsimd), | ||
604 | RegisterBenchmark( | ||
605 | "kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, | ||
606 | kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla_interface, DataType::FP32, MatMulOp::GEMV, | ||
607 | test::cpu_has_sme2), | ||
608 | |||
609 | // matmul_clamp_f32_f32p_f32p | ||
610 | RegisterBenchmark( | ||
611 | "kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa", kai_benchmark_matmul<MatMulBaseInterface>, | ||
612 | kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa_interface, DataType::FP32, MatMulOp::GEMM, | ||
613 | test::cpu_has_sme2), | ||
614 | RegisterBenchmark( | ||
615 | "kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa", kai_benchmark_matmul<MatMulBaseInterface>, | ||
616 | kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa_interface, DataType::FP32, MatMulOp::GEMM, | ||
617 | test::cpu_has_sme), | ||
618 | |||
619 | // matmul_clamp_f32_qai8dxp_qsi4c32p | ||
620 | RegisterBenchmark( | ||
621 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod", | ||
622 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
623 | kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | ||
624 | test::cpu_has_dotprod), | ||
625 | RegisterBenchmark( | ||
626 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod", | ||
627 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
628 | kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | ||
629 | test::cpu_has_dotprod), | ||
630 | RegisterBenchmark( | ||
631 | "kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod", | ||
632 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
633 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | ||
634 | test::cpu_has_dotprod), | ||
635 | RegisterBenchmark( | ||
636 | "kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod", | ||
637 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
638 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | ||
639 | test::cpu_has_dotprod), | ||
640 | RegisterBenchmark( | ||
641 | "kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod", | ||
642 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
643 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | ||
644 | test::cpu_has_dotprod), | ||
645 | RegisterBenchmark( | ||
646 | "kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod", | ||
647 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
648 | kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMM, | ||
649 | test::cpu_has_dotprod), | ||
650 | RegisterBenchmark( | ||
651 | "kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p8x4_4x8_neon_dotprod", | ||
652 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
653 | kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p8x4_4x8_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMM, | ||
654 | test::cpu_has_dotprod), | ||
655 | RegisterBenchmark( | ||
656 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm", | ||
657 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
658 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | ||
659 | test::cpu_has_i8mm), | ||
660 | RegisterBenchmark( | ||
661 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm", | ||
662 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
663 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | ||
664 | test::cpu_has_i8mm), | ||
665 | RegisterBenchmark( | ||
666 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8_neon_i8mm", | ||
667 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
668 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | ||
669 | test::cpu_has_i8mm), | ||
670 | RegisterBenchmark( | ||
671 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm", | ||
672 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
673 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | ||
674 | test::cpu_has_i8mm), | ||
675 | |||
676 | // matmul_clamp_f32_qai8dxp_qsi4cxp | ||
677 | RegisterBenchmark( | ||
678 | "kai_matmul_clamp_f32_qai8dxp1vlx8_qsi4cxp4vlx8_1vlx4vl_sme2_mopa", kai_benchmark_matmul<MatMulFloatInterface>, | ||
679 | kai_matmul_clamp_f32_qai8dxp1vlx8_qsi4cxp4vlx8_1vlx4vl_sme2_mopa_interface, DataType::FP32, MatMulOp::GEMM, | ||
680 | test::cpu_has_sme2), | ||
681 | RegisterBenchmark( | ||
682 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot", kai_benchmark_matmul<MatMulFloatInterface>, | ||
683 | kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot_interface, DataType::FP32, MatMulOp::GEMV, | ||
684 | test::cpu_has_sme2), | ||
685 | RegisterBenchmark( | ||
686 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, | ||
687 | kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | ||
688 | test::cpu_has_dotprod), | ||
689 | RegisterBenchmark( | ||
690 | "kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, | ||
691 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | ||
692 | test::cpu_has_dotprod), | ||
693 | RegisterBenchmark( | ||
694 | "kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, | ||
695 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | ||
696 | test::cpu_has_dotprod), | ||
697 | RegisterBenchmark( | ||
698 | "kai_matmul_clamp_f32_qai8dxp4x4_qsi4cxp8x4_8x8x32_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, | ||
699 | kai_matmul_clamp_f32_qai8dxp4x4_qsi4cxp8x4_8x8x32_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMM, | ||
700 | test::cpu_has_dotprod), | ||
701 | RegisterBenchmark( | ||
702 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, | ||
703 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMM, | ||
704 | test::cpu_has_dotprod), | ||
705 | RegisterBenchmark( | ||
706 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm", kai_benchmark_matmul<MatMulFloatInterface>, | ||
707 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | ||
708 | test::cpu_has_i8mm), | ||
709 | RegisterBenchmark( | ||
710 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm", kai_benchmark_matmul<MatMulFloatInterface>, | ||
711 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | ||
712 | test::cpu_has_i8mm), | ||
713 | RegisterBenchmark( | ||
714 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm", kai_benchmark_matmul<MatMulFloatInterface>, | ||
715 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | ||
716 | test::cpu_has_i8mm), | ||
717 | RegisterBenchmark( | ||
718 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm", kai_benchmark_matmul<MatMulFloatInterface>, | ||
719 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | ||
720 | test::cpu_has_i8mm), | ||
721 | |||
722 | // matmul_clamp_f32_qai8dxp_qsi8cxp | ||
723 | RegisterBenchmark( | ||
724 | "kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme_mopa", kai_benchmark_matmul<MatMulFloatInterface>, | ||
725 | kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme_mopa_interface, DataType::FP32, MatMulOp::GEMM, | ||
726 | test::cpu_has_sme), | ||
727 | RegisterBenchmark( | ||
728 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme_dot", kai_benchmark_matmul<MatMulFloatInterface>, | ||
729 | kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme_dot_interface, DataType::FP32, MatMulOp::GEMV, | ||
730 | test::cpu_has_sme), | ||
731 | RegisterBenchmark( | ||
732 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, | ||
733 | kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | ||
734 | test::cpu_has_dotprod), | ||
735 | RegisterBenchmark( | ||
736 | "kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, | ||
737 | kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | ||
738 | test::cpu_has_dotprod), | ||
739 | RegisterBenchmark( | ||
740 | "kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, | ||
741 | kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMM, | ||
742 | test::cpu_has_dotprod), | ||
743 | RegisterBenchmark( | ||
744 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm", kai_benchmark_matmul<MatMulFloatInterface>, | ||
745 | kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | ||
746 | test::cpu_has_i8mm), | ||
747 | RegisterBenchmark( | ||
748 | "kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa", kai_benchmark_matmul<MatMulFloatInterface>, | ||
749 | kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa_interface, DataType::FP32, MatMulOp::GEMM, | ||
750 | test::cpu_has_sme2), | ||
751 | RegisterBenchmark( | ||
752 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot", kai_benchmark_matmul<MatMulFloatInterface>, | ||
753 | kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot_interface, DataType::FP32, MatMulOp::GEMV, | ||
754 | test::cpu_has_sme2), | ||
755 | |||
756 | // matmul_clamp_f32_qsi8d32p_qsi4c32p | ||
757 | RegisterBenchmark( | ||
758 | "kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa", | ||
759 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
760 | kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa_interface, DataType::FP32, MatMulOp::GEMM, | ||
761 | test::cpu_has_sme2), | ||
762 | RegisterBenchmark( | ||
763 | "kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot", | ||
764 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
765 | kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot_interface, DataType::FP32, MatMulOp::GEMV, | ||
766 | test::cpu_has_sme2), | ||
767 | RegisterBenchmark( | ||
768 | "kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod", | ||
769 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
770 | kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | ||
771 | test::cpu_has_dotprod), | ||
772 | RegisterBenchmark( | ||
773 | "kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod", | ||
774 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
775 | kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | ||
776 | test::cpu_has_dotprod), | ||
777 | RegisterBenchmark( | ||
778 | "kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod", | ||
779 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
780 | kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMM, | ||
781 | test::cpu_has_dotprod), | ||
782 | RegisterBenchmark( | ||
783 | "kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm", | ||
784 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
785 | kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | ||
786 | test::cpu_has_i8mm), | ||
787 | RegisterBenchmark( | ||
788 | "kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm", | ||
789 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
790 | kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | ||
791 | test::cpu_has_i8mm), | ||
792 | |||
793 | // matmul_clamp_fp32_bf16p_bf16p | ||
794 | RegisterBenchmark( | ||
795 | "kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa", kai_benchmark_matmul<MatMulBaseInterface>, | ||
796 | kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa_interface, DataType::FP32, MatMulOp::GEMM, | ||
797 | test::cpu_has_sme2), | ||
798 | |||
799 | // matmul_clamp_qai8_qai8_qsi8cxp | ||
800 | RegisterBenchmark( | ||
801 | "kai_matmul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot", kai_benchmark_matmul<MatMulStaticQuantInterface>, | ||
802 | kai_matmul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot_interface, DataType::QAI8, MatMulOp::GEMV, | ||
803 | test::cpu_has_sme2), | ||
804 | |||
805 | // matmul_clamp_qai8_qai8p_qsi8cxp | ||
806 | RegisterBenchmark( | ||
807 | "kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa", | ||
808 | kai_benchmark_matmul<MatMulStaticQuantInterface>, | ||
809 | kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa_interface, DataType::QAI8, MatMulOp::GEMM, | ||
810 | test::cpu_has_sme2), | ||
811 | RegisterBenchmark( | ||
812 | "kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa", | ||
813 | kai_benchmark_matmul<MatMulStaticQuantInterface>, | ||
814 | kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa_interface, DataType::QAI8, MatMulOp::GEMM, | ||
815 | test::cpu_has_sme), | ||
816 | |||
817 | // matmul_clamp_bf16_qai8dxp_qsi4c32p | ||
818 | RegisterBenchmark( | ||
819 | "kai_matmul_clamp_bf16_qai8dxp1x8_qsi4c32p4x8_1x4_neon_dotprod", | ||
820 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
821 | kai_matmul_clamp_bf16_qai8dxp1x8_qsi4c32p4x8_1x4_neon_dotprod_interface, DataType::BF16, MatMulOp::GEMV, | ||
822 | test::cpu_has_dotprod_and_bf16), | ||
823 | RegisterBenchmark( | ||
824 | "kai_matmul_clamp_bf16_qai8dxp4x8_qsi4c32p4x8_16x4_neon_i8mm", | ||
825 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
826 | kai_matmul_clamp_bf16_qai8dxp4x8_qsi4c32p4x8_16x4_neon_i8mm_interface, DataType::BF16, MatMulOp::GEMM, | ||
827 | test::cpu_has_i8mm_and_bf16), | ||
828 | |||
829 | // matmul_clamp_bf16_qai8dxp_qsi4cxp | ||
830 | RegisterBenchmark( | ||
831 | "kai_matmul_clamp_bf16_qai8dxp1x8_qsi4cxp8x8_1x8_neon_dotprod", kai_benchmark_matmul<MatMulBaseInterface>, | ||
832 | kai_matmul_clamp_bf16_qai8dxp1x8_qsi4cxp8x8_1x8_neon_dotprod_interface, DataType::BF16, MatMulOp::GEMV, | ||
833 | test::cpu_has_dotprod_and_bf16), | ||
834 | RegisterBenchmark( | ||
835 | "kai_matmul_clamp_bf16_qai8dxp4x8_qsi4cxp8x8_8x8_neon_i8mm", kai_benchmark_matmul<MatMulBaseInterface>, | ||
836 | kai_matmul_clamp_bf16_qai8dxp4x8_qsi4cxp8x8_8x8_neon_i8mm_interface, DataType::BF16, MatMulOp::GEMM, | ||
837 | test::cpu_has_i8mm_and_bf16), | ||
// matmul_clamp_f16_qai8dxp_qsi4cxp
838 | RegisterBenchmark( | ||
839 | "kai_matmul_clamp_f16_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod", kai_benchmark_matmul<MatMulBaseInterface>, | ||
840 | kai_matmul_clamp_f16_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMV, | ||
841 | test::cpu_has_dotprod_and_fp16), | ||
842 | RegisterBenchmark( | ||
843 | "kai_matmul_clamp_f16_qai8dxp1x8_qsi4cxp4x8_1x4_neon_dotprod", kai_benchmark_matmul<MatMulBaseInterface>, | ||
844 | kai_matmul_clamp_f16_qai8dxp1x8_qsi4cxp4x8_1x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMV, | ||
845 | test::cpu_has_dotprod_and_fp16), | ||
846 | RegisterBenchmark( | ||
847 | "kai_matmul_clamp_f16_qai8dxp4x4_qsi4cxp4x4_16x4_neon_dotprod", kai_benchmark_matmul<MatMulBaseInterface>, | ||
848 | kai_matmul_clamp_f16_qai8dxp4x4_qsi4cxp4x4_16x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMM, | ||
849 | test::cpu_has_dotprod_and_fp16), | ||
850 | RegisterBenchmark( | ||
851 | "kai_matmul_clamp_f16_qai8dxp4x8_qsi4cxp4x8_16x4_neon_i8mm", kai_benchmark_matmul<MatMulBaseInterface>, | ||
852 | kai_matmul_clamp_f16_qai8dxp4x8_qsi4cxp4x8_16x4_neon_i8mm_interface, DataType::FP16, MatMulOp::GEMM, | ||
853 | test::cpu_has_i8mm_and_fp16), | ||
854 | |||
855 | // matmul_clamp_f16_qai8dxp_qsi8cxp | ||
856 | RegisterBenchmark( | ||
857 | "kai_matmul_clamp_f16_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod", kai_benchmark_matmul<MatMulBaseInterface>, | ||
858 | kai_matmul_clamp_f16_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMV, | ||
859 | test::cpu_has_dotprod_and_fp16), | ||
860 | RegisterBenchmark( | ||
861 | "kai_matmul_clamp_f16_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod", kai_benchmark_matmul<MatMulBaseInterface>, | ||
862 | kai_matmul_clamp_f16_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMV, | ||
863 | test::cpu_has_dotprod_and_fp16), | ||
864 | RegisterBenchmark( | ||
865 | "kai_matmul_clamp_f16_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod", kai_benchmark_matmul<MatMulBaseInterface>, | ||
866 | kai_matmul_clamp_f16_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMM, | ||
867 | test::cpu_has_dotprod_and_fp16), | ||
868 | RegisterBenchmark( | ||
869 | "kai_matmul_clamp_f16_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm", kai_benchmark_matmul<MatMulBaseInterface>, | ||
870 | kai_matmul_clamp_f16_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm_interface, DataType::FP16, MatMulOp::GEMM, | ||
871 | test::cpu_has_i8mm_and_fp16), | ||
872 | |||
873 | // matmul_clamp_f16_qsi8d32p_qai4c32p | ||
874 | RegisterBenchmark( | ||
875 | "kai_matmul_clamp_f16_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa", | ||
876 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
877 | kai_matmul_clamp_f16_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa_interface, DataType::FP16, MatMulOp::GEMM, | ||
878 | test::cpu_has_sme2), | ||
879 | RegisterBenchmark( | ||
880 | "kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot", | ||
881 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
882 | kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot_interface, DataType::FP16, MatMulOp::GEMV, | ||
883 | test::cpu_has_sme2), | ||
884 | RegisterBenchmark( | ||
885 | "kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod", | ||
886 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
887 | kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMV, | ||
888 | test::cpu_has_dotprod_and_fp16), | ||
889 | RegisterBenchmark( | ||
890 | "kai_matmul_clamp_f16_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod", | ||
891 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
892 | kai_matmul_clamp_f16_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMV, | ||
893 | test::cpu_has_dotprod_and_fp16), | ||
894 | RegisterBenchmark( | ||
895 | "kai_matmul_clamp_f16_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod", | ||
896 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
897 | kai_matmul_clamp_f16_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMM, | ||
898 | test::cpu_has_dotprod_and_fp16), | ||
899 | RegisterBenchmark( | ||
900 | "kai_matmul_clamp_f16_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm", | ||
901 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
902 | kai_matmul_clamp_f16_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm_interface, DataType::FP16, MatMulOp::GEMM, | ||
903 | test::cpu_has_i8mm_and_fp16), | ||
904 | |||
905 | // matmul_clamp_f32_qsi8d32p_qai4c32p | ||
906 | RegisterBenchmark( | ||
907 | "kai_matmul_clamp_f32_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa", | ||
908 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
909 | kai_matmul_clamp_f32_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa_interface, DataType::FP32, MatMulOp::GEMM, | ||
910 | test::cpu_has_sme2), | ||
911 | RegisterBenchmark( | ||
912 | "kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot", | ||
913 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
914 | kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot_interface, DataType::FP32, MatMulOp::GEMV, | ||
915 | test::cpu_has_sme2), | ||
916 | RegisterBenchmark( | ||
917 | "kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod", | ||
918 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
919 | kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | ||
920 | test::cpu_has_dotprod), | ||
921 | RegisterBenchmark( | ||
922 | "kai_matmul_clamp_f32_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod", | ||
923 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
924 | kai_matmul_clamp_f32_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | ||
925 | test::cpu_has_dotprod), | ||
926 | RegisterBenchmark( | ||
927 | "kai_matmul_clamp_f32_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod", | ||
928 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
929 | kai_matmul_clamp_f32_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMM, | ||
930 | test::cpu_has_dotprod), | ||
931 | RegisterBenchmark( | ||
932 | "kai_matmul_clamp_f32_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm", | ||
933 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
934 | kai_matmul_clamp_f32_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | ||
935 | test::cpu_has_i8mm), | ||
936 | |||
937 | }; | ||
938 | |||
/// Attaches the problem dimensions to every benchmark stored in `matmul_benchmarks`.
///
/// Every entry receives the argument tuple {m, n, k, bl} and the matching argument
/// names "m", "n", "k" and "bl".
///
/// @param[in] shape Matrix multiplication shape; its `m`, `n` and `k` members are forwarded.
/// @param[in] bl Forwarded as the fourth benchmark argument
///               (presumably the quantization block length - confirm against the benchmark driver).
//
// NOTE: the leading counters and the "Branch ..." rows interleaved below are coverage-report
// annotations, not part of the function's source text.
939 | 1 | void RegisterMatMulBenchmarks(const MatMulShape& shape, const size_t bl) { | |
940 |
2/2✓ Branch 0 taken 83 times.
✓ Branch 1 taken 1 time.
|
84 | for (const auto& benchmark : matmul_benchmarks) { |
941 | 166 | benchmark | |
942 |
1/2✓ Branch 0 taken 83 times.
✗ Branch 1 not taken.
|
// The size_t inputs are converted explicitly to int64_t before being handed to Args().
83 | ->Args( |
943 | 166 | {static_cast<int64_t>(shape.m), static_cast<int64_t>(shape.n), static_cast<int64_t>(shape.k), | |
944 | 83 | static_cast<int64_t>(bl)}) | |
945 |
6/12✓ Branch 0 taken 83 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 83 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 83 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 83 times.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 83 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 83 times.
|
// Names are listed in the same order as the values passed to Args() above.
83 | ->ArgNames({"m", "n", "k", "bl"}); |
946 | 83 | } | |
947 | 1 | } | |
948 | } // namespace kai::benchmark | ||
949 |