benchmark/matmul/matmul_registry.cpp
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // | ||
| 2 | // SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 3 | // | ||
| 4 | // SPDX-License-Identifier: Apache-2.0 | ||
| 5 | // | ||
| 6 | |||
| 7 | #include "matmul_registry.hpp" | ||
| 8 | |||
| 9 | #include <array> | ||
| 10 | #include <cstddef> | ||
| 11 | #include <cstdint> | ||
| 12 | #include <test/common/cpu_info.hpp> | ||
| 13 | #include <test/common/data_type.hpp> | ||
| 14 | |||
| 15 | #include "matmul_benchmark_logic.hpp" | ||
| 16 | #include "matmul_interface.hpp" | ||
| 17 | |||
| 18 | #ifdef __GNUC__ | ||
| 19 | #pragma GCC diagnostic push | ||
| 20 | #pragma GCC diagnostic ignored "-Wswitch-default" | ||
| 21 | #endif // __GNUC__ | ||
| 22 | |||
| 23 | #include <benchmark/benchmark.h> | ||
| 24 | |||
| 25 | #ifdef __GNUC__ | ||
| 26 | #pragma GCC diagnostic pop | ||
| 27 | #endif // __GNUC__ | ||
| 28 | |||
| 29 | // Micro-kernels to register for benchmarking | ||
| 30 | |||
| 31 | // matmul_clamp_f16_bf16p_bf16p | ||
| 32 | #include "kai/ukernels/matmul/matmul_clamp_f16_bf16p_bf16p/kai_matmul_clamp_f16_bf16p8x4_bf16p12x4b_8x12_neon_mmla.h" | ||
| 33 | |||
| 34 | // matmul_clamp_f16_f16_f16p | ||
| 35 | #include "kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla.h" | ||
| 36 | #include "kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p2vlx2b_1x16vl_sme2_dot.h" | ||
| 37 | #include "kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p2vlx2b_1x8vl_sme_mla.h" | ||
| 38 | #include "kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla.h" | ||
| 39 | #include "kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla_cortexa55.h" | ||
| 40 | |||
| 41 | // matmul_clamp_f16_f16p_f16p | ||
| 42 | #include "kai/ukernels/matmul/matmul_clamp_f16_f16p_f16p/kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa.h" | ||
| 43 | #include "kai/ukernels/matmul/matmul_clamp_f16_f16p_f16p/kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa.h" | ||
| 44 | |||
| 45 | // matmul_clamp_f32_bf16p_bf16p | ||
| 46 | #include "kai/ukernels/matmul/matmul_clamp_f32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p1x4_bf16p12x4b_1x36_neon_dot.h" | ||
| 47 | #include "kai/ukernels/matmul/matmul_clamp_f32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p8x4_bf16p12x4b_8x12_neon_mmla.h" | ||
| 48 | |||
| 49 | // matmul_clamp_f32_f32_f32p | ||
| 50 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla.h" | ||
| 51 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla.h" | ||
| 52 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla_cortexa55.h" | ||
| 53 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla.h" | ||
| 54 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p2vlx1b_1x8vl_sme_mla.h" | ||
| 55 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p4vlx1b_6x4vl_sve_mla.h" | ||
| 56 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.h" | ||
| 57 | |||
| 58 | // matmul_clamp_f32_f32p_f32p | ||
| 59 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa.h" | ||
| 60 | #include "kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa.h" | ||
| 61 | |||
| 62 | // matmul_clamp_f32_qai8dxp_qsi4c32p | ||
| 63 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.h" | ||
| 64 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4vlx4_1x4vl_sme2_dot.h" | ||
| 65 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.h" | ||
| 66 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod.h" | ||
| 67 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.h" | ||
| 68 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod.h" | ||
| 69 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.h" | ||
| 70 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod.h" | ||
| 71 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p8x4_4x8_neon_dotprod.h" | ||
| 72 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm.h" | ||
| 73 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm.h" | ||
| 74 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8_neon_i8mm.h" | ||
| 75 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm.h" | ||
| 76 | |||
| 77 | // matmul_clamp_f32_qai8dxp_qsi4cxp | ||
| 78 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1vlx8_qsi4cxp4vlx8_1vlx4vl_sme2_mopa.h" | ||
| 79 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot.h" | ||
| 80 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.h" | ||
| 81 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.h" | ||
| 82 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.h" | ||
| 83 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x4_qsi4cxp8x4_8x8x32_neon_dotprod.h" | ||
| 84 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod.h" | ||
| 85 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.h" | ||
| 86 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.h" | ||
| 87 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.h" | ||
| 88 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.h" | ||
| 89 | |||
| 90 | // matmul_clamp_f32_qai8dxp_qsi8cxp | ||
| 91 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa.h" | ||
| 92 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme_mopa.h" | ||
| 93 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot.h" | ||
| 94 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme_dot.h" | ||
| 95 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.h" | ||
| 96 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.h" | ||
| 97 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.h" | ||
| 98 | #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm.h" | ||
| 99 | |||
| 100 | // matmul_clamp_f32_qsi8d32p_qsi4c32p | ||
| 101 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.h" | ||
| 102 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.h" | ||
| 103 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod.h" | ||
| 104 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p8x4_1x8_sve_dotprod.h" | ||
| 105 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.h" | ||
| 106 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod.h" | ||
| 107 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.h" | ||
| 108 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm.h" | ||
| 109 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.h" | ||
| 110 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm.h" | ||
| 111 | |||
| 112 | // matmul_clamp_fp32_bf16p_bf16p | ||
| 113 | #include "kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.h" | ||
| 114 | |||
| 115 | // matmul_clamp_qai8_qai8_qsi8cxp | ||
| 116 | #include "kai/ukernels/matmul/matmul_clamp_qai8_qai8_qsi8cxp/kai_matmul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot.h" | ||
| 117 | |||
| 118 | // matmul_clamp_qai8_qai8p_qsi8cxp | ||
| 119 | #include "kai/ukernels/matmul/matmul_clamp_qai8_qai8p_qsi8cxp/kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa.h" | ||
| 120 | #include "kai/ukernels/matmul/matmul_clamp_qai8_qai8p_qsi8cxp/kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa.h" | ||
| 121 | |||
| 122 | // matmul_clamp_f16_qai8dxp_qsi4cxp | ||
| 123 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi4cxp/kai_matmul_clamp_f16_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.h" | ||
| 124 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi4cxp/kai_matmul_clamp_f16_qai8dxp1x8_qsi4cxp4x8_1x4_neon_dotprod.h" | ||
| 125 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi4cxp/kai_matmul_clamp_f16_qai8dxp4x4_qsi4cxp4x4_16x4_neon_dotprod.h" | ||
| 126 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi4cxp/kai_matmul_clamp_f16_qai8dxp4x8_qsi4cxp4x8_16x4_neon_i8mm.h" | ||
| 127 | |||
| 128 | // matmul_clamp_f16_qai8dxp_qsi8cxp | ||
| 129 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi8cxp/kai_matmul_clamp_f16_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.h" | ||
| 130 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi8cxp/kai_matmul_clamp_f16_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.h" | ||
| 131 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi8cxp/kai_matmul_clamp_f16_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.h" | ||
| 132 | #include "kai/ukernels/matmul/matmul_clamp_f16_qai8dxp_qsi8cxp/kai_matmul_clamp_f16_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm.h" | ||
| 133 | |||
| 134 | // matmul_clamp_f16_qsi8d32p_qai4c32p | ||
| 135 | #include "kai/ukernels/matmul/matmul_clamp_f16_qsi8d32p_qai4c32p/kai_matmul_clamp_f16_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa.h" | ||
| 136 | #include "kai/ukernels/matmul/matmul_clamp_f16_qsi8d32p_qai4c32p/kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot.h" | ||
| 137 | #include "kai/ukernels/matmul/matmul_clamp_f16_qsi8d32p_qai4c32p/kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod.h" | ||
| 138 | #include "kai/ukernels/matmul/matmul_clamp_f16_qsi8d32p_qai4c32p/kai_matmul_clamp_f16_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod.h" | ||
| 139 | #include "kai/ukernels/matmul/matmul_clamp_f16_qsi8d32p_qai4c32p/kai_matmul_clamp_f16_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod.h" | ||
| 140 | #include "kai/ukernels/matmul/matmul_clamp_f16_qsi8d32p_qai4c32p/kai_matmul_clamp_f16_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm.h" | ||
| 141 | |||
| 142 | // matmul_clamp_f32_qsi8d32p_qai4c32p | ||
| 143 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qai4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa.h" | ||
| 144 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qai4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot.h" | ||
| 145 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qai4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod.h" | ||
| 146 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qai4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod.h" | ||
| 147 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qai4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod.h" | ||
| 148 | #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qai4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm.h" | ||
| 149 | |||
| 150 | // matmul_clamp_bf16_qai8dxp_qsi4c32p | ||
| 151 | #include "kai/ukernels/matmul/matmul_clamp_bf16_qai8dxp_qsi4c32p/kai_matmul_clamp_bf16_qai8dxp1x8_qsi4c32p4x8_1x4_neon_dotprod.h" | ||
| 152 | #include "kai/ukernels/matmul/matmul_clamp_bf16_qai8dxp_qsi4c32p/kai_matmul_clamp_bf16_qai8dxp4x8_qsi4c32p4x8_16x4_neon_i8mm.h" | ||
| 153 | |||
| 154 | // matmul_clamp_bf16_qai8dxp_qsi4cxp | ||
| 155 | #include "kai/ukernels/matmul/matmul_clamp_bf16_qai8dxp_qsi4cxp/kai_matmul_clamp_bf16_qai8dxp1x8_qsi4cxp8x8_1x8_neon_dotprod.h" | ||
| 156 | #include "kai/ukernels/matmul/matmul_clamp_bf16_qai8dxp_qsi4cxp/kai_matmul_clamp_bf16_qai8dxp4x8_qsi4cxp8x8_8x8_neon_i8mm.h" | ||
| 157 | |||
| 158 | namespace kai::benchmark { | ||
| 159 | using DataType = test::DataType; /* Shorthand inside kai::benchmark for test::DataType. */ | ||
| 160 | |||
| 161 | // matmul_clamp_f16_bf16p_bf16p: MatMulBaseInterface constant that sets only .run_matmul, to the kai_run_<kernel> function matching its kai_<kernel>_interface name | ||
| 162 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_bf16p8x4_bf16p12x4b_8x12_neon_mmla_interface{ | ||
| 163 | .run_matmul = kai_run_matmul_clamp_f16_bf16p8x4_bf16p12x4b_8x12_neon_mmla, | ||
| 164 | }; | ||
| 165 | |||
| 166 | // matmul_clamp_f16_f16_f16p: one MatMulStridedLhsInterface constant per micro-kernel; each sets only .run_matmul, to the kai_run_<kernel> function matching its kai_<kernel>_interface name | ||
| 167 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f16_f16_f16p2vlx2b_1x8vl_sme_mla_interface{ | ||
| 168 | .run_matmul = kai_run_matmul_clamp_f16_f16_f16p2vlx2b_1x8vl_sme_mla, | ||
| 169 | }; | ||
| 170 | |||
| 171 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f16_f16_f16p2vlx2b_1x16vl_sme2_dot_interface{ | ||
| 172 | .run_matmul = kai_run_matmul_clamp_f16_f16_f16p2vlx2b_1x16vl_sme2_dot, | ||
| 173 | }; | ||
| 174 | |||
| 175 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla_interface{ | ||
| 176 | .run_matmul = kai_run_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla, | ||
| 177 | }; | ||
| 178 | |||
| 179 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla_interface{ | ||
| 180 | .run_matmul = kai_run_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla, | ||
| 181 | }; | ||
| 182 | |||
| 183 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla_cortexa55_interface{ | ||
| 184 | .run_matmul = kai_run_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla_cortexa55, | ||
| 185 | }; | ||
| 186 | |||
| 187 | // matmul_clamp_f16_f16p_f16p: one MatMulBaseInterface constant per micro-kernel; each sets only .run_matmul, to the kai_run_<kernel> function matching its kai_<kernel>_interface name | ||
| 188 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa_interface{ | ||
| 189 | .run_matmul = kai_run_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa, | ||
| 190 | }; | ||
| 191 | |||
| 192 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa_interface{ | ||
| 193 | .run_matmul = kai_run_matmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa, | ||
| 194 | }; | ||
| 195 | |||
| 196 | // matmul_clamp_f32_bf16p_bf16p: one MatMulBaseInterface constant per micro-kernel; each sets only .run_matmul, to the kai_run_<kernel> function matching its kai_<kernel>_interface name | ||
| 197 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f32_bf16p1x4_bf16p12x4b_1x36_neon_dot_interface{ | ||
| 198 | .run_matmul = kai_run_matmul_clamp_f32_bf16p1x4_bf16p12x4b_1x36_neon_dot, | ||
| 199 | }; | ||
| 200 | |||
| 201 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f32_bf16p8x4_bf16p12x4b_8x12_neon_mmla_interface{ | ||
| 202 | .run_matmul = kai_run_matmul_clamp_f32_bf16p8x4_bf16p12x4b_8x12_neon_mmla, | ||
| 203 | }; | ||
| 204 | |||
| 205 | // matmul_clamp_f32_f32_f32p: one MatMulStridedLhsInterface constant per micro-kernel; each sets only .run_matmul, to the kai_run_<kernel> function matching its kai_<kernel>_interface name | ||
| 206 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla_interface{ | ||
| 207 | .run_matmul = kai_run_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla, | ||
| 208 | }; | ||
| 209 | |||
| 210 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f32_f32_f32p2vlx1b_1x8vl_sme_mla_interface{ | ||
| 211 | .run_matmul = kai_run_matmul_clamp_f32_f32_f32p2vlx1b_1x8vl_sme_mla, | ||
| 212 | }; | ||
| 213 | |||
| 214 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla_interface{ | ||
| 215 | .run_matmul = kai_run_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla, | ||
| 216 | }; | ||
| 217 | |||
| 218 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla_interface{ | ||
| 219 | .run_matmul = kai_run_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla, | ||
| 220 | }; | ||
| 221 | |||
| 222 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla_cortexa55_interface{ | ||
| 223 | .run_matmul = kai_run_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla_cortexa55, | ||
| 224 | }; | ||
| 225 | |||
| 226 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla_interface{ | ||
| 227 | .run_matmul = kai_run_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla, | ||
| 228 | }; | ||
| 229 | |||
| 230 | inline constexpr MatMulStridedLhsInterface kai_matmul_clamp_f32_f32_f32p4vlx1b_6x4vl_sve_mla_interface{ | ||
| 231 | .run_matmul = kai_run_matmul_clamp_f32_f32_f32p4vlx1b_6x4vl_sve_mla, | ||
| 232 | }; | ||
| 233 | |||
| 234 | // matmul_clamp_f32_f32p_f32p: one MatMulBaseInterface constant per micro-kernel; each sets only .run_matmul, to the kai_run_<kernel> function matching its kai_<kernel>_interface name | ||
| 235 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa_interface{ | ||
| 236 | .run_matmul = kai_run_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa, | ||
| 237 | }; | ||
| 238 | |||
| 239 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa_interface{ | ||
| 240 | .run_matmul = kai_run_matmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa, | ||
| 241 | }; | ||
| 242 | |||
| 243 | // matmul_clamp_f32_qai8dxp_qsi4c32p: one MatMulBlockwiseDynamicQuantInterface constant per micro-kernel; each sets only .run_matmul, to the kai_run_<kernel> function matching its kai_<kernel>_interface name | ||
| 244 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 245 | kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod_interface{ | ||
| 246 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod, | ||
| 247 | }; | ||
| 248 | |||
| 249 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 250 | kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod_interface{ | ||
| 251 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod, | ||
| 252 | }; | ||
| 253 | |||
| 254 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 255 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod_interface{ | ||
| 256 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod, | ||
| 257 | }; | ||
| 258 | |||
| 259 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 260 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod_interface{ | ||
| 261 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod, | ||
| 262 | }; | ||
| 263 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 264 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod_interface{ | ||
| 265 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod, | ||
| 266 | }; | ||
| 267 | |||
| 268 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 269 | kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod_interface{ | ||
| 270 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod, | ||
| 271 | }; | ||
| 272 | |||
| 273 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 274 | kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p8x4_4x8_neon_dotprod_interface{ | ||
| 275 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x4_qsi4c32p8x4_4x8_neon_dotprod, | ||
| 276 | }; | ||
| 277 | |||
| 278 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 279 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm_interface{ | ||
| 280 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm, | ||
| 281 | }; | ||
| 282 | |||
| 283 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 284 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm_interface{ | ||
| 285 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm, | ||
| 286 | }; | ||
| 287 | |||
| 288 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 289 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8_neon_i8mm_interface{ | ||
| 290 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8_neon_i8mm, | ||
| 291 | }; | ||
| 292 | |||
| 293 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 294 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm_interface{ | ||
| 295 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm, | ||
| 296 | }; | ||
| 297 | |||
| 298 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 299 | kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4vlx4_1x4vl_sme2_dot_interface{ | ||
| 300 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4vlx4_1x4vl_sme2_dot, | ||
| 301 | }; | ||
| 302 | |||
| 303 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 304 | kai_matmul_clamp_f32_qai8dxp1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa_interface{ | ||
| 305 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa, | ||
| 306 | }; | ||
| 307 | |||
| 308 | // matmul_clamp_f32_qai8dxp_qsi4cxp: one MatMulFloatInterface constant per micro-kernel; each sets only .run_matmul, to the kai_run_<kernel> function matching its kai_<kernel>_interface name | ||
| 309 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1vlx8_qsi4cxp4vlx8_1vlx4vl_sme2_mopa_interface{ | ||
| 310 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1vlx8_qsi4cxp4vlx8_1vlx4vl_sme2_mopa, | ||
| 311 | }; | ||
| 312 | |||
| 313 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot_interface{ | ||
| 314 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot, | ||
| 315 | }; | ||
| 316 | |||
| 317 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod_interface{ | ||
| 318 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod, | ||
| 319 | }; | ||
| 320 | |||
| 321 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod_interface{ | ||
| 322 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod, | ||
| 323 | }; | ||
| 324 | |||
| 325 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod_interface{ | ||
| 326 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod, | ||
| 327 | }; | ||
| 328 | |||
| 329 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x4_qsi4cxp8x4_8x8x32_neon_dotprod_interface{ | ||
| 330 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x4_qsi4cxp8x4_8x8x32_neon_dotprod, | ||
| 331 | }; | ||
| 332 | |||
| 333 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod_interface{ | ||
| 334 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod, | ||
| 335 | }; | ||
| 336 | |||
| 337 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm_interface{ | ||
| 338 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm, | ||
| 339 | }; | ||
| 340 | |||
| 341 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm_interface{ | ||
| 342 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm, | ||
| 343 | }; | ||
| 344 | |||
| 345 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm_interface{ | ||
| 346 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm, | ||
| 347 | }; | ||
| 348 | |||
| 349 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm_interface{ | ||
| 350 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm, | ||
| 351 | }; | ||
| 352 | |||
| 353 | // matmul_clamp_f32_qai8dxp_qsi8cxp: one MatMulFloatInterface constant per micro-kernel; each sets only .run_matmul, to the kai_run_<kernel> function matching its kai_<kernel>_interface name | ||
| 354 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme_mopa_interface{ | ||
| 355 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme_mopa, | ||
| 356 | }; | ||
| 357 | |||
| 358 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme_dot_interface{ | ||
| 359 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme_dot, | ||
| 360 | }; | ||
| 361 | |||
| 362 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod_interface{ | ||
| 363 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod, | ||
| 364 | }; | ||
| 365 | |||
| 366 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod_interface{ | ||
| 367 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod, | ||
| 368 | }; | ||
| 369 | |||
| 370 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod_interface{ | ||
| 371 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod, | ||
| 372 | }; | ||
| 373 | |||
| 374 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm_interface{ | ||
| 375 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm, | ||
| 376 | }; | ||
| 377 | |||
| 378 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa_interface{ | ||
| 379 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa, | ||
| 380 | }; | ||
| 381 | |||
| 382 | inline constexpr MatMulFloatInterface kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot_interface{ | ||
| 383 | .run_matmul = kai_run_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot, | ||
| 384 | }; | ||
| 385 | |||
| 386 | // matmul_clamp_f32_qsi8d32p_qsi4c32p: one MatMulBlockwiseDynamicQuantInterface constant per micro-kernel; each sets only .run_matmul, to the kai_run_<kernel> function matching its kai_<kernel>_interface name | ||
| 387 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 388 | kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa_interface{ | ||
| 389 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa, | ||
| 390 | }; | ||
| 391 | |||
| 392 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 393 | kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot_interface{ | ||
| 394 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot, | ||
| 395 | }; | ||
| 396 | |||
| 397 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 398 | kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod_interface{ | ||
| 399 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod, | ||
| 400 | }; | ||
| 401 | |||
| 402 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 403 | kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod_interface{ | ||
| 404 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod, | ||
| 405 | }; | ||
| 406 | |||
| 407 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 408 | kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod_interface{ | ||
| 409 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod, | ||
| 410 | }; | ||
| 411 | |||
| 412 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 413 | kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm_interface{ | ||
| 414 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm, | ||
| 415 | }; | ||
| 416 | |||
| 417 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 418 | kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm_interface{ | ||
| 419 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm, | ||
| 420 | }; | ||
| 421 | |||
| 422 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 423 | kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p8x4_1x8_sve_dotprod_interface{ | ||
| 424 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p8x4_1x8_sve_dotprod, | ||
| 425 | }; | ||
| 426 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 427 | kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod_interface{ | ||
| 428 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod, | ||
| 429 | }; | ||
| 430 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 431 | kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm_interface{ | ||
| 432 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm, | ||
| 433 | }; | ||
| 434 | |||
| 435 | // matmul_clamp_fp32_bf16p_bf16p ("fp32" follows the include directory name; the kernel symbol itself is spelled "f32"): MatMulBaseInterface constant that sets only .run_matmul | ||
| 436 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa_interface{ | ||
| 437 | .run_matmul = kai_run_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa, | ||
| 438 | }; | ||
| 439 | |||
| 440 | // matmul_clamp_qai8_qai8_qsi8cxp: MatMulStaticQuantInterface constant that sets only .run_matmul, to the kai_run_<kernel> function matching its kai_<kernel>_interface name | ||
| 441 | inline constexpr MatMulStaticQuantInterface kai_matmul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot_interface{ | ||
| 442 | .run_matmul = kai_run_matmul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot, | ||
| 443 | }; | ||
| 444 | |||
| 445 | // matmul_clamp_qai8_qai8p_qsi8cxp: one MatMulStaticQuantInterface constant per micro-kernel; each sets only .run_matmul, to the kai_run_<kernel> function matching its kai_<kernel>_interface name | ||
| 446 | inline constexpr MatMulStaticQuantInterface kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa_interface{ | ||
| 447 | .run_matmul = kai_run_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa, | ||
| 448 | }; | ||
| 449 | |||
| 450 | inline constexpr MatMulStaticQuantInterface kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa_interface{ | ||
| 451 | .run_matmul = kai_run_matmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa, | ||
| 452 | }; | ||
| 453 | |||
| 454 | // matmul_clamp_bf16_qai8dxp_qsi4c32p: one MatMulBlockwiseDynamicQuantGenericDstInterface constant per micro-kernel (the f32 qai8dxp_qsi4c32p family uses MatMulBlockwiseDynamicQuantInterface instead); each sets only .run_matmul | ||
| 455 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
| 456 | kai_matmul_clamp_bf16_qai8dxp1x8_qsi4c32p4x8_1x4_neon_dotprod_interface{ | ||
| 457 | .run_matmul = kai_run_matmul_clamp_bf16_qai8dxp1x8_qsi4c32p4x8_1x4_neon_dotprod, | ||
| 458 | }; | ||
| 459 | |||
| 460 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
| 461 | kai_matmul_clamp_bf16_qai8dxp4x8_qsi4c32p4x8_16x4_neon_i8mm_interface{ | ||
| 462 | .run_matmul = kai_run_matmul_clamp_bf16_qai8dxp4x8_qsi4c32p4x8_16x4_neon_i8mm, | ||
| 463 | }; | ||
| 464 | |||
| 465 | // matmul_clamp_bf16_qai8dxp_qsi4cxp | ||
| 466 | inline constexpr MatMulBaseInterface kai_matmul_clamp_bf16_qai8dxp1x8_qsi4cxp8x8_1x8_neon_dotprod_interface{ | ||
| 467 | .run_matmul = kai_run_matmul_clamp_bf16_qai8dxp1x8_qsi4cxp8x8_1x8_neon_dotprod, | ||
| 468 | }; | ||
| 469 | |||
| 470 | inline constexpr MatMulBaseInterface kai_matmul_clamp_bf16_qai8dxp4x8_qsi4cxp8x8_8x8_neon_i8mm_interface{ | ||
| 471 | .run_matmul = kai_run_matmul_clamp_bf16_qai8dxp4x8_qsi4cxp8x8_8x8_neon_i8mm, | ||
| 472 | }; | ||
| 473 | |||
| 474 | // matmul_clamp_f16_qai8dxp_qsi4cxp | ||
| 475 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod_interface{ | ||
| 476 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod, | ||
| 477 | }; | ||
| 478 | |||
| 479 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp1x8_qsi4cxp4x8_1x4_neon_dotprod_interface{ | ||
| 480 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp1x8_qsi4cxp4x8_1x4_neon_dotprod, | ||
| 481 | }; | ||
| 482 | |||
| 483 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp4x4_qsi4cxp4x4_16x4_neon_dotprod_interface{ | ||
| 484 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp4x4_qsi4cxp4x4_16x4_neon_dotprod, | ||
| 485 | }; | ||
| 486 | |||
| 487 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp4x8_qsi4cxp4x8_16x4_neon_i8mm_interface{ | ||
| 488 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp4x8_qsi4cxp4x8_16x4_neon_i8mm, | ||
| 489 | }; | ||
| 490 | |||
| 491 | // matmul_clamp_f16_qai8dxp_qsi8cxp | ||
| 492 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod_interface{ | ||
| 493 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod, | ||
| 494 | }; | ||
| 495 | |||
| 496 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod_interface{ | ||
| 497 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod, | ||
| 498 | }; | ||
| 499 | |||
| 500 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod_interface{ | ||
| 501 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod, | ||
| 502 | }; | ||
| 503 | |||
| 504 | inline constexpr MatMulBaseInterface kai_matmul_clamp_f16_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm_interface{ | ||
| 505 | .run_matmul = kai_run_matmul_clamp_f16_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm, | ||
| 506 | }; | ||
| 507 | |||
| 508 | // matmul_clamp_f16_qsi8d32p_qai4c32p | ||
| 509 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
| 510 | kai_matmul_clamp_f16_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa_interface{ | ||
| 511 | .run_matmul = kai_run_matmul_clamp_f16_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa, | ||
| 512 | }; | ||
| 513 | |||
| 514 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
| 515 | kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot_interface{ | ||
| 516 | .run_matmul = kai_run_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot, | ||
| 517 | }; | ||
| 518 | |||
| 519 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
| 520 | kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod_interface{ | ||
| 521 | .run_matmul = kai_run_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod, | ||
| 522 | }; | ||
| 523 | |||
| 524 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
| 525 | kai_matmul_clamp_f16_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod_interface{ | ||
| 526 | .run_matmul = kai_run_matmul_clamp_f16_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod, | ||
| 527 | }; | ||
| 528 | |||
| 529 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
| 530 | kai_matmul_clamp_f16_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod_interface{ | ||
| 531 | .run_matmul = kai_run_matmul_clamp_f16_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod, | ||
| 532 | }; | ||
| 533 | |||
| 534 | inline constexpr MatMulBlockwiseDynamicQuantGenericDstInterface | ||
| 535 | kai_matmul_clamp_f16_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm_interface{ | ||
| 536 | .run_matmul = kai_run_matmul_clamp_f16_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm, | ||
| 537 | }; | ||
| 538 | |||
| 539 | // matmul_clamp_f32_qsi8d32p_qai4c32p | ||
| 540 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 541 | kai_matmul_clamp_f32_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa_interface{ | ||
| 542 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa, | ||
| 543 | }; | ||
| 544 | |||
| 545 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 546 | kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot_interface{ | ||
| 547 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot, | ||
| 548 | }; | ||
| 549 | |||
| 550 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 551 | kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod_interface{ | ||
| 552 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod, | ||
| 553 | }; | ||
| 554 | |||
| 555 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 556 | kai_matmul_clamp_f32_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod_interface{ | ||
| 557 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod, | ||
| 558 | }; | ||
| 559 | |||
| 560 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 561 | kai_matmul_clamp_f32_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod_interface{ | ||
| 562 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod, | ||
| 563 | }; | ||
| 564 | |||
| 565 | inline constexpr MatMulBlockwiseDynamicQuantInterface | ||
| 566 | kai_matmul_clamp_f32_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm_interface{ | ||
| 567 | .run_matmul = kai_run_matmul_clamp_f32_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm, | ||
| 568 | }; | ||
| 569 | |||
| 570 | 36 | inline const std::array matmul_benchmarks{ | |
| 571 | // matmul_clamp_f16_bf16p_bf16p | ||
| 572 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
3204 | RegisterBenchmark( |
| 573 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_bf16p8x4_bf16p12x4b_8x12_neon_mmla", kai_benchmark_matmul<MatMulBaseInterface>, |
| 574 | 36 | kai_matmul_clamp_f16_bf16p8x4_bf16p12x4b_8x12_neon_mmla_interface, DataType::FP16, MatMulOp::GEMM, | |
| 575 | test::cpu_has_bf16), | ||
| 576 | |||
| 577 | // matmul_clamp_f16_f16_f16p | ||
| 578 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 579 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_f16_f16p2vlx2b_1x8vl_sme_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, |
| 580 | 36 | kai_matmul_clamp_f16_f16_f16p2vlx2b_1x8vl_sme_mla_interface, DataType::FP16, MatMulOp::GEMV, test::cpu_has_sme), | |
| 581 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 582 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_f16_f16p2vlx2b_1x16vl_sme2_dot", kai_benchmark_matmul<MatMulStridedLhsInterface>, |
| 583 | 36 | kai_matmul_clamp_f16_f16_f16p2vlx2b_1x16vl_sme2_dot_interface, DataType::FP16, MatMulOp::GEMV, | |
| 584 | test::cpu_has_sme2), | ||
| 585 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 586 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, |
| 587 | 36 | kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla_interface, DataType::FP16, MatMulOp::GEMM, | |
| 588 | test::cpu_has_fp16), | ||
| 589 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 590 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, |
| 591 | 36 | kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla_interface, DataType::FP16, MatMulOp::GEMM, test::cpu_has_fp16), | |
| 592 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 593 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla_cortexa55", kai_benchmark_matmul<MatMulStridedLhsInterface>, |
| 594 | 36 | kai_matmul_clamp_f16_f16_f16p32x1b_6x32_neon_mla_cortexa55_interface, DataType::FP16, MatMulOp::GEMM, | |
| 595 | test::cpu_has_fp16), | ||
| 596 | |||
| 597 | // matmul_clamp_f16_f16p_f16p | ||
| 598 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 599 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa", kai_benchmark_matmul<MatMulBaseInterface>, |
| 600 | 36 | kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa_interface, DataType::FP16, MatMulOp::GEMM, | |
| 601 | test::cpu_has_sme2), | ||
| 602 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 603 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa", kai_benchmark_matmul<MatMulBaseInterface>, |
| 604 | 36 | kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa_interface, DataType::FP16, MatMulOp::GEMM, | |
| 605 | test::cpu_has_sme), | ||
| 606 | |||
| 607 | // matmul_clamp_f32_bf16p_bf16p | ||
| 608 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 609 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_bf16p1x4_bf16p12x4b_1x36_neon_dot", kai_benchmark_matmul<MatMulBaseInterface>, |
| 610 | 36 | kai_matmul_clamp_f32_bf16p1x4_bf16p12x4b_1x36_neon_dot_interface, DataType::FP32, MatMulOp::GEMV, | |
| 611 | test::cpu_has_dotprod), | ||
| 612 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 613 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_bf16p8x4_bf16p12x4b_8x12_neon_mmla", kai_benchmark_matmul<MatMulBaseInterface>, |
| 614 | 36 | kai_matmul_clamp_f32_bf16p8x4_bf16p12x4b_8x12_neon_mmla_interface, DataType::FP32, MatMulOp::GEMM, | |
| 615 | test::cpu_has_i8mm), | ||
| 616 | |||
| 617 | // matmul_clamp_f32_f32_f32p | ||
| 618 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 619 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, |
| 620 | 36 | kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla_interface, DataType::FP32, MatMulOp::GEMV, | |
| 621 | test::cpu_has_sme2), | ||
| 622 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 623 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_f32_f32p2vlx1b_1x8vl_sme_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, |
| 624 | 36 | kai_matmul_clamp_f32_f32_f32p2vlx1b_1x8vl_sme_mla_interface, DataType::FP32, MatMulOp::GEMV, test::cpu_has_sme), | |
| 625 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 626 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, |
| 627 | 36 | kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla_interface, DataType::FP32, MatMulOp::GEMM, | |
| 628 | test::cpu_has_advsimd), | ||
| 629 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 630 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, |
| 631 | 36 | kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla_interface, DataType::FP32, MatMulOp::GEMM, | |
| 632 | test::cpu_has_advsimd), | ||
| 633 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 634 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla_cortexa55", kai_benchmark_matmul<MatMulStridedLhsInterface>, |
| 635 | 36 | kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla_cortexa55_interface, DataType::FP32, MatMulOp::GEMM, | |
| 636 | test::cpu_has_advsimd), | ||
| 637 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 638 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, |
| 639 | 36 | kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla_interface, DataType::FP32, MatMulOp::GEMV, | |
| 640 | test::cpu_has_sme2), | ||
| 641 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 642 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_f32_f32p4vlx1b_6x4vl_sve_mla", kai_benchmark_matmul<MatMulStridedLhsInterface>, |
| 643 | 36 | kai_matmul_clamp_f32_f32_f32p4vlx1b_6x4vl_sve_mla_interface, DataType::FP32, MatMulOp::GEMM, test::cpu_has_sve), | |
| 644 | |||
| 645 | // matmul_clamp_f32_f32p_f32p | ||
| 646 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 647 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa", kai_benchmark_matmul<MatMulBaseInterface>, |
| 648 | 36 | kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa_interface, DataType::FP32, MatMulOp::GEMM, | |
| 649 | test::cpu_has_sme2), | ||
| 650 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 651 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa", kai_benchmark_matmul<MatMulBaseInterface>, |
| 652 | 36 | kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa_interface, DataType::FP32, MatMulOp::GEMM, | |
| 653 | test::cpu_has_sme), | ||
| 654 | |||
| 655 | // matmul_clamp_f32_qai8dxp_qsi4c32p | ||
| 656 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 657 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod", |
| 658 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 659 | 36 | kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 660 | test::cpu_has_dotprod), | ||
| 661 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 662 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod", |
| 663 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 664 | 36 | kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 665 | test::cpu_has_dotprod), | ||
| 666 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 667 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod", |
| 668 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 669 | 36 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 670 | test::cpu_has_dotprod), | ||
| 671 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 672 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod", |
| 673 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 674 | 36 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 675 | test::cpu_has_dotprod), | ||
| 676 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 677 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod", |
| 678 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 679 | 36 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 680 | test::cpu_has_dotprod), | ||
| 681 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 682 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod", |
| 683 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 684 | 36 | kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMM, | |
| 685 | test::cpu_has_dotprod), | ||
| 686 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 687 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p8x4_4x8_neon_dotprod", |
| 688 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 689 | 36 | kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p8x4_4x8_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMM, | |
| 690 | test::cpu_has_dotprod), | ||
| 691 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 692 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm", |
| 693 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 694 | 36 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | |
| 695 | test::cpu_has_i8mm), | ||
| 696 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 697 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm", |
| 698 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 699 | 36 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | |
| 700 | test::cpu_has_i8mm), | ||
| 701 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 702 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8_neon_i8mm", |
| 703 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 704 | 36 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | |
| 705 | test::cpu_has_i8mm), | ||
| 706 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 707 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm", |
| 708 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 709 | 36 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | |
| 710 | test::cpu_has_i8mm), | ||
| 711 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 712 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4vlx4_1x4vl_sme2_dot", |
| 713 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 714 | 36 | kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4vlx4_1x4vl_sme2_dot_interface, DataType::FP32, MatMulOp::GEMV, | |
| 715 | test::cpu_has_sme2), | ||
| 716 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 717 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa", |
| 718 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 719 | 36 | kai_matmul_clamp_f32_qai8dxp1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa_interface, DataType::FP32, MatMulOp::GEMM, | |
| 720 | test::cpu_has_sme2), | ||
| 721 | |||
| 722 | // matmul_clamp_f32_qai8dxp_qsi4cxp | ||
| 723 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 724 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1vlx8_qsi4cxp4vlx8_1vlx4vl_sme2_mopa", kai_benchmark_matmul<MatMulFloatInterface>, |
| 725 | 36 | kai_matmul_clamp_f32_qai8dxp1vlx8_qsi4cxp4vlx8_1vlx4vl_sme2_mopa_interface, DataType::FP32, MatMulOp::GEMM, | |
| 726 | test::cpu_has_sme2), | ||
| 727 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 728 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot", kai_benchmark_matmul<MatMulFloatInterface>, |
| 729 | 36 | kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot_interface, DataType::FP32, MatMulOp::GEMV, | |
| 730 | test::cpu_has_sme2), | ||
| 731 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 732 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, |
| 733 | 36 | kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 734 | test::cpu_has_dotprod), | ||
| 735 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 736 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, |
| 737 | 36 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 738 | test::cpu_has_dotprod), | ||
| 739 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 740 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, |
| 741 | 36 | kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 742 | test::cpu_has_dotprod), | ||
| 743 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 744 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp4x4_qsi4cxp8x4_8x8x32_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, |
| 745 | 36 | kai_matmul_clamp_f32_qai8dxp4x4_qsi4cxp8x4_8x8x32_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMM, | |
| 746 | test::cpu_has_dotprod), | ||
| 747 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 748 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, |
| 749 | 36 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMM, | |
| 750 | test::cpu_has_dotprod), | ||
| 751 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 752 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm", kai_benchmark_matmul<MatMulFloatInterface>, |
| 753 | 36 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | |
| 754 | test::cpu_has_i8mm), | ||
| 755 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 756 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm", kai_benchmark_matmul<MatMulFloatInterface>, |
| 757 | 36 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | |
| 758 | test::cpu_has_i8mm), | ||
| 759 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 760 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm", kai_benchmark_matmul<MatMulFloatInterface>, |
| 761 | 36 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | |
| 762 | test::cpu_has_i8mm), | ||
| 763 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 764 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm", kai_benchmark_matmul<MatMulFloatInterface>, |
| 765 | 36 | kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | |
| 766 | test::cpu_has_i8mm), | ||
| 767 | |||
| 768 | // matmul_clamp_f32_qai8dxp_qsi8cxp | ||
| 769 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 770 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme_mopa", kai_benchmark_matmul<MatMulFloatInterface>, |
| 771 | 36 | kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme_mopa_interface, DataType::FP32, MatMulOp::GEMM, | |
| 772 | test::cpu_has_sme), | ||
| 773 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 774 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme_dot", kai_benchmark_matmul<MatMulFloatInterface>, |
| 775 | 36 | kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme_dot_interface, DataType::FP32, MatMulOp::GEMV, | |
| 776 | test::cpu_has_sme), | ||
| 777 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 778 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, |
| 779 | 36 | kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 780 | test::cpu_has_dotprod), | ||
| 781 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 782 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, |
| 783 | 36 | kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 784 | test::cpu_has_dotprod), | ||
| 785 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 786 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod", kai_benchmark_matmul<MatMulFloatInterface>, |
| 787 | 36 | kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMM, | |
| 788 | test::cpu_has_dotprod), | ||
| 789 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 790 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm", kai_benchmark_matmul<MatMulFloatInterface>, |
| 791 | 36 | kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | |
| 792 | test::cpu_has_i8mm), | ||
| 793 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 794 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa", kai_benchmark_matmul<MatMulFloatInterface>, |
| 795 | 36 | kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa_interface, DataType::FP32, MatMulOp::GEMM, | |
| 796 | test::cpu_has_sme2), | ||
| 797 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 798 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot", kai_benchmark_matmul<MatMulFloatInterface>, |
| 799 | 36 | kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot_interface, DataType::FP32, MatMulOp::GEMV, | |
| 800 | test::cpu_has_sme2), | ||
| 801 | |||
| 802 | // matmul_clamp_f32_qsi8d32p_qsi4c32p | ||
| 803 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 804 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa", |
| 805 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 806 | 36 | kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa_interface, DataType::FP32, MatMulOp::GEMM, | |
| 807 | test::cpu_has_sme2), | ||
| 808 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 809 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot", |
| 810 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 811 | 36 | kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot_interface, DataType::FP32, MatMulOp::GEMV, | |
| 812 | test::cpu_has_sme2), | ||
| 813 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 814 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod", |
| 815 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 816 | 36 | kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 817 | test::cpu_has_dotprod), | ||
| 818 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 819 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod", |
| 820 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 821 | 36 | kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 822 | test::cpu_has_dotprod), | ||
| 823 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 824 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod", |
| 825 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 826 | 36 | kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMM, | |
| 827 | test::cpu_has_dotprod), | ||
| 828 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 829 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm", |
| 830 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 831 | 36 | kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | |
| 832 | test::cpu_has_i8mm), | ||
| 833 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 834 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm", |
| 835 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 836 | 36 | kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | |
| 837 | test::cpu_has_i8mm), | ||
| 838 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 839 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p8x4_1x8_sve_dotprod", |
| 840 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 841 | 36 | kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p8x4_1x8_sve_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 842 | (test::cpu_check<test::cpu_has_sve_vl256, test::cpu_has_dotprod>)), | ||
| 843 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 844 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod", |
| 845 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 846 | 36 | kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 847 | (test::cpu_check<test::cpu_has_sve_vl256, test::cpu_has_dotprod>)), | ||
| 848 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 849 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm", |
| 850 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 851 | 36 | kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | |
| 852 | (test::cpu_check<test::cpu_has_sve_vl256, test::cpu_has_i8mm>)), | ||
| 853 | |||
| 854 | // matmul_clamp_f32_bf16p_bf16p | ||
| 855 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 856 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa", kai_benchmark_matmul<MatMulBaseInterface>, |
| 857 | 36 | kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa_interface, DataType::FP32, MatMulOp::GEMM, | |
| 858 | test::cpu_has_sme2), | ||
| 859 | |||
| 860 | // matmul_clamp_qai8_qai8_qsi8cxp | ||
| 861 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 862 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot", kai_benchmark_matmul<MatMulStaticQuantInterface>, |
| 863 | 36 | kai_matmul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot_interface, DataType::QAI8, MatMulOp::GEMV, | |
| 864 | test::cpu_has_sme2), | ||
| 865 | |||
| 866 | // matmul_clamp_qai8_qai8p_qsi8cxp | ||
| 867 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 868 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa", |
| 869 | kai_benchmark_matmul<MatMulStaticQuantInterface>, | ||
| 870 | 36 | kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa_interface, DataType::QAI8, MatMulOp::GEMM, | |
| 871 | test::cpu_has_sme2), | ||
| 872 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 873 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa", |
| 874 | kai_benchmark_matmul<MatMulStaticQuantInterface>, | ||
| 875 | 36 | kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxp2vlx4sb_2vlx2vl_sme_mopa_interface, DataType::QAI8, MatMulOp::GEMM, | |
| 876 | test::cpu_has_sme), | ||
| 877 | |||
| 878 | // matmul_clamp_bf16_qai8dxp_qsi4c32p | ||
| 879 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 880 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_bf16_qai8dxp1x8_qsi4c32p4x8_1x4_neon_dotprod", |
| 881 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
| 882 | 36 | kai_matmul_clamp_bf16_qai8dxp1x8_qsi4c32p4x8_1x4_neon_dotprod_interface, DataType::BF16, MatMulOp::GEMV, | |
| 883 | test::cpu_has_dotprod_and_bf16), | ||
| 884 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 885 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_bf16_qai8dxp4x8_qsi4c32p4x8_16x4_neon_i8mm", |
| 886 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
| 887 | 36 | kai_matmul_clamp_bf16_qai8dxp4x8_qsi4c32p4x8_16x4_neon_i8mm_interface, DataType::BF16, MatMulOp::GEMM, | |
| 888 | test::cpu_has_i8mm_and_bf16), | ||
| 889 | |||
| 890 | // matmul_clamp_bf16_qai8dxp_qsi4cxp | ||
| 891 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 892 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_bf16_qai8dxp1x8_qsi4cxp8x8_1x8_neon_dotprod", kai_benchmark_matmul<MatMulBaseInterface>, |
| 893 | 36 | kai_matmul_clamp_bf16_qai8dxp1x8_qsi4cxp8x8_1x8_neon_dotprod_interface, DataType::BF16, MatMulOp::GEMV, | |
| 894 | test::cpu_has_dotprod_and_bf16), | ||
| 895 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 896 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_bf16_qai8dxp4x8_qsi4cxp8x8_8x8_neon_i8mm", kai_benchmark_matmul<MatMulBaseInterface>, |
| 897 | 36 | kai_matmul_clamp_bf16_qai8dxp4x8_qsi4cxp8x8_8x8_neon_i8mm_interface, DataType::BF16, MatMulOp::GEMM, | |
| 898 | test::cpu_has_i8mm_and_bf16), | ||
| 899 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 900 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod", kai_benchmark_matmul<MatMulBaseInterface>, |
| 901 | 36 | kai_matmul_clamp_f16_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMV, | |
| 902 | test::cpu_has_dotprod_and_fp16), | ||
| 903 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 904 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_qai8dxp1x8_qsi4cxp4x8_1x4_neon_dotprod", kai_benchmark_matmul<MatMulBaseInterface>, |
| 905 | 36 | kai_matmul_clamp_f16_qai8dxp1x8_qsi4cxp4x8_1x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMV, | |
| 906 | test::cpu_has_dotprod_and_fp16), | ||
| 907 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 908 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_qai8dxp4x4_qsi4cxp4x4_16x4_neon_dotprod", kai_benchmark_matmul<MatMulBaseInterface>, |
| 909 | 36 | kai_matmul_clamp_f16_qai8dxp4x4_qsi4cxp4x4_16x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMM, | |
| 910 | test::cpu_has_dotprod_and_fp16), | ||
| 911 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 912 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_qai8dxp4x8_qsi4cxp4x8_16x4_neon_i8mm", kai_benchmark_matmul<MatMulBaseInterface>, |
| 913 | 36 | kai_matmul_clamp_f16_qai8dxp4x8_qsi4cxp4x8_16x4_neon_i8mm_interface, DataType::FP16, MatMulOp::GEMM, | |
| 914 | test::cpu_has_i8mm_and_fp16), | ||
| 915 | |||
| 916 | // matmul_clamp_f16_qai8dxp_qsi8cxp | ||
| 917 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 918 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod", kai_benchmark_matmul<MatMulBaseInterface>, |
| 919 | 36 | kai_matmul_clamp_f16_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMV, | |
| 920 | test::cpu_has_dotprod_and_fp16), | ||
| 921 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 922 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod", kai_benchmark_matmul<MatMulBaseInterface>, |
| 923 | 36 | kai_matmul_clamp_f16_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMV, | |
| 924 | test::cpu_has_dotprod_and_fp16), | ||
| 925 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 926 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod", kai_benchmark_matmul<MatMulBaseInterface>, |
| 927 | 36 | kai_matmul_clamp_f16_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMM, | |
| 928 | test::cpu_has_dotprod_and_fp16), | ||
| 929 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 930 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm", kai_benchmark_matmul<MatMulBaseInterface>, |
| 931 | 36 | kai_matmul_clamp_f16_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm_interface, DataType::FP16, MatMulOp::GEMM, | |
| 932 | test::cpu_has_i8mm_and_fp16), | ||
| 933 | |||
| 934 | // matmul_clamp_f16_qsi8d32p_qai4c32p | ||
| 935 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 936 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa", |
| 937 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
| 938 | 36 | kai_matmul_clamp_f16_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa_interface, DataType::FP16, MatMulOp::GEMM, | |
| 939 | test::cpu_has_sme2), | ||
| 940 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 941 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot", |
| 942 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
| 943 | 36 | kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot_interface, DataType::FP16, MatMulOp::GEMV, | |
| 944 | test::cpu_has_sme2), | ||
| 945 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 946 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod", |
| 947 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
| 948 | 36 | kai_matmul_clamp_f16_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMV, | |
| 949 | test::cpu_has_dotprod_and_fp16), | ||
| 950 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 951 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod", |
| 952 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
| 953 | 36 | kai_matmul_clamp_f16_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMV, | |
| 954 | test::cpu_has_dotprod_and_fp16), | ||
| 955 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 956 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod", |
| 957 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
| 958 | 36 | kai_matmul_clamp_f16_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod_interface, DataType::FP16, MatMulOp::GEMM, | |
| 959 | test::cpu_has_dotprod_and_fp16), | ||
| 960 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 961 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f16_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm", |
| 962 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantGenericDstInterface>, | ||
| 963 | 36 | kai_matmul_clamp_f16_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm_interface, DataType::FP16, MatMulOp::GEMM, | |
| 964 | test::cpu_has_i8mm_and_fp16), | ||
| 965 | |||
| 966 | // matmul_clamp_f32_qsi8d32p_qai4c32p | ||
| 967 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 968 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa", |
| 969 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 970 | 36 | kai_matmul_clamp_f32_qsi8d32p1vlx4_qai4c32p4vlx4_1vlx4vl_sme2_mopa_interface, DataType::FP32, MatMulOp::GEMM, | |
| 971 | test::cpu_has_sme2), | ||
| 972 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 973 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot", |
| 974 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 975 | 36 | kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4vlx4_1x4vl_sme2_dot_interface, DataType::FP32, MatMulOp::GEMV, | |
| 976 | test::cpu_has_sme2), | ||
| 977 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 978 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod", |
| 979 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 980 | 36 | kai_matmul_clamp_f32_qsi8d32p1x4_qai4c32p4x4_1x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 981 | test::cpu_has_dotprod), | ||
| 982 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 983 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod", |
| 984 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 985 | 36 | kai_matmul_clamp_f32_qsi8d32p1x8_qai4c32p4x8_1x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMV, | |
| 986 | test::cpu_has_dotprod), | ||
| 987 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 988 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod", |
| 989 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 990 | 36 | kai_matmul_clamp_f32_qsi8d32p4x4_qai4c32p4x4_8x4_neon_dotprod_interface, DataType::FP32, MatMulOp::GEMM, | |
| 991 | test::cpu_has_dotprod), | ||
| 992 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | RegisterBenchmark( |
| 993 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | "kai_matmul_clamp_f32_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm", |
| 994 | kai_benchmark_matmul<MatMulBlockwiseDynamicQuantInterface>, | ||
| 995 | 36 | kai_matmul_clamp_f32_qsi8d32p4x8_qai4c32p4x8_8x4_neon_i8mm_interface, DataType::FP32, MatMulOp::GEMM, | |
| 996 | test::cpu_has_i8mm), | ||
| 997 | |||
| 998 | }; | ||
| 999 | |||
| 1000 | 12 | void RegisterMatMulBenchmarks(const MatMulShape& shape, const size_t bl) { | |
| 1001 |
2/2✓ Branch 0 taken 1068 times.
✓ Branch 1 taken 12 times.
|
1080 | for (const auto& benchmark : matmul_benchmarks) { |
| 1002 |
0/2✗ Branch 0 not taken.
✗ Branch 1 not taken.
|
2136 | benchmark |
| 1003 |
2/2✓ Branch 0 taken 356 times.
✓ Branch 1 taken 712 times.
|
1068 | ->Args( |
| 1004 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 712 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 712 times.
|
2136 | {static_cast<int64_t>(shape.m), static_cast<int64_t>(shape.n), static_cast<int64_t>(shape.k), |
| 1005 | 1068 | static_cast<int64_t>(bl)}) | |
| 1006 |
10/12✓ Branch 0 taken 356 times.
✓ Branch 1 taken 712 times.
✓ Branch 2 taken 356 times.
✓ Branch 3 taken 712 times.
✓ Branch 4 taken 356 times.
✓ Branch 5 taken 712 times.
✓ Branch 6 taken 356 times.
✓ Branch 7 taken 712 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 1068 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 1068 times.
|
1068 | ->ArgNames({"m", "n", "k", "bl"}); |
| 1007 | 1068 | } | |
| 1008 | 12 | } | |
| 1009 | } // namespace kai::benchmark | ||
| 1010 |