KleidiAI Coverage Report


Directory: ./
File: kai/ukernels/matmul/matmul_clamp_qai8_qai8p_qsi8cxp/kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa.c
Date: 2025-10-20 13:18:31
Coverage Exec Excl Total
Lines: 100.0% 46 4 50
Functions: 100.0% 13 0 13
Branches: -% 0 8 8

Line Branch Exec Source
1 //
2 // SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 //
4 // SPDX-License-Identifier: Apache-2.0
5 //
6
7 #if (!defined(__aarch64__) || !defined(__ARM_FEATURE_SVE2)) && !defined(_M_ARM64)
8 #error This file must be compiled for AArch64, FEAT_SVE2.
9 #else // Architectural features check.
10 #include "kai_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa.h"
11
12 #include <stddef.h>
13 #include <stdint.h>
14
15 #include "kai/kai_common.h"
16
17 typedef struct {
18 const void* A;
19 const void* B;
20 void* C;
21 uint64_t ldcb;
22 uint64_t M;
23 uint64_t N;
24 uint64_t K;
25 int32_t min;
26 int32_t max;
27 int32_t result_zero_point;
28 void* accumulator_buffer;
29 uint64_t flags;
30 } KernelArgs;
31
32 static const size_t kai_mr = 2;
33 static const size_t kai_nr = 2;
34 static const size_t kai_kr = 4;
35 static const size_t kai_sr = 1;
36
37 void kai_kernel_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(KernelArgs* args);
38
39 // Returns kai_get_sme_vector_length_u8() divided by kai_kr; the m/n step and mr/nr getters below scale this value by kai_mr or kai_nr
40 3330 static size_t kai_get_kernel_vec_length_constant(void) {
41 3330 const size_t kernel_vec_length_constant = kai_get_sme_vector_length_u8() / kai_kr;
42 6660 return kernel_vec_length_constant;
43 3330 }
44
45 999 size_t kai_get_m_step_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(void) {
46 999 return kai_mr * kai_get_kernel_vec_length_constant();
47 }
48
49 1665 size_t kai_get_n_step_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(void) {
50 1665 return kai_nr * kai_get_kernel_vec_length_constant();
51 }
52
53 333 size_t kai_get_mr_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(void) {
54 333 return kai_mr * kai_get_kernel_vec_length_constant();
55 }
56
57 333 size_t kai_get_nr_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(void) {
58 333 return kai_nr * kai_get_kernel_vec_length_constant();
59 }
60
61 333 size_t kai_get_kr_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(void) {
62 333 return kai_kr;
63 }
64
65 333 size_t kai_get_sr_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(void) {
66 333 return kai_sr;
67 }
68
69 333 size_t kai_get_lhs_packed_offset_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(size_t m_idx, size_t k) {
70 KAI_ASSUME(m_idx % kai_get_m_step_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa() == 0);
71 333 return m_idx * kai_roundup(k, kai_kr) * sizeof(int8_t);
72 }
73
74 333 static size_t kai_get_rhs_packed_stride_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(size_t k) {
75 666 return kai_get_n_step_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa() *
76 333 (sizeof(int32_t) + kai_roundup(k, kai_kr) * sizeof(int8_t) + sizeof(float));
77 }
78
79 333 size_t kai_get_rhs_packed_offset_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(size_t n_idx, size_t k) {
80 KAI_ASSUME(n_idx % kai_get_n_step_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa() == 0);
81 333 const size_t block_idx = n_idx / kai_get_n_step_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa();
82 666 return block_idx * kai_get_rhs_packed_stride_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(k);
83 333 }
84
85 333 size_t kai_get_dst_offset_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(
86 size_t m_idx, size_t n_idx, size_t dst_stride_row) {
87 KAI_ASSUME(m_idx % kai_get_m_step_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa() == 0);
88 KAI_ASSUME(n_idx % kai_get_n_step_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa() == 0);
89
90 333 return m_idx * dst_stride_row + n_idx * sizeof(int8_t);
91 }
92
93 333 size_t kai_get_dst_size_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(size_t m, size_t n) {
94 333 return m * n * sizeof(int8_t);
95 }
96
97 334 void kai_run_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(
98 size_t m, size_t n, size_t k, const void* lhs_packed, const void* rhs_packed, void* dst, size_t dst_stride_row,
99 size_t dst_stride_col, const struct kai_matmul_requantize32_params* params) {
100 334 KAI_UNUSED(dst_stride_col);
101 334 KernelArgs args;
102
103 334 args.A = lhs_packed;
104 334 args.B = rhs_packed;
105 334 args.C = dst;
106 334 args.ldcb = dst_stride_row;
107 334 args.M = m;
108 334 args.N = n;
109 334 args.K = k;
110 334 args.min = params->min_value;
111 334 args.max = params->max_value;
112 334 args.result_zero_point = params->output_zero_point;
113 334 args.accumulator_buffer = NULL;
114 334 args.flags = 0;
115
116 334 kai_kernel_matmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa(&args);
117 334 }
118
119 #endif // Architectural features check.
120