KleidiAI Coverage Report


Directory: ./
File: kai/ukernels/matmul/pack/kai_lhs_imatmul_pack_x8p2vlx4_x8p_sme.c
Date: 2025-10-20 13:18:31
Coverage Exec Excl Total
Lines: 100.0% 32 5 37
Functions: 100.0% 5 0 5
Branches: 100.0% 10 10 20

Line Branch Exec Source
1 //
2 // SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 //
4 // SPDX-License-Identifier: Apache-2.0
5 //
6
7 #if (!defined(__aarch64__) || !defined(__ARM_FEATURE_SVE2)) && !defined(_M_ARM64)
8 #error This file must be compiled for AArch64, FEAT_SVE2.
9 #else // Architectural features check.
10 #include "kai_lhs_imatmul_pack_x8p2vlx4_x8p_sme.h"
11
12 #include <stddef.h>
13 #include <stdint.h>
14
15 #include "kai/kai_common.h"
16
// Compile-time packing parameters for this kernel.
// MR and KR are combined as MR * <vector length in bytes> / KR to obtain the row step.
// MAX_M_STEP is that same expression evaluated with KAI_SME_VEC_LENGTH_MAX_BYTES; it is used to
// size the on-stack row-pointer array in kai_run_lhs_imatmul_pack_x8p2vlx4_x8p_sme.
17 enum {
18 MR = 2,
19 KR = 4,
20 MAX_M_STEP = MR * (KAI_SME_VEC_LENGTH_MAX_BYTES / sizeof(int8_t)) / KR,
21 };
22
// Declaration of the low-level packing kernel; its definition is not in this file.
// It is called once per (row block, K chunk) from kai_run_lhs_imatmul_pack_x8p2vlx4_x8p_sme with
// `in` pointing at an array of `height` row pointers.
// NOTE(review): presumably implemented in a separate assembly/SME source - confirm.
23 void kai_kernel_lhs_imatmul_pack_x8p2vlx4_x8p_sme(
24 size_t height, size_t width, const void* in, size_t row_offset, void* out);
25
// Returns the number of rows handled per packed block: MR * kai_get_sme_vector_length_u8() / KR.
// The value depends on the vector length reported at run time by kai_get_sme_vector_length_u8().
26 37296 static size_t kai_get_mr_lhs_imatmul_pack_x8p2vlx4_x8p_sme(void) {
27 37296 return MR * kai_get_sme_vector_length_u8() / KR;
28 }
29
// Public accessor for the row step; returns the same value as the file-local
// kai_get_mr_lhs_imatmul_pack_x8p2vlx4_x8p_sme().
30 18648 size_t kai_get_m_step_lhs_imatmul_pack_x8p2vlx4_x8p_sme(void) {
31 18648 return kai_get_mr_lhs_imatmul_pack_x8p2vlx4_x8p_sme();
32 }
33
// Returns the byte offset into the packed LHS buffer for row index m_idx.
//
// m_idx          Row index; must be a multiple of the m step (checked with KAI_ASSUME).
// k_chunk_count  Number of K chunks per row.
// k_chunk_length Length of one K chunk; rounded up to a multiple of KR for the computation.
//
// The result is m_idx * k_chunk_count * roundup(k_chunk_length, KR) * sizeof(int8_t).
34 18648 size_t kai_get_lhs_packed_offset_lhs_imatmul_pack_x8p2vlx4_x8p_sme(
35 size_t m_idx, size_t k_chunk_count, size_t k_chunk_length) {
36 KAI_ASSUME(m_idx % kai_get_m_step_lhs_imatmul_pack_x8p2vlx4_x8p_sme() == 0);
37
38 18648 return m_idx * k_chunk_count * kai_roundup(k_chunk_length, KR) * sizeof(int8_t);
39 }
40
// Returns the size in bytes of the packed LHS buffer for m rows.
//
// m is first rounded up to a multiple of the row step, then the size is obtained as the packed
// offset of that rounded-up row index, so a trailing partial row block occupies a full block.
41 9324 size_t kai_get_lhs_packed_size_lhs_imatmul_pack_x8p2vlx4_x8p_sme(
42 size_t m, size_t k_chunk_count, size_t k_chunk_length) {
43 9324 const size_t m_end = kai_roundup(m, kai_get_mr_lhs_imatmul_pack_x8p2vlx4_x8p_sme());
44 18648 return kai_get_lhs_packed_offset_lhs_imatmul_pack_x8p2vlx4_x8p_sme(m_end, k_chunk_count, k_chunk_length);
45 9324 }
46
// Packs the LHS operand, described by a table of row pointers, into lhs_packed.
//
// m               Number of LHS rows.
// k_chunk_count   Number of K chunks per row.
// k_chunk_length  Length of one K chunk; passed to the kernel as `width`.
// lhs_ptrs        Table of row pointers; must not be NULL. The entry for row block i_m, chunk
//                 i_k_chunk and row-in-block y is read at index
//                 i_m * k_chunk_count + i_k_chunk * m_step + y.
// lhs_ptr_offset  Byte offset added to every table entry that is not equal to pad_ptr.
// pad_ptr         Sentinel pointer value; entries equal to it are passed to the kernel unchanged.
// lhs_packed      Destination buffer; must not be NULL.
//
// For every row block (step m_step) and every K chunk, up to m_step row pointers are gathered into
// a local array and the kernel is called once. The output position then advances by
// m_step * roundup(k_chunk_length, KR) bytes, also for a final partial row block.
//
// NOTE(review): the KAI_ASSERT in the innermost loop bounds i_k_chunk + (i_m + y) * k_chunk_count,
// which is not the index expression used on the following line. For a final partial row block
// with k_chunk_count > 1 the index actually used can be >= m * k_chunk_count, so lhs_ptrs
// presumably has to cover m rounded up to a multiple of m_step - confirm against the header
// documentation and callers.
47 9324 void kai_run_lhs_imatmul_pack_x8p2vlx4_x8p_sme(
48 size_t m, size_t k_chunk_count, size_t k_chunk_length, const void* const* lhs_ptrs, size_t lhs_ptr_offset,
49 const void* pad_ptr, void* lhs_packed) {
50 KAI_ASSUME(lhs_ptrs != NULL);
51 KAI_ASSUME(lhs_packed != NULL);
52
53 9324 const size_t m_step = kai_get_mr_lhs_imatmul_pack_x8p2vlx4_x8p_sme();
54 9324 const size_t row_offset = 0;
55 9324 const size_t width = k_chunk_length;
56
// The run-time row step must fit in the fixed-size pointer array declared below.
57 KAI_ASSERT(m_step <= MAX_M_STEP);
58 9324 const uint8_t* in[MAX_M_STEP];
59
60 9324 uint8_t* out_base = lhs_packed;
61
2/2
✓ Branch 0 taken 9996 times.
✓ Branch 1 taken 9324 times.
19320 for (size_t i_m = 0; i_m < m; i_m += m_step) {
62
2/2
✓ Branch 0 taken 9996 times.
✓ Branch 1 taken 220626 times.
230622 for (size_t i_k_chunk = 0; i_k_chunk < k_chunk_count; i_k_chunk += 1) {
63
2/2
✓ Branch 0 taken 165942 times.
✓ Branch 1 taken 54684 times.
// Rows remaining in this block; smaller than m_step only for the last block.
220626 const size_t height = KAI_MIN(m - i_m, m_step);
64 220626 void* out = out_base;
65
2/2
✓ Branch 0 taken 2740878 times.
✓ Branch 1 taken 220626 times.
2961504 for (size_t y = 0; y < height; y += 1) {
66 KAI_ASSERT(i_k_chunk + (i_m + y) * k_chunk_count < m * k_chunk_count);
67 2740878 in[y] = *(lhs_ptrs + i_m * k_chunk_count + i_k_chunk * m_step + y);
68
2/2
✓ Branch 0 taken 139566 times.
✓ Branch 1 taken 2601312 times.
// Only non-padding entries are shifted by lhs_ptr_offset; in[] holds uint8_t pointers, so the
// offset is applied in bytes.
2740878 if (in[y] != pad_ptr) {
69 2601312 in[y] += lhs_ptr_offset;
70 2601312 }
71 2740878 }
72
// Only the first `height` entries of in[] were written above; the rest are left unset.
73 220626 kai_kernel_lhs_imatmul_pack_x8p2vlx4_x8p_sme(
74 220626 height, width, in, row_offset, out); // NOLINT(bugprone-multi-level-implicit-pointer-conversion)
75 220626 out_base += m_step * kai_roundup(k_chunk_length, KR) * sizeof(int8_t);
76 220626 }
77 9996 }
78 9324 }
79
80 #endif // Architectural features check.
81