KleidiAI Coverage Report


Directory: ./
Coverage: low: ≥ 0% medium: ≥ 75.0% high: ≥ 90.0%
Coverage Exec / Excl / Total
Lines: 100.0% 33 / 5 / 38
Functions: 100.0% 5 / 0 / 5
Branches: 100.0% 10 / 10 / 20

kai/ukernels/matmul/pack/kai_lhs_imatmul_pack_x8p2vlx4_x8p_sme.c
Line Branch Exec Source
1 //
2 // SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 //
4 // SPDX-License-Identifier: Apache-2.0
5 //
6
7 #if (!defined(__aarch64__) || !defined(__ARM_FEATURE_SVE2)) && !defined(_M_ARM64)
8 #error This file must be compiled for AArch64, FEAT_SVE2.
9 #else // Architectural features check.
10 #include "kai_lhs_imatmul_pack_x8p2vlx4_x8p_sme.h"
11
12 #include <stddef.h>
13 #include <stdint.h>
14
15 #include "kai/kai_common.h"
16
17 enum {
18 MR = 2,
19 KR = 4,
20 MAX_M_STEP = MR * (KAI_SME_VEC_LENGTH_MAX_BYTES / sizeof(int8_t)) / KR,
21 };
22
23 void kai_kernel_lhs_imatmul_pack_x8p2vlx4_x8p_sme(size_t height, size_t width, const void* in, void* out);
24
25 13320 static size_t kai_get_mr_lhs_imatmul_pack_x8p2vlx4_x8p_sme(void) {
26 13320 return MR * kai_get_sme_vector_length_u8() / KR;
27 }
28
29 6660 size_t kai_get_m_step_lhs_imatmul_pack_x8p2vlx4_x8p_sme(void) {
30 6660 return kai_get_mr_lhs_imatmul_pack_x8p2vlx4_x8p_sme();
31 }
32
33 6660 size_t kai_get_lhs_packed_offset_lhs_imatmul_pack_x8p2vlx4_x8p_sme(
34 size_t m_idx, size_t k_chunk_count, size_t k_chunk_length) {
35 KAI_ASSUME(m_idx % kai_get_m_step_lhs_imatmul_pack_x8p2vlx4_x8p_sme() == 0);
36
37 6660 return m_idx * k_chunk_count * kai_roundup(k_chunk_length, KR) * sizeof(int8_t);
38 }
39
40 3330 size_t kai_get_lhs_packed_size_lhs_imatmul_pack_x8p2vlx4_x8p_sme(
41 size_t m, size_t k_chunk_count, size_t k_chunk_length) {
42 3330 const size_t m_end = kai_roundup(m, kai_get_mr_lhs_imatmul_pack_x8p2vlx4_x8p_sme());
43 6660 return kai_get_lhs_packed_offset_lhs_imatmul_pack_x8p2vlx4_x8p_sme(m_end, k_chunk_count, k_chunk_length);
44 3330 }
45
46 3330 void kai_run_lhs_imatmul_pack_x8p2vlx4_x8p_sme(
47 size_t m, size_t k_chunk_count, size_t k_chunk_length, const void* const* lhs_ptrs, size_t lhs_ptr_offset,
48 const void* pad_ptr, void* lhs_packed) {
49 KAI_ASSUME(lhs_ptrs != NULL);
50 KAI_ASSUME(lhs_packed != NULL);
51
52 3330 const size_t m_step = kai_get_mr_lhs_imatmul_pack_x8p2vlx4_x8p_sme();
53 3330 const size_t width = k_chunk_length;
54
55 KAI_ASSERT(m_step <= MAX_M_STEP);
56 3330 const uint8_t* in[MAX_M_STEP];
57
58 3330 uint8_t* out_base = lhs_packed;
59
60 3330 kai_commit_za();
61
62
2/2
✓ Branch 0 taken 4232 times.
✓ Branch 1 taken 3330 times.
7562 for (size_t i_m = 0; i_m < m; i_m += m_step) {
63
2/2
✓ Branch 0 taken 94440 times.
✓ Branch 1 taken 4232 times.
98672 for (size_t i_k_chunk = 0; i_k_chunk < k_chunk_count; i_k_chunk += 1) {
64
2/2
✓ Branch 0 taken 74694 times.
✓ Branch 1 taken 19746 times.
94440 const size_t height = KAI_MIN(m - i_m, m_step);
65 94440 void* out = out_base;
66 94440 out_base += m_step * kai_roundup(k_chunk_length, KR) * sizeof(int8_t);
67
2/2
✓ Branch 0 taken 1004748 times.
✓ Branch 1 taken 94440 times.
1099188 for (size_t y = 0; y < height; y += 1) {
68 KAI_ASSERT(i_k_chunk + (i_m + y) * k_chunk_count < m * k_chunk_count);
69 1004748 in[y] = *(lhs_ptrs + i_m * k_chunk_count + i_k_chunk * m_step + y);
70
2/2
✓ Branch 0 taken 49966 times.
✓ Branch 1 taken 954782 times.
1004748 if (in[y] != pad_ptr) {
71 954782 uintptr_t in_ptr = (uintptr_t)in[y] + lhs_ptr_offset;
72 954782 in[y] = (const uint8_t*)in_ptr; // NOLINT(performance-no-int-to-ptr)
73 954782 }
74 1004748 }
75
76 94440 kai_kernel_lhs_imatmul_pack_x8p2vlx4_x8p_sme(
77 94440 height, width, in, out); // NOLINT(bugprone-multi-level-implicit-pointer-conversion)
78 94440 }
79 4232 }
80 3330 }
81
82 #endif // Architectural features check.
83