KleidiAI Coverage Report


Directory: ./
File: kai/ukernels/matmul/pack/kai_lhs_imatmul_pack_x16p2vlx2_x16p_sme.c
Date: 2025-10-20 13:18:31
Coverage Exec Excl Total
Lines: 100.0% 32 5 37
Functions: 100.0% 5 0 5
Branches: 100.0% 10 10 20

Line Branch Exec Source
1 //
2 // SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 //
4 // SPDX-License-Identifier: Apache-2.0
5 //
6
7 #if (!defined(__aarch64__) || !defined(__ARM_FEATURE_SVE2)) && !defined(_M_ARM64)
8 #error This file must be compiled for AArch64, FEAT_SVE2.
9 #else // Architectural features check.
10 #include "kai_lhs_imatmul_pack_x16p2vlx2_x16p_sme.h"
11
12 #include <stddef.h>
13 #include <stdint.h>
14
15 #include "kai/kai_common.h"
16
17 enum {
18 MR = 2,
19 KR = 2,
20 MAX_M_STEP = MR * (KAI_SME_VEC_LENGTH_MAX_BYTES / sizeof(uint16_t)) / KR,
21 };
22
23 void kai_kernel_lhs_imatmul_pack_x16p2vlx2_x16p_sme(
24 size_t height, size_t width, const void* in, size_t row_offset, void* out);
25
26 45408 static size_t kai_get_mr_lhs_imatmul_pack_x16p2vlx2_x16p_sme(void) {
27 45408 return MR * kai_get_sme_vector_length_u16() / KR;
28 }
29
30 22704 size_t kai_get_m_step_lhs_imatmul_pack_x16p2vlx2_x16p_sme(void) {
31 22704 return kai_get_mr_lhs_imatmul_pack_x16p2vlx2_x16p_sme();
32 }
33
34 22704 size_t kai_get_lhs_packed_offset_lhs_imatmul_pack_x16p2vlx2_x16p_sme(
35 size_t m_idx, size_t k_chunk_count, size_t k_chunk_length) {
36 KAI_ASSUME(m_idx % kai_get_m_step_lhs_imatmul_pack_x16p2vlx2_x16p_sme() == 0);
37
38 22704 return m_idx * k_chunk_count * kai_roundup(k_chunk_length, KR) * sizeof(uint16_t);
39 }
40
41 11352 size_t kai_get_lhs_packed_size_lhs_imatmul_pack_x16p2vlx2_x16p_sme(
42 size_t m, size_t k_chunk_count, size_t k_chunk_length) {
43 11352 const size_t m_end = kai_roundup(m, kai_get_mr_lhs_imatmul_pack_x16p2vlx2_x16p_sme());
44 22704 return kai_get_lhs_packed_offset_lhs_imatmul_pack_x16p2vlx2_x16p_sme(m_end, k_chunk_count, k_chunk_length);
45 11352 }
46
47 11352 void kai_run_lhs_imatmul_pack_x16p2vlx2_x16p_sme(
48 size_t m, size_t k_chunk_count, size_t k_chunk_length, const void* const* lhs_ptrs, size_t lhs_ptr_offset,
49 const void* pad_ptr, void* lhs_packed) {
50 KAI_ASSUME(lhs_ptrs != NULL);
51 KAI_ASSUME(lhs_packed != NULL);
52
53 11352 const size_t m_step = kai_get_mr_lhs_imatmul_pack_x16p2vlx2_x16p_sme();
54 11352 const size_t row_offset = 0;
55 11352 const size_t width = k_chunk_length;
56
57 KAI_ASSERT(m_step <= MAX_M_STEP);
58 11352 const uint8_t* in[MAX_M_STEP];
59
60 11352 uint8_t* out_base = lhs_packed;
61
2/2
✓ Branch 0 taken 16434 times.
✓ Branch 1 taken 11352 times.
27786 for (size_t i_m = 0; i_m < m; i_m += m_step) {
62
2/2
✓ Branch 0 taken 16434 times.
✓ Branch 1 taken 250734 times.
267168 for (size_t i_k_chunk = 0; i_k_chunk < k_chunk_count; i_k_chunk += 1) {
63
2/2
✓ Branch 0 taken 159258 times.
✓ Branch 1 taken 91476 times.
250734 const size_t height = KAI_MIN(m - i_m, m_step);
64 250734 void* out = out_base;
65
2/2
✓ Branch 0 taken 4143414 times.
✓ Branch 1 taken 250734 times.
4394148 for (size_t y = 0; y < height; y += 1) {
66 KAI_ASSERT(i_k_chunk + (i_m + y) * k_chunk_count < m * k_chunk_count);
67 4143414 in[y] = *(lhs_ptrs + i_m * k_chunk_count + i_k_chunk * m_step + y);
68
2/2
✓ Branch 0 taken 127644 times.
✓ Branch 1 taken 4015770 times.
4143414 if (in[y] != pad_ptr) {
69 4015770 in[y] += lhs_ptr_offset;
70 4015770 }
71 4143414 }
72
73 250734 kai_kernel_lhs_imatmul_pack_x16p2vlx2_x16p_sme(
74 250734 height, width, in, row_offset, out); // NOLINT(bugprone-multi-level-implicit-pointer-conversion)
75 250734 out_base += m_step * kai_roundup(k_chunk_length, KR) * sizeof(uint16_t);
76 250734 }
77 16434 }
78 11352 }
79
80 #endif // Architectural features check.
81