KleidiAI Coverage Report


Directory: ./
Coverage: low: ≥ 0% medium: ≥ 75.0% high: ≥ 90.0%
Coverage Exec / Excl / Total
Lines: 77.1% 27 / 16 / 51
Functions: 66.7% 4 / 0 / 6
Branches: 100.0% 6 / 32 / 38

kai/ukernels/matmul/pack/kai_lhs_pack_f32p2vlx1_f32_sme.c
Line Branch Exec Source
1 //
2 // SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 //
4 // SPDX-License-Identifier: Apache-2.0
5 //
6
7 #if (!defined(__aarch64__) || !defined(__ARM_FEATURE_SVE2)) && !defined(_M_ARM64)
8 #error This file must be compiled for AArch64, FEAT_SVE2.
9 #else // Architectural features check.
10 #include "kai_lhs_pack_f32p2vlx1_f32_sme.h"
11
12 #include <stddef.h>
13 #include <stdint.h>
14
15 #include "kai/kai_common.h"
16
17 enum {
18 MR = 2,
19 KR = 1,
20 MAX_M_STEP = MR * (KAI_SME_VEC_LENGTH_MAX_BYTES / sizeof(float)) / KR,
21 SR = 1,
22 };
23
24 void kai_kernel_lhs_pack_f32p2vlx1_f32_sme(size_t height, size_t width, const void* in, void* out);
25
26 540 static size_t kai_get_mr_lhs_pack_f32p2vlx1_f32_sme(void) {
27 540 return MR * kai_get_sme_vector_length_u32() / KR;
28 }
29
30 size_t kai_get_m_step_lhs_pack_f32p2vlx1_f32_sme(size_t mr) {
31 KAI_ASSUME(mr == kai_get_mr_lhs_pack_f32p2vlx1_f32_sme());
32 KAI_UNUSED(mr);
33 return kai_get_mr_lhs_pack_f32p2vlx1_f32_sme();
34 }
35
36 108 size_t kai_get_lhs_offset_lhs_pack_f32p2vlx1_f32_sme(size_t m_idx, size_t lhs_stride_row) {
37 KAI_ASSUME(m_idx % kai_get_mr_lhs_pack_f32p2vlx1_f32_sme() == 0);
38
39 108 return m_idx * lhs_stride_row;
40 }
41
42 size_t kai_get_lhs_packed_offset_lhs_pack_f32p2vlx1_f32_sme(size_t m_idx, size_t k, size_t mr, size_t kr, size_t sr) {
43 KAI_ASSUME(m_idx % kai_get_m_step_lhs_pack_f32p2vlx1_f32_sme(mr) == 0);
44 KAI_ASSUME(mr == kai_get_mr_lhs_pack_f32p2vlx1_f32_sme());
45 KAI_ASSUME(kr == KR);
46 KAI_ASSUME(sr == SR);
47
48 KAI_UNUSED(mr);
49 KAI_UNUSED(kr);
50 KAI_UNUSED(sr);
51
52 return m_idx * kai_roundup(k, KR) * sizeof(float);
53 }
54
55 108 size_t kai_get_lhs_packed_size_lhs_pack_f32p2vlx1_f32_sme(size_t m, size_t k, size_t mr, size_t kr, size_t sr) {
56 KAI_ASSUME(mr == kai_get_mr_lhs_pack_f32p2vlx1_f32_sme());
57 KAI_ASSUME(kr == KR);
58 KAI_ASSUME(sr == SR);
59
60 108 KAI_UNUSED(mr);
61 108 KAI_UNUSED(kr);
62 108 KAI_UNUSED(sr);
63 108 return kai_roundup(m, kai_get_mr_lhs_pack_f32p2vlx1_f32_sme()) * kai_roundup(k, KR) * sizeof(float);
64 }
65
66 108 void kai_run_lhs_pack_f32p2vlx1_f32_sme(
67 size_t m, size_t k, size_t mr, size_t kr, size_t sr, size_t m_idx_start, const void* lhs, size_t lhs_stride_row,
68 void* lhs_packed) {
69 KAI_ASSUME(mr == kai_get_mr_lhs_pack_f32p2vlx1_f32_sme());
70 KAI_ASSUME(kr == KR);
71 KAI_ASSUME(sr == SR);
72 KAI_ASSUME(m_idx_start == 0);
73 KAI_ASSUME(lhs != NULL);
74 KAI_ASSUME(lhs_packed != NULL);
75
76 108 const size_t m_step = kai_get_mr_lhs_pack_f32p2vlx1_f32_sme();
77 108 const size_t width = k;
78
79 KAI_ASSERT(m_step <= MAX_M_STEP);
80 108 const uint8_t* in[MAX_M_STEP];
81
82 108 uint8_t* out_base = lhs_packed;
83 108 const uint8_t* lhs_ptr = lhs;
84
85 108 kai_commit_za();
86
87
2/2
✓ Branch 0 taken 108 times.
✓ Branch 1 taken 120 times.
228 for (size_t i_m = 0; i_m < m; i_m += m_step) {
88
2/2
✓ Branch 0 taken 102 times.
✓ Branch 1 taken 18 times.
120 const size_t height = KAI_MIN(m - i_m, m_step);
89 120 void* out = out_base;
90 120 out_base += m_step * kai_roundup(k, KR) * sizeof(float);
91
92
2/2
✓ Branch 0 taken 1788 times.
✓ Branch 1 taken 120 times.
1908 for (size_t y = 0; y < height; y++) {
93 1788 in[y] = lhs_ptr + (i_m + y) * lhs_stride_row;
94 1788 }
95
96 120 kai_kernel_lhs_pack_f32p2vlx1_f32_sme(
97 120 height, width, in, out); // NOLINT(bugprone-multi-level-implicit-pointer-conversion)
98 120 }
99 108 }
100
101 #endif // Architectural features check.
102