KleidiAI Coverage Report


Directory: ./
File: kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.c
Date: 2025-10-20 13:18:31
Coverage Exec Excl Total
Lines: 100.0% 28 12 40
Functions: 100.0% 7 0 7
Branches: -% 0 24 24

Line Branch Exec Source
1 //
2 // SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 //
4 // SPDX-License-Identifier: Apache-2.0
5 //
6
7 #if (!defined(__aarch64__) || !defined(__ARM_FEATURE_SVE2)) && !defined(_M_ARM64)
8 #error This file must be compiled for AArch64, FEAT_SVE2.
9 #else // Architectural features check.
10 #include "kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.h"
11
12 #include <stddef.h>
13 #include <stdint.h>
14
15 #include "kai/kai_common.h"
16
17 enum {
18 NR = 2,
19 KR = 1,
20 };
21
22 typedef struct {
23 const void* bias_ptr;
24 size_t width;
25 size_t height;
26 size_t in_stride;
27 size_t out_stride;
28 const void* in;
29 void* out;
30 } KernelArgs;
31
32 static const size_t kai_num_bytes_input = sizeof(uint32_t);
33 static const size_t kai_num_bytes_output = sizeof(uint32_t);
34 static const size_t kai_num_bytes_bias = sizeof(float);
35
36 void kai_kernel_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(const KernelArgs* args_ptr);
37
38 1386 size_t kai_get_n_step_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(void) {
39 1386 return NR * kai_get_sme_vector_length_u32() / KR;
40 }
41
42 126 size_t kai_get_rhs_offset_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t n_idx) {
43 KAI_ASSUME(n_idx % kai_get_n_step_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme() == 0);
44
45 126 return n_idx * kai_num_bytes_input;
46 }
47
48 160 size_t kai_get_bias_offset_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t n_idx) {
49 160 return n_idx * kai_num_bytes_bias;
50 }
51
52 378 size_t kai_get_rhs_packed_stride_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t k) {
53 756 return kai_get_n_step_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme() *
54 378 (kai_num_bytes_bias + kai_roundup(k, KR) * kai_num_bytes_output);
55 }
56
57 252 size_t kai_get_rhs_packed_offset_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t n_idx, size_t k) {
58 KAI_ASSUME(n_idx % kai_get_n_step_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme() == 0);
59
60 252 const size_t block_idx = n_idx / kai_get_n_step_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme();
61 504 return block_idx * kai_get_rhs_packed_stride_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(k);
62 252 }
63
64 126 size_t kai_get_rhs_packed_size_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t n, size_t k) {
65 126 const size_t n_nr_blocks = kai_roundup(n, kai_get_n_step_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme());
66 252 return kai_get_rhs_packed_offset_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(n_nr_blocks, k);
67 126 }
68
69 126 void kai_run_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(
70 size_t num_groups, size_t n, size_t k, size_t nr, size_t kr, size_t sr, size_t rhs_stride_row, const void* rhs,
71 const void* bias, const void* scale, void* rhs_packed, size_t extra_bytes, const void* params) {
72 KAI_ASSUME(num_groups == 1);
73 KAI_ASSUME(nr == kai_get_n_step_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme());
74 KAI_ASSUME(kr == KR);
75 KAI_ASSUME(sr == 1);
76 KAI_ASSUME(rhs != NULL);
77 KAI_ASSUME(bias != NULL);
78 KAI_ASSUME(scale == NULL);
79 KAI_ASSUME(rhs_packed != NULL);
80 KAI_ASSUME(extra_bytes == 0);
81 KAI_ASSUME(params == NULL);
82
83 126 KernelArgs args;
84 126 args.bias_ptr = bias;
85 126 args.height = k;
86 126 args.width = n;
87 126 args.in = rhs;
88 126 args.out = rhs_packed;
89 126 args.in_stride = rhs_stride_row;
90 126 args.out_stride = kai_get_rhs_packed_stride_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(args.height);
91
92 126 kai_kernel_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(&args);
93 126 }
94
95 #endif // Architectural features check.
96