KleidiAI Coverage Report


Directory: ./
Coverage: low: ≥ 0% medium: ≥ 75.0% high: ≥ 90.0%
Coverage Exec / Excl / Total
Lines: 100.0% 36 / 10 / 46
Functions: 100.0% 7 / 0 / 7
Branches: 100.0% 2 / 20 / 22

kai/ukernels/matmul/pack/kai_rhs_pack_kxn_x16p32x1b_x16_x16_neon.c
Line Branch Exec Source
1 //
2 // SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 //
4 // SPDX-License-Identifier: Apache-2.0
5 //
6
7 #if (!defined(__aarch64__)) && !defined(_M_ARM64)
8 #error This file must be compiled for AArch64.
9 #else // Architectural features check.
10
11 #include "kai_rhs_pack_kxn_x16p32x1b_x16_x16_neon.h"
12
13 #include <stddef.h>
14 #include <stdint.h>
15
16 #include "kai/kai_common.h"
17
18 enum {
19 NR = 32,
20 KR = 1,
21 };
22
23 typedef struct {
24 const void* bias_ptr;
25 size_t width;
26 size_t height;
27 size_t in_stride;
28 size_t out_stride;
29 size_t bias_step;
30 const void* in;
31 void* out;
32 } KernelArgs;
33
34 static const size_t kai_num_bytes_input = sizeof(uint16_t);
35 static const size_t kai_num_bytes_output = sizeof(uint16_t);
36 static const size_t kai_num_bytes_bias = sizeof(uint16_t);
37
38 void kai_kernel_rhs_pack_kxn_x16p32x1b_x16_x16_neon(const KernelArgs* args_ptr);
39
40 4320 size_t kai_get_n_step_rhs_pack_kxn_x16p32x1b_x16_x16_neon(void) {
41 4320 return NR;
42 }
43
44 432 size_t kai_get_rhs_offset_rhs_pack_kxn_x16p32x1b_x16_x16_neon(size_t n_idx) {
45 KAI_ASSUME(n_idx % kai_get_n_step_rhs_pack_kxn_x16p32x1b_x16_x16_neon() == 0);
46
47 432 return n_idx * kai_num_bytes_input;
48 }
49
50 432 size_t kai_get_bias_offset_rhs_pack_kxn_x16p32x1b_x16_x16_neon(size_t n_idx) {
51 432 return n_idx * kai_num_bytes_bias;
52 }
53
54 1296 size_t kai_get_rhs_packed_stride_rhs_pack_kxn_x16p32x1b_x16_x16_neon(size_t k) {
55 2592 return kai_get_n_step_rhs_pack_kxn_x16p32x1b_x16_x16_neon() *
56 1296 (kai_num_bytes_bias + kai_roundup(k, KR) * kai_num_bytes_output);
57 }
58
59 864 size_t kai_get_rhs_packed_offset_rhs_pack_kxn_x16p32x1b_x16_x16_neon(size_t n_idx, size_t k) {
60 KAI_ASSUME(n_idx % kai_get_n_step_rhs_pack_kxn_x16p32x1b_x16_x16_neon() == 0);
61
62 864 const size_t block_idx = n_idx / kai_get_n_step_rhs_pack_kxn_x16p32x1b_x16_x16_neon();
63 1728 return block_idx * kai_get_rhs_packed_stride_rhs_pack_kxn_x16p32x1b_x16_x16_neon(k);
64 864 }
65
66 432 size_t kai_get_rhs_packed_size_rhs_pack_kxn_x16p32x1b_x16_x16_neon(size_t n, size_t k) {
67 432 const size_t n_nr_blocks = kai_roundup(n, kai_get_n_step_rhs_pack_kxn_x16p32x1b_x16_x16_neon());
68 864 return kai_get_rhs_packed_offset_rhs_pack_kxn_x16p32x1b_x16_x16_neon(n_nr_blocks, k);
69 432 }
70
71 432 void kai_run_rhs_pack_kxn_x16p32x1b_x16_x16_neon(
72 size_t num_groups, size_t n, size_t k, size_t nr, size_t kr, size_t sr, size_t rhs_stride_row, const void* rhs,
73 const void* bias, const void* scale, void* rhs_packed, size_t extra_bytes, const void* params) {
74 KAI_ASSUME(num_groups == 1);
75 432 KAI_UNUSED(nr);
76 KAI_ASSUME(kr == KR);
77 KAI_ASSUME(sr == 1);
78 KAI_ASSUME(rhs != NULL);
79 KAI_ASSUME(scale == NULL);
80 KAI_ASSUME(rhs_packed != NULL);
81 KAI_ASSUME(extra_bytes == 0);
82 KAI_ASSUME(params == NULL);
83
84 // Null bias is supported by adding a set of zero bias values when the bias pointer is NULL
85 432 size_t bias_step = NR * sizeof(uint16_t);
86 static const uint8_t zero_bias[NR * sizeof(uint16_t)] = {0};
87
88 432 const void* bias_ptr = bias;
89
90
2/2
✓ Branch 0 taken 216 times.
✓ Branch 1 taken 216 times.
432 if (bias == NULL) {
91 216 bias_step = 0;
92 216 bias_ptr = zero_bias;
93 216 }
94
95 432 KernelArgs args;
96 432 args.bias_ptr = bias_ptr;
97 432 args.height = k;
98 432 args.width = n;
99 432 args.in = rhs;
100 432 args.out = rhs_packed;
101 432 args.bias_step = bias_step;
102 432 args.in_stride = rhs_stride_row;
103 432 args.out_stride = kai_get_rhs_packed_stride_rhs_pack_kxn_x16p32x1b_x16_x16_neon(args.height);
104
105 432 kai_kernel_rhs_pack_kxn_x16p32x1b_x16_x16_neon(&args);
106 432 }
107
108 #endif // Architectural features check.
109