KleidiAI Coverage Report


Directory: ./
File: kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla.c
Date: 2025-10-20 13:18:31
Coverage Exec Excl Total
Lines: 100.0% 38 4 42
Functions: 100.0% 10 0 10
Branches: -% 0 8 8

Line Branch Exec Source
1 //
2 // SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 //
4 // SPDX-License-Identifier: Apache-2.0
5 //
6
7 #if (!defined(__aarch64__)) && !defined(_M_ARM64)
8 #error This file must be compiled for AArch64.
9 #else // Architectural features check.
10
11 #include "kai_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla.h"
12
13 #include <stddef.h>
14 #include <stdint.h>
15
16 #include "kai/kai_common.h"
17
18 typedef struct {
19 float maxval;
20 float minval;
21 unsigned int num_strings;
22 const unsigned int* string_lengths;
23 size_t N;
24 const void* B_ptr;
25 size_t output_offset;
26 size_t input_initial_col;
27 size_t input_offset;
28 void* output_ptr;
29 const void* bias;
30 } KernelArgs;
31
32 void kai_kernel_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla(
33 const void* input_ptr, size_t m, KernelArgs* args_ptr, unsigned long flags);
34
35 static const size_t kai_mr = 6;
36 static const size_t kai_nr = 16;
37 static const size_t kai_kr = 1;
38 static const size_t kai_sr = 1;
39
40 50 size_t kai_get_m_step_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla(void) {
41 50 return kai_mr;
42 }
43
44 50 size_t kai_get_n_step_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla(void) {
45 50 return kai_nr;
46 }
47
48 17 size_t kai_get_nr_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla(void) {
49 17 return kai_nr;
50 }
51
52 17 size_t kai_get_kr_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla(void) {
53 17 return kai_kr;
54 }
55
56 17 size_t kai_get_sr_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla(void) {
57 17 return kai_sr;
58 }
59
60 16 size_t kai_get_lhs_offset_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla(size_t m_idx, size_t stride) {
61 KAI_ASSUME(m_idx % kai_get_m_step_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla() == 0);
62
63 16 return m_idx * stride;
64 }
65
66 16 size_t kai_get_rhs_packed_offset_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla(size_t n_idx, size_t k) {
67 KAI_ASSUME(n_idx % kai_get_n_step_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla() == 0);
68
69 16 return n_idx / kai_nr * (kai_nr * sizeof(float) + kai_nr * k * sizeof(float));
70 }
71
72 16 size_t kai_get_dst_offset_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla(size_t m_idx, size_t n_idx, size_t stride) {
73 KAI_ASSUME(m_idx % kai_get_m_step_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla() == 0);
74 KAI_ASSUME(n_idx % kai_get_n_step_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla() == 0);
75
76 16 return m_idx * stride + n_idx * sizeof(float);
77 }
78
79 16 size_t kai_get_dst_size_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla(size_t m, size_t n) {
80 16 return m * n * sizeof(float);
81 }
82
83 17 void kai_run_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla(
84 size_t m, size_t n, size_t k, //
85 const void* lhs, size_t lhs_stride, //
86 const void* rhs_packed, //
87 void* dst, size_t dst_stride_row, size_t dst_stride_col, //
88 float clamp_min, float clamp_max) {
89 17 KAI_UNUSED(dst_stride_col);
90
91 17 KernelArgs ka;
92
93 17 unsigned long flags = 0;
94
95 17 unsigned int string_length = k;
96 17 ka.num_strings = 1;
97 17 ka.string_lengths = &string_length;
98 17 ka.N = n;
99 17 ka.B_ptr = rhs_packed;
100 17 ka.bias = NULL;
101
102 // Direct input.
103 17 const void* input_ptr = lhs;
104 17 ka.input_offset = lhs_stride / sizeof(float);
105 17 ka.input_initial_col = 0;
106
107 // Direct output.
108 17 ka.output_ptr = dst;
109 17 ka.output_offset = dst_stride_row / sizeof(float);
110
111 // Clamping output.
112 17 flags |= 0x2;
113 17 ka.maxval = clamp_max;
114 17 ka.minval = clamp_min;
115
116 17 kai_kernel_matmul_clamp_f32_f32_f32p16x1b_6x16_neon_mla(input_ptr, m, &ka, flags);
117 17 }
118
119 #endif // Architectural features check.
120