KleidiAI Coverage Report


Directory: ./
File: kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.c
Date: 2025-10-20 13:18:31
Coverage Exec Excl Total
Lines: 100.0% 37 5 42
Functions: 100.0% 10 0 10
Branches: -% 0 10 10

Line Branch Exec Source
1 //
2 // SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 //
4 // SPDX-License-Identifier: Apache-2.0
5 //
6
7 #if (!defined(__aarch64__) && !defined(_M_ARM64))
8 #error This file must be compiled for AArch64.
9 #else // Architectural features check.
10
11 #include "kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.h"
12
13 #include <stddef.h>
14 #include <stdint.h>
15
16 #include "kai/kai_common.h"
17
18 typedef struct {
19 float maxval;
20 float minval;
21 unsigned int num_strings;
22 const unsigned int* string_lengths;
23 size_t N;
24 const void* B_ptr;
25 size_t output_offset;
26 size_t input_initial_col;
27 size_t input_offset;
28 void* output_ptr;
29 const void* bias;
30 } kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla_impl_args_t;
31
32 extern void kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla_impl(
33 const void* input_ptr, size_t m, kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla_impl_args_t* args_ptr,
34 unsigned long flags);
35
36 static const size_t kai_mr = 6;
37 static const size_t kai_nr = 8;
38 static const size_t kai_kr = 1;
39 static const size_t kai_sr = 1;
40
41 18 size_t kai_get_m_step_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla(void) {
42 18 return kai_mr;
43 }
44
45 18 size_t kai_get_n_step_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla(void) {
46 18 return kai_nr;
47 }
48
49 17 size_t kai_get_nr_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla(void) {
50 17 return kai_nr;
51 }
52
53 17 size_t kai_get_kr_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla(void) {
54 17 return kai_kr;
55 }
56
57 17 size_t kai_get_sr_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla(void) {
58 17 return kai_sr;
59 }
60
61 16 size_t kai_get_lhs_offset_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla(size_t m_idx, size_t stride) {
62 KAI_ASSUME(m_idx % kai_mr == 0);
63
64 16 return m_idx * stride;
65 }
66
67 16 size_t kai_get_rhs_packed_offset_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla(size_t n_idx, size_t k) {
68 KAI_ASSUME(n_idx % kai_nr == 0);
69
70 16 return n_idx / kai_nr * (kai_nr * sizeof(float) + kai_nr * k * sizeof(float));
71 }
72
73 16 size_t kai_get_dst_offset_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla(
74 size_t m_idx, size_t n_idx, size_t stride) {
75 KAI_ASSUME(m_idx % kai_mr == 0);
76 KAI_ASSUME(n_idx % kai_nr == 0);
77
78 16 return m_idx * stride + n_idx * sizeof(float);
79 }
80
81 16 size_t kai_get_dst_size_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla(size_t m, size_t n) {
82 16 return m * n * sizeof(float);
83 }
84
85 17 void kai_run_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla(
86 size_t m, size_t n, size_t k, //
87 const void* lhs, size_t lhs_stride, //
88 const void* rhs_packed, //
89 void* dst, size_t dst_stride_row, size_t dst_stride_col, //
90 float clamp_min, float clamp_max) {
91 KAI_ASSERT(dst_stride_col == sizeof(float));
92
93 17 kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla_impl_args_t ka;
94
95 17 unsigned long flags = 0;
96
97 17 unsigned int string_length = k;
98 17 ka.num_strings = 1;
99 17 ka.string_lengths = &string_length;
100 17 ka.N = n;
101 17 ka.B_ptr = rhs_packed;
102 17 ka.bias = NULL;
103
104 // Direct input.
105 17 const void* input_ptr = lhs;
106 17 ka.input_offset = lhs_stride / sizeof(float);
107 17 ka.input_initial_col = 0;
108
109 // Direct output.
110 17 ka.output_ptr = dst;
111 17 ka.output_offset = dst_stride_row / sizeof(float);
112
113 // Clamping output.
114 17 flags |= 0x2;
115 17 ka.maxval = clamp_max;
116 17 ka.minval = clamp_min;
117
118 17 kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla_impl(input_ptr, m, &ka, flags);
119 17 }
120
121 #endif // Architectural features check.
122