test/nextgen/operators/matmul/matmul_tb.cpp
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // | ||
| 2 | // SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 3 | // | ||
| 4 | // SPDX-License-Identifier: Apache-2.0 | ||
| 5 | // | ||
| 6 | |||
| 7 | #include "test/nextgen/operators/matmul/matmul_tb.hpp" | ||
| 8 | |||
| 9 | #include <algorithm> | ||
| 10 | #include <array> | ||
| 11 | #include <cstddef> | ||
| 12 | #include <tuple> | ||
| 13 | #include <utility> | ||
| 14 | #include <vector> | ||
| 15 | |||
| 16 | #include "test/common/assert.hpp" | ||
| 17 | #include "test/common/buffer.hpp" | ||
| 18 | #include "test/common/compare.hpp" | ||
| 19 | #include "test/common/data_type.hpp" | ||
| 20 | #include "test/nextgen/common/poly.hpp" | ||
| 21 | #include "test/nextgen/common/random.hpp" | ||
| 22 | #include "test/nextgen/format/format.hpp" | ||
| 23 | #include "test/nextgen/format/plain_format.hpp" | ||
| 24 | #include "test/nextgen/harness/kernel_wrapper.hpp" | ||
| 25 | #include "test/nextgen/operators/matmul/matmul_config.hpp" | ||
| 26 | #include "test/nextgen/operators/matmul/matmul_slots.hpp" | ||
| 27 | #include "test/nextgen/quantization/quantizer.hpp" | ||
| 28 | #include "test/nextgen/reference/binary_elementwise.hpp" | ||
| 29 | #include "test/nextgen/reference/clamp.hpp" | ||
| 30 | #include "test/nextgen/reference/matmul.hpp" | ||
| 31 | #include "test/nextgen/reference/reduce.hpp" | ||
| 32 | #include "test/nextgen/reference/unary_elementwise.hpp" | ||
| 33 | #include "test/reference/transpose.hpp" | ||
| 34 | |||
| 35 | namespace kai::test { | ||
| 36 | |||
| 37 | 600 | MatMulTb::MatMulTb( | |
| 38 | size_t shape_m, size_t shape_n, size_t shape_k, MatMulBiasMode bias_mode, float clamp_ratio, | ||
| 39 | const MatMulOperator* op) : | ||
| 40 | 200 | m_shape_m(shape_m), | |
| 41 | 200 | m_shape_n(shape_n), | |
| 42 | 200 | m_shape_k(shape_k), | |
| 43 | 200 | m_bias_mode(bias_mode), | |
| 44 | 200 | m_clamp_ratio(clamp_ratio), | |
| 45 | 200 | m_op(op), | |
| 46 | 400 | m_tensors_required() { | |
| 47 |
1/2✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
|
200 | std::fill(m_tensors_required.begin(), m_tensors_required.end(), false); |
| 48 | 400 | } | |
| 49 | |||
/// Generates all input tensors and reference outputs for one test case.
///
/// The steps run in a fixed order because later stages consume earlier ones:
/// config -> constant info -> raw data -> quantization -> derived tensors ->
/// reference packing -> reference matmul.
void MatMulTb::generate_test_data(Rng& rng) {
    populate_config();
    determine_required_tensors();

    // Populates the constant information.
    m_op->matmul->populate_constant_info(m_tensors);

    if (m_op->pack_lhs.has_value()) {
        const KernelWrapper& pack_lhs = *m_op->pack_lhs.value();
        pack_lhs.populate_constant_info(m_tensors);
    }

    if (m_op->pack_rhs.has_value()) {
        const KernelWrapper& pack_rhs = *m_op->pack_rhs.value();
        pack_rhs.populate_constant_info(m_tensors);
    }

    // Generates the raw (floating-point) test data.
    generate_lhs_raw(rng);
    generate_rhs_raw(rng);
    generate_bias_raw(rng);

    compute_rhs_t_raw();  // The transposed RHS data is always needed for reference packing.

    // Quantizes the input data, if the operator defines quantizers for it.
    if (m_op->lhs_quant.has_value()) {
        quantize_lhs();
    }

    if (m_op->rhs_quant.has_value()) {
        quantize_rhs_t();
    }

    if (m_op->bias_quant.has_value()) {
        quantize_bias();
    }

    // Derived tensors are only computed when some kernel listed them as an
    // input (see determine_required_tensors()).
    if (m_tensors_required.at(MATMUL_SLOT_LHS_QZP_NEG)) {
        compute_lhs_qzp_neg();
    }

    if (m_tensors_required.at(MATMUL_SLOT_RHS_T_QDATA_SIGN)) {
        compute_rhs_t_qdata_sign();
    }

    if (m_tensors_required.at(MATMUL_SLOT_RHS_T_QDATA_SIGN_SUM)) {
        compute_rhs_t_qdata_sign_sum();
    }

    // Generates reference output.
    if (m_op->pack_lhs.has_value()) {
        compute_ref_packed_lhs();
    }

    if (m_op->pack_rhs.has_value()) {
        compute_ref_packed_rhs();
    }

    compute_ref_matmul();
}
| 110 | |||
| 111 | 200 | void MatMulTb::populate_config() { | |
| 112 | 200 | m_tensors.at(MATMUL_SLOT_CONFIG).set_value(MatMulConfig{m_bias_mode}); | |
| 113 | 200 | } | |
| 114 | |||
| 115 | 200 | void MatMulTb::determine_required_tensors() { | |
| 116 |
0/2✗ Branch 0 not taken.
✗ Branch 1 not taken.
|
200 | std::vector<const KernelWrapper*> kernels{m_op->matmul.get()}; |
| 117 | |||
| 118 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 200 times.
|
200 | if (m_op->pack_lhs.has_value()) { |
| 119 |
2/4✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
|
200 | kernels.emplace_back(m_op->pack_lhs.value().get()); |
| 120 | 200 | } | |
| 121 | |||
| 122 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 200 times.
|
200 | if (m_op->pack_rhs.has_value()) { |
| 123 |
2/4✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
|
200 | kernels.emplace_back(m_op->pack_rhs.value().get()); |
| 124 | 200 | } | |
| 125 | |||
| 126 |
2/2✓ Branch 0 taken 200 times.
✓ Branch 1 taken 600 times.
|
800 | for (const KernelWrapper* kernel : kernels) { |
| 127 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 600 times.
|
600 | if (kernel != nullptr) { |
| 128 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 600 times.
|
600 | const std::vector<size_t> run_inputs = kernel->run_inputs(m_tensors); |
| 129 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 600 times.
|
600 | const std::vector<size_t> ref_inputs = kernel->ref_inputs(m_tensors); |
| 130 | |||
| 131 |
2/2✓ Branch 0 taken 1345 times.
✓ Branch 1 taken 600 times.
|
1945 | for (const size_t id : run_inputs) { |
| 132 |
1/2✓ Branch 0 taken 1345 times.
✗ Branch 1 not taken.
|
1345 | m_tensors_required.at(id) = true; |
| 133 | 1345 | } | |
| 134 | |||
| 135 |
2/2✓ Branch 0 taken 600 times.
✓ Branch 1 taken 2545 times.
|
3145 | for (const size_t id : ref_inputs) { |
| 136 |
1/2✓ Branch 0 taken 2545 times.
✗ Branch 1 not taken.
|
2545 | m_tensors_required.at(id) = true; |
| 137 | 2545 | } | |
| 138 | 600 | } | |
| 139 | 600 | } | |
| 140 | 200 | } | |
| 141 | |||
| 142 | 200 | void MatMulTb::generate_lhs_raw(Rng& rng) { | |
| 143 | 200 | const std::array shape{m_shape_m, m_shape_k}; | |
| 144 | 200 | const Poly<Format> format(std::in_place_type<PlainFormat>, DataType::FP32); | |
| 145 |
1/2✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
|
200 | Tensor& tensor = m_tensors.at(MATMUL_SLOT_LHS_RAW); |
| 146 | |||
| 147 |
5/10✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 200 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 200 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 200 times.
✗ Branch 9 not taken.
|
200 | tensor.set_shape(shape).set_format(format).set_data(format->generate_random(shape, rng)); |
| 148 | 200 | } | |
| 149 | |||
| 150 | 200 | void MatMulTb::generate_rhs_raw(Rng& rng) { | |
| 151 | 200 | const std::array shape{m_shape_k, m_shape_n}; | |
| 152 | 200 | const Poly<Format> format(std::in_place_type<PlainFormat>, DataType::FP32); | |
| 153 |
1/2✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
|
200 | Tensor& tensor = m_tensors.at(MATMUL_SLOT_RHS_RAW); |
| 154 | |||
| 155 |
5/10✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 200 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 200 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 200 times.
✗ Branch 9 not taken.
|
200 | tensor.set_shape(shape).set_format(format).set_data(format->generate_random(shape, rng)); |
| 156 | 200 | } | |
| 157 | |||
| 158 | 200 | void MatMulTb::generate_bias_raw(Rng& rng) { | |
| 159 | 200 | const std::array shape{m_shape_n}; | |
| 160 | 200 | const Poly<Format> format(std::in_place_type<PlainFormat>, DataType::FP32); | |
| 161 |
1/2✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
|
200 | Tensor& tensor = m_tensors.at(MATMUL_SLOT_BIAS_RAW); |
| 162 | |||
| 163 |
5/10✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 200 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 200 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 200 times.
✗ Branch 9 not taken.
|
200 | tensor.set_shape(shape).set_format(format).set_data(format->generate_random(shape, rng)); |
| 164 | 200 | } | |
| 165 | |||
| 166 | 200 | void MatMulTb::compute_rhs_t_raw() { | |
| 167 | 200 | const std::array shape{m_shape_n, m_shape_k}; | |
| 168 | 200 | const Poly<Format> format(std::in_place_type<PlainFormat>, DataType::FP32); | |
| 169 |
1/2✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
|
200 | Tensor& rhs_t_raw = m_tensors.at(MATMUL_SLOT_RHS_T_RAW); |
| 170 |
1/2✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
|
200 | const Tensor& rhs_raw = m_tensors.at(MATMUL_SLOT_RHS_RAW); |
| 171 | |||
| 172 |
5/10✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 200 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 200 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 200 times.
✗ Branch 9 not taken.
|
200 | rhs_t_raw.set_shape(shape).set_format(format).set_data(transpose<float>(rhs_raw.data_ptr(), m_shape_k, m_shape_n)); |
| 173 | 200 | } | |
| 174 | |||
| 175 | 200 | void MatMulTb::quantize_lhs() { | |
| 176 | 200 | const Quantizer& lhs_quant = *m_op->lhs_quant.value(); | |
| 177 | |||
| 178 | 200 | const std::array lhs_shape{m_shape_m, m_shape_k}; | |
| 179 | 200 | const Tensor& lhs_raw = m_tensors.at(MATMUL_SLOT_LHS_RAW); | |
| 180 | 200 | Tensor& lhs_qdata = m_tensors.at(MATMUL_SLOT_LHS_QDATA); | |
| 181 | 200 | Tensor& lhs_qscale = m_tensors.at(MATMUL_SLOT_LHS_QSCALE); | |
| 182 | 200 | Tensor& lhs_qzp = m_tensors.at(MATMUL_SLOT_LHS_QZP); | |
| 183 | |||
| 184 | 200 | lhs_quant.dynamic_quantize(DataType::FP32, lhs_shape, lhs_raw.data(), lhs_qdata, lhs_qscale, lhs_qzp); | |
| 185 | 200 | } | |
| 186 | |||
| 187 | 200 | void MatMulTb::quantize_rhs_t() { | |
| 188 | 200 | const Quantizer& rhs_quant = *m_op->rhs_quant.value(); | |
| 189 | |||
| 190 | 200 | const std::array rhs_t_shape{m_shape_n, m_shape_k}; | |
| 191 | 200 | const Tensor& rhs_t_raw = m_tensors.at(MATMUL_SLOT_RHS_T_RAW); | |
| 192 | 200 | Tensor& rhs_t_qdata = m_tensors.at(MATMUL_SLOT_RHS_T_QDATA); | |
| 193 | 200 | Tensor& rhs_t_qscale = m_tensors.at(MATMUL_SLOT_RHS_T_QSCALE); | |
| 194 | 200 | Tensor& rhs_t_qzp = m_tensors.at(MATMUL_SLOT_RHS_T_QZP); | |
| 195 | |||
| 196 | 200 | rhs_quant.dynamic_quantize(DataType::FP32, rhs_t_shape, rhs_t_raw.data(), rhs_t_qdata, rhs_t_qscale, rhs_t_qzp); | |
| 197 | 200 | } | |
| 198 | |||
/// Quantizes the bias tensor. Bias quantization is not implemented; this path
/// is not exercised by the current test configurations (m_op->bias_quant is
/// never set for them), so reaching it is treated as a test setup error.
void MatMulTb::quantize_bias() {
    KAI_TEST_ERROR("Not supported.");
}
| 202 | |||
| 203 | 200 | void MatMulTb::compute_lhs_qzp_neg() { | |
| 204 | 200 | const Tensor& lhs_qzp = m_tensors.at(MATMUL_SLOT_LHS_QZP); | |
| 205 | 200 | Tensor& lhs_qzp_neg = m_tensors.at(MATMUL_SLOT_LHS_QZP_NEG); | |
| 206 | |||
| 207 | 200 | const Span<const size_t> shape = lhs_qzp.shape(); | |
| 208 | 200 | const Poly<Format>& format = lhs_qzp.format(); | |
| 209 | |||
| 210 | 200 | const UnaryElementwiseFn fn = make_negate(format->dtype()); | |
| 211 | 200 | Buffer data = fn(shape, lhs_qzp.data()); | |
| 212 | |||
| 213 |
3/6✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 200 times.
✗ Branch 5 not taken.
|
200 | lhs_qzp_neg.set_shape(shape).set_format(format).set_data(std::move(data)); |
| 214 | 200 | } | |
| 215 | |||
| 216 | 200 | void MatMulTb::compute_rhs_t_qdata_sign() { | |
| 217 | 200 | const Tensor& rhs_t_qdata = m_tensors.at(MATMUL_SLOT_RHS_T_QDATA); | |
| 218 | 200 | Tensor& rhs_t_qdata_sign = m_tensors.at(MATMUL_SLOT_RHS_T_QDATA_SIGN); | |
| 219 | |||
| 220 | 200 | const Span<const size_t> shape = rhs_t_qdata.shape(); | |
| 221 | 200 | const Poly<Format>& format = rhs_t_qdata.format(); | |
| 222 | |||
| 223 | 200 | const UnaryElementwiseFn fn = make_change_signedness(format->dtype()); | |
| 224 | 200 | Buffer data = fn(shape, rhs_t_qdata.data()); | |
| 225 | |||
| 226 |
3/6✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 200 times.
✗ Branch 5 not taken.
|
200 | rhs_t_qdata_sign.set_shape(shape).set_format(format).set_data(std::move(data)); |
| 227 | 200 | } | |
| 228 | |||
| 229 | 200 | void MatMulTb::compute_rhs_t_qdata_sign_sum() { | |
| 230 | 200 | const Tensor& rhs_t_qdata_sign = m_tensors.at(MATMUL_SLOT_RHS_T_QDATA_SIGN); | |
| 231 | 200 | Tensor& rhs_t_qdata_sign_sum = m_tensors.at(MATMUL_SLOT_RHS_T_QDATA_SIGN_SUM); | |
| 232 | |||
| 233 | 200 | const std::array rhs_t_shape = {m_shape_n, m_shape_k}; | |
| 234 | 200 | const std::array rhs_t_rowsum_shape = {m_shape_n}; | |
| 235 | 200 | const DataType src_dtype = rhs_t_qdata_sign.format()->dtype(); | |
| 236 | 200 | const DataType dst_dtype = rhs_t_qdata_sign_sum.format()->dtype(); | |
| 237 | |||
| 238 | 200 | const ReduceFn fn = make_reduce_add(src_dtype, dst_dtype); | |
| 239 | 200 | Buffer data = fn(0, rhs_t_shape, rhs_t_qdata_sign.data()); | |
| 240 | |||
| 241 |
2/4✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
|
200 | rhs_t_qdata_sign_sum.set_shape(rhs_t_rowsum_shape).set_data(std::move(data)); |
| 242 | 200 | } | |
| 243 | |||
| 244 | 200 | void MatMulTb::compute_ref_packed_lhs() { | |
| 245 | 200 | const KernelWrapper& pack_lhs = *m_op->pack_lhs.value(); | |
| 246 | 200 | const std::array lhs_shape{m_shape_m, m_shape_k}; | |
| 247 | 200 | pack_lhs.compute_reference(lhs_shape, m_tensors); | |
| 248 | 200 | } | |
| 249 | |||
| 250 | 200 | void MatMulTb::compute_ref_packed_rhs() { | |
| 251 | 200 | const KernelWrapper& pack_rhs = *m_op->pack_rhs.value(); | |
| 252 | 200 | const std::array rhs_t_shape{m_shape_n, m_shape_k}; | |
| 253 | 200 | pack_rhs.compute_reference(rhs_t_shape, m_tensors); | |
| 254 | 200 | } | |
| 255 | |||
| 256 | 200 | void MatMulTb::compute_ref_matmul() { | |
| 257 | 200 | const MatMulConfig& config = m_tensors.at(MATMUL_SLOT_CONFIG).value<MatMulConfig>(); | |
| 258 | 200 | const Tensor& lhs_qdata = m_tensors.at(MATMUL_SLOT_LHS_QDATA); | |
| 259 | 200 | const Tensor& lhs_qscale = m_tensors.at(MATMUL_SLOT_LHS_QSCALE); | |
| 260 | 200 | const Tensor& lhs_qzp = m_tensors.at(MATMUL_SLOT_LHS_QZP); | |
| 261 | 200 | const Tensor& rhs_t_qdata = m_tensors.at(MATMUL_SLOT_RHS_T_QDATA); | |
| 262 | 200 | const Tensor& rhs_t_qscale = m_tensors.at(MATMUL_SLOT_RHS_T_QSCALE); | |
| 263 | 200 | const Tensor& bias_raw = m_tensors.at(MATMUL_SLOT_BIAS_RAW); | |
| 264 | 200 | Tensor& kernel_args = m_tensors.at(MATMUL_SLOT_MATMUL_ARGS); | |
| 265 | 200 | Tensor& ref_dst_data = m_tensors.at(MATMUL_SLOT_REF_DST_DATA); | |
| 266 | |||
| 267 |
2/4✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
|
200 | ref_dst_data.set_shape({m_shape_m, m_shape_n}).set_format(make_poly<PlainFormat>(m_op->dst_dtype)); |
| 268 | |||
| 269 | // REVISIT: Assumes that the LHS and RHS are both quantized. | ||
| 270 | 200 | const Quantizer& lhs_quant = *m_op->lhs_quant.value(); | |
| 271 | 200 | const Quantizer& rhs_quant = *m_op->rhs_quant.value(); | |
| 272 | |||
| 273 | 1200 | const Buffer lhs_data = lhs_quant.dequantize( | |
| 274 | 1000 | m_op->acc_dtype, {m_shape_m, m_shape_k}, lhs_qdata.data(), lhs_qscale.data(), lhs_qzp.data()); | |
| 275 | 200 | const Buffer rhs_t_data = | |
| 276 |
3/6✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 200 times.
✗ Branch 5 not taken.
|
200 | rhs_quant.dequantize(m_op->acc_dtype, {m_shape_n, m_shape_k}, rhs_t_qdata.data(), rhs_t_qscale.data(), {}); |
| 277 | |||
| 278 |
1/2✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
|
200 | const MatMulFn matmul_fn = make_matmul_nt_t(m_op->acc_dtype); |
| 279 |
3/6✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 200 times.
✗ Branch 5 not taken.
|
200 | Buffer dst = matmul_fn(m_shape_m, m_shape_n, m_shape_k, lhs_data, rhs_t_data); |
| 280 | |||
| 281 |
2/3✓ Branch 0 taken 55 times.
✓ Branch 1 taken 145 times.
✗ Branch 2 not taken.
|
200 | switch (config.bias_mode) { |
| 282 | case MatMulBiasMode::NO_BIAS: | ||
| 283 | break; | ||
| 284 | |||
| 285 | case MatMulBiasMode::PER_N: { | ||
| 286 |
1/2✓ Branch 0 taken 145 times.
✗ Branch 1 not taken.
|
145 | const BinaryElementwiseFn add_fn = make_add_2d(m_op->acc_dtype); |
| 287 |
3/6✓ Branch 0 taken 145 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 145 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 145 times.
✗ Branch 5 not taken.
|
145 | dst = add_fn(m_shape_m, m_shape_n, dst, 1, m_shape_n, bias_raw.data()); |
| 288 | break; | ||
| 289 | 145 | } | |
| 290 | |||
| 291 | default: | ||
| 292 | ✗ | KAI_TEST_ERROR("Not supported."); | |
| 293 | } | ||
| 294 | |||
| 295 |
1/2✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
|
200 | const DynamicClampFn dynamic_clamp_fn = make_dynamic_clamp(m_op->acc_dtype); |
| 296 |
2/4✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
|
600 | auto [clamp_args, clampped_dst] = dynamic_clamp_fn(m_clamp_ratio, {m_shape_m, m_shape_n}, dst); |
| 297 | |||
| 298 |
5/10✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 200 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 200 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 200 times.
✗ Branch 9 not taken.
|
400 | kernel_args.set_shape({clamp_args.size()}).set_data(std::move(clamp_args)); |
| 299 | |||
| 300 |
1/4✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
200 | KAI_TEST_ASSERT_MSG( |
| 301 | m_op->dst_dtype == m_op->acc_dtype, "Only support the accumulator and output type being the same."); | ||
| 302 |
2/4✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
|
400 | ref_dst_data.set_data(std::move(clampped_dst)); |
| 303 | 200 | } | |
| 304 | |||
| 305 | ✗ | bool MatMulTb::has_lhs_packing() const { | |
| 306 | ✗ | return m_op->pack_lhs != nullptr; | |
| 307 | } | ||
| 308 | |||
| 309 | 600 | std::tuple<size_t, size_t> MatMulTb::lhs_packing_steps() const { | |
| 310 | 600 | const KernelWrapper& pack_lhs = *m_op->pack_lhs.value(); | |
| 311 | 600 | const std::vector<size_t> steps = pack_lhs.steps({m_shape_m, m_shape_k}, m_tensors); | |
| 312 |
2/4✓ Branch 0 taken 600 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 600 times.
✗ Branch 3 not taken.
|
600 | return {steps.at(0), steps.at(1)}; |
| 313 | 600 | } | |
| 314 | |||
| 315 | 600 | void MatMulTb::test_lhs_packing(size_t start_m, size_t start_k, size_t size_m, size_t size_k) { | |
| 316 | 600 | const KernelWrapper& pack_lhs = *m_op->pack_lhs.value(); | |
| 317 | |||
| 318 | 600 | const std::array full_shape{m_shape_m, m_shape_k}; | |
| 319 | 600 | const std::array tile_coords{start_m, start_k}; | |
| 320 | 600 | const std::array tile_shape{size_m, size_k}; | |
| 321 | |||
| 322 | 600 | pack_lhs.run(full_shape, tile_coords, tile_shape, m_tensors); | |
| 323 | |||
| 324 | 600 | const Tensor& ref_packed_lhs = m_tensors.at(MATMUL_SLOT_REF_LHS_PACKED); | |
| 325 | 600 | const Tensor& imp_packed_lhs = m_tensors.at(MATMUL_SLOT_IMP_LHS_PACKED); | |
| 326 | 600 | const Format& format = *ref_packed_lhs.format(); | |
| 327 | |||
| 328 | 600 | DefaultMismatchHandler handler(0.0F, 0.0F, 0, 0.0F); | |
| 329 | 1200 | const bool ok = | |
| 330 |
3/6✓ Branch 0 taken 600 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 600 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 600 times.
✗ Branch 5 not taken.
|
600 | format.compare(full_shape, tile_coords, tile_shape, imp_packed_lhs.data(), ref_packed_lhs.data(), handler); |
| 331 |
1/6✓ Branch 0 taken 600 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
600 | KAI_TEST_ASSERT(ok); |
| 332 | 600 | } | |
| 333 | |||
| 334 | ✗ | bool MatMulTb::has_rhs_packing() const { | |
| 335 | ✗ | return m_op->pack_rhs.has_value(); | |
| 336 | } | ||
| 337 | |||
| 338 | 600 | std::tuple<size_t, size_t> MatMulTb::rhs_packing_steps() const { | |
| 339 | 600 | const KernelWrapper& pack_rhs = *m_op->pack_rhs.value(); | |
| 340 | 600 | const std::vector<size_t> steps = pack_rhs.steps({m_shape_n, m_shape_k}, m_tensors); | |
| 341 |
2/4✓ Branch 0 taken 600 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 600 times.
✗ Branch 3 not taken.
|
600 | return {steps.at(0), steps.at(1)}; |
| 342 | 600 | } | |
| 343 | |||
| 344 | 600 | void MatMulTb::test_rhs_packing(size_t start_n, size_t start_k, size_t size_n, size_t size_k) { | |
| 345 | 600 | const KernelWrapper& pack_rhs = *m_op->pack_rhs.value(); | |
| 346 | |||
| 347 | 600 | const std::array full_shape{m_shape_n, m_shape_k}; | |
| 348 | 600 | const std::array tile_coords{start_n, start_k}; | |
| 349 | 600 | const std::array tile_shape{size_n, size_k}; | |
| 350 | |||
| 351 | 600 | pack_rhs.run(full_shape, tile_coords, tile_shape, m_tensors); | |
| 352 | |||
| 353 | 600 | const Tensor& ref_packed_rhs = m_tensors.at(MATMUL_SLOT_REF_RHS_PACKED); | |
| 354 | 600 | const Tensor& imp_packed_rhs = m_tensors.at(MATMUL_SLOT_IMP_RHS_PACKED); | |
| 355 | 600 | const Format& format = *ref_packed_rhs.format(); | |
| 356 | |||
| 357 | 600 | DefaultMismatchHandler handler(0.0F, 0.0F, 0, 0.0F); | |
| 358 | 1200 | const bool ok = | |
| 359 |
3/6✓ Branch 0 taken 600 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 600 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 600 times.
✗ Branch 5 not taken.
|
600 | format.compare(full_shape, tile_coords, tile_shape, imp_packed_rhs.data(), ref_packed_rhs.data(), handler); |
| 360 |
1/6✓ Branch 0 taken 600 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
600 | KAI_TEST_ASSERT(ok); |
| 361 | 600 | } | |
| 362 | |||
| 363 | 600 | std::tuple<size_t, size_t> MatMulTb::matmul_steps() const { | |
| 364 | 600 | const std::vector<size_t> steps = m_op->matmul->steps({m_shape_m, m_shape_n, m_shape_k}, m_tensors); | |
| 365 |
2/4✓ Branch 0 taken 600 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 600 times.
✗ Branch 3 not taken.
|
600 | return {steps.at(0), steps.at(1)}; |
| 366 | 600 | } | |
| 367 | |||
| 368 | 600 | void MatMulTb::test_matmul(size_t start_m, size_t start_n, size_t size_m, size_t size_n) { | |
| 369 | 600 | const std::array matmul_full_shape{m_shape_m, m_shape_n, m_shape_k}; | |
| 370 | 600 | const std::array matmul_tile_coords{start_m, start_n, static_cast<size_t>(0)}; | |
| 371 | 600 | const std::array matmul_tile_shape{size_m, size_n, m_shape_k}; | |
| 372 | |||
| 373 | 600 | const std::array dst_full_shape{m_shape_m, m_shape_n}; | |
| 374 | 600 | const std::array dst_tile_coords{start_m, start_n}; | |
| 375 | 600 | const std::array dst_tile_shape{size_m, size_n}; | |
| 376 | |||
| 377 | 600 | m_op->matmul->run(matmul_full_shape, matmul_tile_coords, matmul_tile_shape, m_tensors); | |
| 378 | |||
| 379 | 600 | const Tensor& ref_dst_data = m_tensors.at(MATMUL_SLOT_REF_DST_DATA); | |
| 380 | 600 | const Tensor& imp_dst_data = m_tensors.at(MATMUL_SLOT_IMP_DST_DATA); | |
| 381 | 600 | const Format& format = *ref_dst_data.format(); | |
| 382 | |||
| 383 | 600 | DefaultMismatchHandler handler(1e-3, 1e-3, 0, 0.0F); | |
| 384 |
1/2✓ Branch 0 taken 600 times.
✗ Branch 1 not taken.
|
2400 | const bool ok = format.compare( |
| 385 |
6/12✓ Branch 0 taken 600 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 600 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 600 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 600 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 600 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 600 times.
✗ Branch 11 not taken.
|
2400 | dst_full_shape, dst_tile_coords, dst_tile_shape, imp_dst_data.data(), ref_dst_data.data(), handler); |
| 386 |
1/6✓ Branch 0 taken 600 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
600 | KAI_TEST_ASSERT(ok); |
| 387 | 600 | } | |
| 388 | |||
| 389 | } // namespace kai::test | ||
| 390 |