Skip to content

Commit 3726265

Browse files
authored
Merge pull request #5667 from fadara01/accelerate_sve128_sbgemm
Accelerate SVE128 SBGEMM/BGEMM
2 parents 75e2f12 + f30202b commit 3726265

File tree

5 files changed

+833
-7
lines changed

5 files changed

+833
-7
lines changed

CONTRIBUTORS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,3 +272,6 @@ In chronological order:
272272

273273
* Anna Mayne <anna.mayne@arm.com>
274274
* [2025-11-19] Update thread throttling profile for SGEMV on NEOVERSEV1 and NEOVERSEV2
275+
276+
* Fadi Arafeh <fadi.arafeh@arm.com>
277+
* [2026-03-05] Accelerate SVE128 SBGEMM/BGEMM

kernel/arm64/KERNEL.NEOVERSEN2

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -191,25 +191,29 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
191191
ifeq ($(BUILD_BFLOAT16), 1)
192192
BGEMM_BETA = bgemm_beta_neon.c
193193
BGEMMKERNEL = sbgemm_kernel_$(BGEMM_UNROLL_M)x$(BGEMM_UNROLL_N)_neoversen2.c
194+
ifneq ($(BGEMM_UNROLL_M), $(BGEMM_UNROLL_N))
194195
BGEMMINCOPY = sbgemm_ncopy_$(BGEMM_UNROLL_M)_neoversen2.c
195196
BGEMMITCOPY = sbgemm_tcopy_$(BGEMM_UNROLL_M)_neoversen2.c
196-
BGEMMONCOPY = sbgemm_ncopy_$(BGEMM_UNROLL_N)_neoversen2.c
197-
BGEMMOTCOPY = sbgemm_tcopy_$(BGEMM_UNROLL_N)_neoversen2.c
198197
BGEMMINCOPYOBJ = bgemm_incopy$(TSUFFIX).$(SUFFIX)
199198
BGEMMITCOPYOBJ = bgemm_itcopy$(TSUFFIX).$(SUFFIX)
199+
endif
200+
BGEMMONCOPY = sbgemm_ncopy_$(BGEMM_UNROLL_N)_neoversen2.c
201+
BGEMMOTCOPY = sbgemm_tcopy_$(BGEMM_UNROLL_N)_neoversen2.c
200202
BGEMMONCOPYOBJ = bgemm_oncopy$(TSUFFIX).$(SUFFIX)
201203
BGEMMOTCOPYOBJ = bgemm_otcopy$(TSUFFIX).$(SUFFIX)
202204
BGEMVTKERNEL = sbgemv_t_bfdot.c
203205
BGEMVNKERNEL = bgemv_n_sve_v3x4.c
204206

205207
SBGEMM_BETA = sbgemm_beta_neoversen2.c
206208
SBGEMMKERNEL = sbgemm_kernel_$(SBGEMM_UNROLL_M)x$(SBGEMM_UNROLL_N)_neoversen2.c
209+
ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
207210
SBGEMMINCOPY = sbgemm_ncopy_$(SBGEMM_UNROLL_M)_neoversen2.c
208211
SBGEMMITCOPY = sbgemm_tcopy_$(SBGEMM_UNROLL_M)_neoversen2.c
209-
SBGEMMONCOPY = sbgemm_ncopy_$(SBGEMM_UNROLL_N)_neoversen2.c
210-
SBGEMMOTCOPY = sbgemm_tcopy_$(SBGEMM_UNROLL_N)_neoversen2.c
211212
SBGEMMINCOPYOBJ = sbgemm_incopy$(TSUFFIX).$(SUFFIX)
212213
SBGEMMITCOPYOBJ = sbgemm_itcopy$(TSUFFIX).$(SUFFIX)
214+
endif
215+
SBGEMMONCOPY = sbgemm_ncopy_$(SBGEMM_UNROLL_N)_neoversen2.c
216+
SBGEMMOTCOPY = sbgemm_tcopy_$(SBGEMM_UNROLL_N)_neoversen2.c
213217
SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX)
214218
SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
215219
SBGEMVTKERNEL = sbgemv_t_bfdot.c
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/***************************************************************************
2+
* Copyright (c) 2026 The OpenBLAS Project
3+
* All rights reserved.
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions are
6+
* met:
7+
* 1. Redistributions of source code must retain the above copyright
8+
* notice, this list of conditions and the following disclaimer.
9+
* 2. Redistributions in binary form must reproduce the above copyright
10+
* notice, this list of conditions and the following disclaimer in
11+
* the documentation and/or other materials provided with the
12+
* distribution.
13+
* 3. Neither the name of the OpenBLAS project nor the names of
14+
* its contributors may be used to endorse or promote products
15+
* derived from this software without specific prior written permission.
16+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
* POSSIBILITY OF SUCH DAMAGE.
27+
* *****************************************************************************/
28+
29+
#include <arm_sve.h>
30+
#include <arm_neon.h>
31+
32+
#include "common.h"
33+
34+
#define ALPHA_ONE
35+
#include "sbgemm_kernel_8x8_neoversen2_impl.c"
36+
#undef ALPHA_ONE
37+
#undef UPDATE_C
38+
#include "sbgemm_kernel_8x8_neoversen2_impl.c"
39+
40+
int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT *A, IFLOAT *B,
41+
FLOAT *C, BLASLONG ldc) {
42+
#ifdef BGEMM
43+
bfloat16_t alpha_bf16;
44+
memcpy(&alpha_bf16, &alpha, sizeof(bfloat16_t));
45+
float alpha_f32 = vcvtah_f32_bf16(alpha_bf16);
46+
#else
47+
float alpha_f32 = alpha;
48+
#endif
49+
50+
if (alpha_f32 == 1.0f)
51+
return gemm_kernel_neoversen2_alpha_one(m, n, k, alpha, A, B, C, ldc);
52+
else
53+
return gemm_kernel_neoversen2_alpha(m, n, k, alpha, A, B, C, ldc);
54+
55+
return 0;
56+
}

0 commit comments

Comments
 (0)