Skip to content

Commit d50cb04

Browse files
committed
f
1 parent 845c730 commit d50cb04

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

src/layer/x86/gemm_bf16s.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2515,7 +2515,7 @@ static void gemm_transB_packed_tile_bf16s(const Mat& AT_tile, const Mat& BT_tile
2515 2515
for (; kk < max_kk; kk++)
2516 2516
{
2517 2517
__m512 _pA0 = bfloat2float_avx512(_mm256_loadu_si256((const __m256i*)pA));
2518-
__m512 _pB0 = bfloat2float_avx512(_mm256_castps_si256(_mm256_set1_ps(((const float*)pB)[0])));
2518+
__m512 _pB0 = bfloat2float_avx512(_mm256_set1_epi32(((const int*)pB)[0]));
2519 2519
__m512 _pB1 = _mm512_permute_ps(_pB0, _MM_SHUFFLE(0, 3, 2, 1));
2520 2520

2521 2521
_sum0 = _mm512_fmadd_ps(_pA0, _pB0, _sum0);
@@ -3228,7 +3228,7 @@ static void gemm_transB_packed_tile_bf16s(const Mat& AT_tile, const Mat& BT_tile
3228 3228
#endif // __AVX512BF16__
3229 3229
for (; kk < max_kk; kk++)
3230 3230
{
3231-
__m512 _pA0 = bfloat2float_avx512(_mm256_set1_ps(((const float*)pA)[0]));
3231+
__m512 _pA0 = bfloat2float_avx512(_mm256_set1_epi32(((const int*)pA)[0]));
3232 3232
__m512 _pB0 = bfloat2float_avx512(_mm256_loadu_si256((const __m256i*)pB));
3233 3233
__m512 _pB1 = _mm512_permute_ps(_pB0, _MM_SHUFFLE(0, 3, 2, 1));
3234 3234

0 commit comments

Comments
 (0)