Skip to content

Commit 6f57382

Browse files
authored
[Snippets][CPU] Add check for RVV FP16 Zvfh extension support (#34835)
### Details:
Add a runtime capability probe for Zvfh (the RVV half-precision floating-point arithmetic extension).

### Tickets:
- N/A

### AI Assistance:
- *AI assistance used: yes*
- The whole implementation was AI-generated, then manually adjusted and checked on the emulator.
1 parent bde6a08 commit 6f57382

File tree

5 files changed

+71
-6
lines changed

5 files changed

+71
-6
lines changed

src/plugins/intel_cpu/src/emitters/plugin/riscv64/jit_eltwise_emitters.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ static std::pair<SEW, LMUL> getVTypeForElementSize(const ov::element::Type& type
5252
}
5353
}
5454

55+
/// Decides whether a Convert between `input_type` and `output_type` has to be
/// gated on Zvfh availability.
///
/// A same-type pair never qualifies. Otherwise the result is whatever
/// `any_of(ov::element::f16, input_type, output_type)` yields — presumably
/// "f16 equals at least one of the two types"; confirm against the helper.
static bool requires_zvfh(const ov::element::Type& input_type, const ov::element::Type& output_type) {
    if (input_type == output_type) {
        return false;
    }
    return any_of(ov::element::f16, input_type, output_type);
}
58+
5559
/// ABS ///
5660
jit_abs_emitter::jit_abs_emitter(ov::intel_cpu::riscv64::jit_generator_t* host,
5761
ov::intel_cpu::riscv64::cpu_isa_t host_isa,
@@ -157,6 +161,11 @@ jit_convert_saturation_emitter::jit_convert_saturation_emitter(ov::intel_cpu::ri
157161
input_type,
158162
" -> ",
159163
output_type);
164+
OV_CPU_JIT_EMITTER_ASSERT(!requires_zvfh(input_type, output_type) || mayiuse(cpu_isa_t::gv_zvfh),
165+
"Unsupported Zvfh conversion: ",
166+
input_type,
167+
" -> ",
168+
output_type);
160169
}
161170

162171
size_t jit_convert_saturation_emitter::get_inputs_num() const {
@@ -367,6 +376,11 @@ jit_convert_truncation_emitter::jit_convert_truncation_emitter(ov::intel_cpu::ri
367376
input_type,
368377
" -> ",
369378
output_type);
379+
OV_CPU_JIT_EMITTER_ASSERT(!requires_zvfh(input_type, output_type) || mayiuse(cpu_isa_t::gv_zvfh),
380+
"Unsupported Zvfh conversion: ",
381+
input_type,
382+
" -> ",
383+
output_type);
370384
}
371385

372386
size_t jit_convert_truncation_emitter::get_inputs_num() const {

src/plugins/intel_cpu/src/nodes/kernels/riscv64/cpu_isa_traits.cpp

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,20 @@ struct RVVGenerator : public CodeGenerator {
2828
}
2929
};
3030

31+
struct ZvfhGenerator : public CodeGenerator {
32+
ZvfhGenerator() : CodeGenerator(32) {
33+
// Probe Zvfh instructions used by Snippets Convert emitters.
34+
vsetivli(a0, 1, SEW::e16, LMUL::mf2);
35+
vfwcvt_f_f_v(v0, v0);
36+
vfncvt_f_f_w(v0, v0);
37+
li(a0, 1);
38+
ret();
39+
}
40+
};
41+
3142
// NOLINTBEGIN(misc-include-cleaner) bug in clang-tidy
32-
bool can_compile_rvv100() {
43+
template <typename Generator>
44+
bool can_execute_generated_code() {
3345
#if defined(__linux__)
3446
static thread_local sigjmp_buf jmpbuf;
3547
__sighandler_t signal_handler = []([[maybe_unused]] int signal) {
@@ -45,9 +57,9 @@ bool can_compile_rvv100() {
4557

4658
bool status = false;
4759
if (sigsetjmp(jmpbuf, 1) == 0) {
48-
RVVGenerator gen;
60+
Generator gen;
4961
gen.ready();
50-
const auto caller = gen.getCode<uint32_t (*)()>();
62+
const auto caller = gen.template getCode<uint32_t (*)()>();
5163
status = static_cast<bool>(caller());
5264
}
5365

@@ -61,6 +73,16 @@ bool can_compile_rvv100() {
6173
#endif
6274
}
6375

76+
bool can_compile_rvv100() {
77+
static const bool status = can_execute_generated_code<RVVGenerator>();
78+
return status;
79+
}
80+
81+
bool can_compile_zvfh() {
82+
static const bool status = can_execute_generated_code<ZvfhGenerator>();
83+
return status;
84+
}
85+
6486
} // namespace
6587

6688
bool mayiuse(const cpu_isa_t cpu_isa) {
@@ -77,6 +99,8 @@ bool mayiuse(const cpu_isa_t cpu_isa) {
7799
// [TODO] If needed, support other RVV versions
78100
case gv:
79101
return mayiuse(g) && cpu.hasExtension(RISCVExtension::V) && can_compile_rvv100();
102+
case gv_zvfh:
103+
return mayiuse(gv) && can_compile_zvfh();
80104
case isa_all:
81105
return false;
82106
case isa_undef:
@@ -93,6 +117,8 @@ std::string isa2str(cpu_isa_t isa) {
93117
return "g";
94118
case cpu_isa_t::gv:
95119
return "gv";
120+
case cpu_isa_t::gv_zvfh:
121+
return "gv_zvfh";
96122
case cpu_isa_t::isa_all:
97123
return "all";
98124
default:

src/plugins/intel_cpu/src/nodes/kernels/riscv64/cpu_isa_traits.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ enum cpu_isa_bit_t : unsigned {
1919
d_bit = 1U << 4,
2020
c_bit = 1U << 5,
2121
v_bit = 1U << 6, // rvv 1.0
22+
zvfh_bit = 1U << 7,
2223

2324
last_bit = 1U << (cpu_isa_total_bits - 1),
2425
};
@@ -27,6 +28,7 @@ enum cpu_isa_t : unsigned {
2728
isa_undef = 0U,
2829
g = i_bit | m_bit | a_bit | f_bit | d_bit, // G = IMAFD
2930
gv = g | v_bit,
31+
gv_zvfh = gv | zvfh_bit,
3032
isa_all = ~0U & ~last_bit
3133
};
3234

src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@
268268
#endif
269269

270270
#if defined(OPENVINO_ARCH_RISCV64)
271+
# include "nodes/kernels/riscv64/cpu_isa_traits.hpp"
271272
# include "openvino/op/power.hpp"
272273
# include "openvino/op/select.hpp"
273274
# include "openvino/op/swish.hpp"
@@ -1433,12 +1434,29 @@ void Transformations::MainSnippets() {
14331434
auto is_supported_tensor = [&n, ignoreCallback](descriptor::Tensor& t, bool is_input) -> bool {
14341435
// TODO [105804] int32 isn't supported in general because i32 emitters are required for bit-exact i32
14351436
// calculations in some cases So i32 is supported exclusively for transposes and broadcast
1436-
static const std::set<ov::element::Type> supported_element_types =
1437+
static const auto supported_element_types = [] {
14371438
#if defined(OPENVINO_ARCH_ARM64)
1438-
{ov::element::f32, ov::element::f16, ov::element::i8, ov::element::u8};
1439+
return std::set<ov::element::Type>{ov::element::f32,
1440+
ov::element::f16,
1441+
ov::element::i8,
1442+
ov::element::u8};
1443+
#elif defined(OPENVINO_ARCH_RISCV64)
1444+
auto types =
1445+
std::set<ov::element::Type>{ov::element::f32, ov::element::bf16, ov::element::i8, ov::element::u8};
1446+
if (ov::intel_cpu::riscv64::mayiuse(ov::intel_cpu::riscv64::cpu_isa_t::gv_zvfh)) {
1447+
types.insert(ov::element::f16);
1448+
}
1449+
return types;
14391450
#else
1440-
{ov::element::f32, ov::element::bf16, ov::element::f16, ov::element::i8, ov::element::u8};
1451+
return std::set<ov::element::Type>{
1452+
ov::element::f32,
1453+
ov::element::bf16,
1454+
ov::element::f16,
1455+
ov::element::i8,
1456+
ov::element::u8,
1457+
};
14411458
#endif
1459+
}();
14421460

14431461
if (!ignoreCallback) {
14441462
// Check for supported ranks

src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,11 @@ const std::vector<std::regex>& disabled_test_patterns() {
652652
patterns.emplace_back(std::regex(R"(.*smoke_EltwiseChain/EltwiseChainTest.CompareWithRefs.*InPRC3=i32_Op0=Div_Op1.*)"));
653653
patterns.emplace_back(std::regex(R"(.*smoke_CompareWithRefs_static.*eltwise_op_type=Div.*model_type=i32.*)"));
654654
}
655+
if (!ov::intel_cpu::riscv64::mayiuse(ov::intel_cpu::riscv64::gv_zvfh)) {
    // Snippets Convert on RISC-V requires Zvfh instructions.
    // Disable only cases whose input/output type list really contains f16:
    // the optional prefix group requires that "f16" either opens the list or
    // follows a character other than 'b', so bf16-only cases are not caught.
    // (std::regex's ECMAScript grammar has no lookbehind, hence this form.)
    patterns.emplace_back(std::regex(R"(.*smoke_Snippets_Convert.*_IT=\((?:[^)]*[^b)])?f16[^)]*\).*)"));
    patterns.emplace_back(std::regex(R"(.*smoke_Snippets_Convert.*_OT=\((?:[^)]*[^b)])?f16[^)]*\).*)"));
}
655660
#endif
656661
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
657662
if (!ov::with_cpu_x86_avx2()) {

0 commit comments

Comments
 (0)