Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/plugins/intel_cpu/src/nodes/conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,19 @@ static MemoryPtr memoryViewToVector(const std::vector<T>& vec, const dnnl::engin
bool Convolution::canFuse(const NodePtr& node) const {
#if defined(OV_CPU_WITH_ACL)
if (!fusedWith.empty()) {
// Allow FakeQuantize to fuse after a single Eltwise activation
// to enable Conv -> Activation -> FakeQuantize for the ACL INT8 path.
if (fusedWith.size() == 1 &&
fusedWith[0]->getType() == Type::Eltwise &&
node->getType() == Type::FakeQuantize) {
const auto fqOutPrc = node->getOriginalOutputPrecisionAtPort(0);
const auto convInPrc = getOriginalInputPrecisionAtPort(0);
if (any_of(convInPrc, ov::element::u8, ov::element::i8) && fqOutPrc != convInPrc) {
return false;
}
return canFuseSimpleOperation(node);
}

return false;
}

Expand Down
26 changes: 15 additions & 11 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,14 @@ ACLConvolutionExecutor::ACLConvolutionExecutor(const ConvAttrs& attrs,
paddingBottom,
arm_compute::DimensionRoundingType::FLOOR);
dilation = arm_compute::Size2D(attrs.dilation[1] + 1, attrs.dilation[0] + 1);

if (attrs.postOps.size() == 1) {
if (const auto* const activation = std::any_cast<ActivationPostOp>(attrs.postOps.data())) {
// Instead of branching on postOps.size() == 1 / == 2, iterate through the vector
for (const auto& postOp : attrs.postOps ){
if (const auto* const activation = std::any_cast<ActivationPostOp>(&postOp)) {
activationLayerInfo = getActivationLayerInfo(convertToEltwiseAlgorithm(activation->type()),
activation->alpha(),
activation->beta(),
activation->gamma());
} else if (const auto* const fq = std::any_cast<FakeQuantizePostOp>(attrs.postOps.data())) {
activation->alpha(),
activation->beta(),
activation->gamma());
} else if (const auto* const fq = std::any_cast<FakeQuantizePostOp>(&postOp)) {
fqInputScale = fq->inputScale();
fqInputShift = fq->inputShift();
fqOutputScale = fq->outputScale();
Expand Down Expand Up @@ -112,13 +112,11 @@ ACLConvolutionExecutor::ACLConvolutionExecutor(const ConvAttrs& attrs,
} else {
OPENVINO_THROW("ACLConvolutionExecutor: the executor supports FakeQuantize and Activation post ops only");
}
} else if (attrs.postOps.size() > 1) {
OPENVINO_THROW("ACLConvolutionExecutor: ACL does not support more than 1 post op");
}
}

bool ACLConvolutionExecutor::supports(const ConvConfig& config) {
VERIFY(config.attrs.postOps.size() <= 1U, UNSUPPORTED_BY_EXECUTOR);
VERIFY(config.attrs.postOps.size() <= 2U, UNSUPPORTED_BY_EXECUTOR);

const auto& srcDesc = config.descs.at(ARG_SRC);
const auto& weiDesc = config.descs.at(ARG_WEI);
Expand All @@ -128,7 +126,13 @@ bool ACLConvolutionExecutor::supports(const ConvConfig& config) {
VERIFY(srcDesc->getShape().getRank() == 4 && weiDesc->getShape().getRank() == 4, UNSUPPORTED_BY_EXECUTOR);
// isQuantized verifies whether src is u8/i8, weights is i8 and FQ is fused if dst is u8/i8
// the last requirement is due to ACL int32 accumulation that needs to be requantized by non-trivial scales
const bool hasQuantizationPostOp = std::any_cast<FakeQuantizePostOp>(config.attrs.postOps.data()) != nullptr;
const bool hasQuantizationPostOp = std::any_of(
config.attrs.postOps.begin(),
config.attrs.postOps.end(),
[](const std::any& op){
return std::any_cast<FakeQuantizePostOp>(&op) != nullptr;
}
);
const bool isQuantizedU8 = srcDesc->getPrecision() == ov::element::u8 &&
any_of(weiDesc->getPrecision(), ov::element::u8, ov::element::i8) &&
dstDesc->getPrecision() == ov::element::u8 && hasQuantizationPostOp;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include "openvino/op/convolution.hpp"
#include "openvino/op/fake_quantize.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/swish.hpp"
#include "openvino/op/relu.hpp"
#include "openvino/pass/pattern/op/block.hpp"
#include "openvino/pass/pattern/op/label.hpp"
#include "openvino/pass/pattern/op/or.hpp"
Expand All @@ -38,17 +40,20 @@ ov::intel_cpu::ConvMulAddFQBlock::ConvMulAddFQBlock(const bool require_int_fq_ou
return !type_matches(ov::element::i32)(output);
});
auto add = wrap_type<ov::op::v1::Add>({multiply, bias_const});
auto activation = wrap_type<ov::op::v4::Swish, ov::op::v0::Relu>({add});
auto activation_or_add = std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{activation, add});

ov::pass::pattern::op::Predicate predicate =
require_int_fq_output ? type_matches_any({element::i8, element::u8}) : ov::pass::pattern::op::Predicate();
auto fake_quantize =
wrap_type<ov::op::v0::FakeQuantize>({add, any_input(), any_input(), any_input(), any_input()}, predicate);
wrap_type<ov::op::v0::FakeQuantize>({activation_or_add, any_input(), any_input(), any_input(), any_input()}, predicate);

m_inputs = ov::OutputVector{conv};
m_outputs = ov::OutputVector{fake_quantize};

register_anchor("convolution", conv);
register_anchor("multiply", multiply);
register_anchor("add", add);
register_anchor("activation", activation);
register_anchor("fake_quantize", fake_quantize);
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ ov::intel_cpu::ConvertConvolutionBias::ConvertConvolutionBias() {
const auto conv_out = conv_mul_add_fq->get_anchor("convolution", pattern_map);
const auto mul_out = conv_mul_add_fq->get_anchor("multiply", pattern_map);
const auto add_out = conv_mul_add_fq->get_anchor("add", pattern_map);
const auto activation_out = conv_mul_add_fq->get_anchor("activation", pattern_map);
const auto fq_out = conv_mul_add_fq->get_anchor("fake_quantize", pattern_map);
if (!conv_out || !mul_out || !add_out || !fq_out) {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "openvino/op/fake_quantize.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/reshape.hpp"
#include "openvino/op/swish.hpp"
#include "openvino/pass/matcher_pass.hpp"
#include "openvino/pass/pattern/matcher.hpp"
#include "openvino/pass/pattern/op/pattern.hpp"
Expand Down Expand Up @@ -56,6 +57,7 @@ ov::intel_cpu::FallbackUnsupportedLPConvToFP16::FallbackUnsupportedLPConvToFP16(
const auto conv_out = conv_mul_add_fq->get_anchor("convolution", pattern_map);
const auto mul_out = conv_mul_add_fq->get_anchor("multiply", pattern_map);
const auto add_out = conv_mul_add_fq->get_anchor("add", pattern_map);
const auto activation_out = conv_mul_add_fq->get_anchor("activation", pattern_map);
const auto fq_out = conv_mul_add_fq->get_anchor("fake_quantize", pattern_map);
if (!conv_out || !mul_out || !add_out || !fq_out) {
return false;
Expand Down Expand Up @@ -105,6 +107,14 @@ ov::intel_cpu::FallbackUnsupportedLPConvToFP16::FallbackUnsupportedLPConvToFP16(
ov::copy_runtime_info(rt_info_sources, {reshape_const, scales_reshape, scales_to_weights, conv_scaled});

ov::replace_node(mul, conv_scaled);

if (activation_out) {
const auto activation_node = activation_out->get_node_shared_ptr();
if (auto type_relaxed = std::dynamic_pointer_cast<ov::op::TypeRelaxedBase>(activation_node)){
type_relaxed->set_overridden_output_type(ov::element::f16, 0);
activation_node->validate_and_infer_types();
}
}

// Keep this matched Conv->Mul->Add->FQ pattern in FP16 end-to-end for CPU plugin selection.
if (add->get_output_element_type(0) == ov::element::f16) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -990,8 +990,6 @@ void Transformations::runLptPasses(const std::vector<ov::element::Type>& default
FuseMultiplyToFakeQuantizeTransformation);
CPU_DISABLE_PASS_COMMON(lptManager, MultiplyToGroupConvolutionTransformation);

// ConvolutionTransformation is disabled temporary until ACL issues are fixed: #1252, #1253
CPU_DISABLE_PASS_ARM(lptManager, ConvolutionTransformation);
CPU_DISABLE_PASS_ARM(lptManager, ConvolutionBackpropDataTransformation);
CPU_DISABLE_PASS_ARM(lptManager, InterpolateTransformation);
CPU_DISABLE_PASS_ARM(lptManager, GroupConvolutionTransformation);
Expand Down