Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,22 @@ ov::pass::ConvertWeightCompressedConv1x1ToMatmul::ConvertWeightCompressedConv1x1
}
}

// If the activation has a static leading dimension of 1, collapse it to rank 3 ([1, -1, C]) with a Reshape that acts as a squeeze.
// This allows pre-selection of OCL implementations for non-IMMAD devices, which reduces memory pressure.
bool squeeze_activation = false;
if (activation->get_output_partial_shape(0)[0].is_static() && activation->get_output_partial_shape(0)[0] == 1) {
squeeze_activation = true;
auto shape_out = activation->get_output_partial_shape(0);
auto squeeze_const =
std::make_shared<ov::op::v0::Constant>(ov::element::i64,
ov::Shape{3},
std::vector<int64_t>{1, -1, shape_out[-1].get_length()});
auto squeeze = std::make_shared<ov::op::v1::Reshape>(activation, squeeze_const, false);
Comment on lines +241 to +246
Copy link

Copilot AI Mar 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

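[BLOCKER] The new squeeze/unsqueeze reshape constants rely on shape_out[-1].get_length(). If the last dimension is dynamic (e.g., input has dynamic channel/feature dim), get_length() throws and the transformation will crash during pass execution. Consider building the reshape pattern without requiring a static last dimension (e.g., use special_zero=true with a 0 in the last position to copy the input’s last dimension), and apply the same fix to the unsqueeze block as well. Note that with special_zero=true a 0 copies the input dimension at the same index rather than the last one, so this only works when the copied dimension keeps its position; where the rank changes (4D -> 3D for the squeeze, 3D -> 4D for the unsqueeze), either skip the squeeze when the last dimension is dynamic or build the target shape at runtime (e.g., ShapeOf + Gather + Concat).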

Suggested change
auto shape_out = activation->get_output_partial_shape(0);
auto squeeze_const =
std::make_shared<ov::op::v0::Constant>(ov::element::i64,
ov::Shape{3},
std::vector<int64_t>{1, -1, shape_out[-1].get_length()});
MatcherPass::register_new_node(squeeze_const);
auto squeeze = std::make_shared<ov::op::v1::Reshape>(activation, squeeze_const, false);
// Reshape [1, D1, ..., Dn] -> [1, -1, Dn] while safely copying the last dimension
// without requiring it to be statically known.
auto squeeze_const =
std::make_shared<ov::op::v0::Constant>(ov::element::i64,
ov::Shape{3},
std::vector<int64_t>{1, -1, 0});
MatcherPass::register_new_node(squeeze_const);
auto squeeze = std::make_shared<ov::op::v1::Reshape>(activation, squeeze_const, true);

Copilot uses AI. Check for mistakes.
ov::copy_runtime_info(activation, squeeze);
squeeze->set_friendly_name(activation->get_friendly_name() + "_squeeze");
activation = squeeze;
}

auto matmul = std::make_shared<ov::op::v0::MatMul>(activation, scaled_weight, false, true);
ov::copy_runtime_info(conv1x1, matmul);
std::shared_ptr<Node> matmul_out;
Expand All @@ -258,6 +274,18 @@ ov::pass::ConvertWeightCompressedConv1x1ToMatmul::ConvertWeightCompressedConv1x1
matmul_out = matmul;
}

if (squeeze_activation) {
auto shape_out = matmul_out->get_output_partial_shape(0);
auto unsqueeze_const =
std::make_shared<ov::op::v0::Constant>(ov::element::i64,
ov::Shape{4},
std::vector<int64_t>{1, 1, -1, shape_out[-1].get_length()});
auto unsqueeze = std::make_shared<ov::op::v1::Reshape>(matmul_out, unsqueeze_const, false);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're inserting Reshape, but calling it unsqueeze. Is this how it should be?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The squeeze/unsqueeze operations are represented by reshapes here to avoid adding new node types at this stage in the pipeline.

ov::copy_runtime_info(matmul_out, unsqueeze);
unsqueeze->set_friendly_name(matmul_out->get_friendly_name() + "_unsqueeze");
matmul_out = unsqueeze;
}

if (reshape_out) {
if (convert_out) {
auto convert_final = convert_out->clone_with_new_inputs({matmul_out});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,24 @@ struct Conv1x1ToMatmulTestParams {
bool with_convert;
bool with_param_weight;
bool with_act_new_reshape;
bool with_batched_input;
std::string activation_op_type;
};

std::shared_ptr<ov::Model> gen_model(const Conv1x1ToMatmulTestParams& p) {
int input_batch = p.with_batched_input ? 4 : 1;
auto input = std::make_shared<ov::opset1::Parameter>(
ov::element::f16,
(p.activation_op_type == "Reshape" && p.with_act_new_reshape) ? ov::Shape{1, 1, 2, 5} : ov::Shape{1, 1, 1, 10});
(p.activation_op_type == "Reshape" && p.with_act_new_reshape)
? ov::Shape{(size_t)input_batch, 1, 2, 5}
: ov::Shape{(size_t)input_batch, 1, 1, 10});

std::shared_ptr<ov::Node> act_node;
if (p.activation_op_type == "Transpose") {
auto transpose_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {0, 3, 1, 2});
act_node = std::make_shared<ov::opset1::Transpose>(input, transpose_const);
} else {
auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 10, 1, 1});
auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {input_batch, 10, 1, 1});
act_node = std::make_shared<ov::opset1::Reshape>(input, reshape_const, false);
}

Expand Down Expand Up @@ -118,17 +122,20 @@ std::shared_ptr<ov::Model> gen_model(const Conv1x1ToMatmulTestParams& p) {
auto transpose_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {0, 2, 3, 1});
out_node = std::make_shared<ov::opset1::Transpose>(current_node, transpose_const);
} else {
auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 1, 1, 15});
auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {input_batch, 1, 1, 15});
out_node = std::make_shared<ov::opset1::Reshape>(current_node, reshape_const, false);
}

return std::make_shared<ov::Model>(ov::OutputVector{out_node}, params);
}

std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) {
int input_batch = p.with_batched_input ? 4 : 1;
auto input = std::make_shared<ov::opset1::Parameter>(
ov::element::f16,
(p.activation_op_type == "Reshape" && p.with_act_new_reshape) ? ov::Shape{1, 1, 2, 5} : ov::Shape{1, 1, 1, 10});
(p.activation_op_type == "Reshape" && p.with_act_new_reshape)
? ov::Shape{(size_t)input_batch, 1, 2, 5}
: ov::Shape{(size_t)input_batch, 1, 1, 10});

std::shared_ptr<ov::Node> weights_node;
ov::ParameterVector params = {input};
Expand Down Expand Up @@ -170,23 +177,31 @@ std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) {

std::shared_ptr<ov::Node> act_node = input;
if (p.activation_op_type == "Reshape" && p.with_act_new_reshape) {
auto reshape_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, 1, 10});
auto reshape_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, input_batch, 10});
act_node = std::make_shared<ov::opset1::Reshape>(input, reshape_const, false);
}
if (input_batch == 1 || (p.activation_op_type == "Reshape" && p.with_act_new_reshape)) {
auto squeeze_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{3}, {1, input_batch, 10});
act_node = std::make_shared<ov::opset1::Reshape>(act_node, squeeze_const, false);
}
auto matmul = std::make_shared<ov::op::v0::MatMul>(act_node, mul, false, true);
current_node = matmul;

if (p.with_bias) {
auto bias_const = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 1, 1, 15}, {1});
current_node = std::make_shared<ov::opset1::Add>(matmul, bias_const);
current_node = std::make_shared<ov::opset1::Add>(current_node, bias_const);
}
if (input_batch == 1 || (p.activation_op_type == "Reshape" && p.with_act_new_reshape)) {
auto unsqueeze_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, input_batch, 15});
current_node = std::make_shared<ov::opset1::Reshape>(current_node, unsqueeze_const, false);
}
Comment on lines 178 to 197
Copy link

Copilot AI Mar 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[HIGH] The new squeeze/unsqueeze behavior is only validated with fully static shapes. Since the implementation currently depends on shape introspection (and should support dynamic channel dims once the get_length() issue is addressed), please add a regression test case where the activation’s last dimension is dynamic (or weights/output dim is dynamic) to ensure the transformation does not throw and preserves the expected output rank/shape.

Copilot generated this review using guidance from repository custom instructions.
if (p.with_convert) {
current_node = std::make_shared<ov::op::v0::Convert>(current_node, ov::element::f32);
}

std::shared_ptr<ov::Node> out_node;
if (p.activation_op_type == "Reshape") {
auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 1, 1, 15});
auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {input_batch, 1, 1, 15});
out_node = std::make_shared<ov::opset1::Reshape>(current_node, reshape_const, false);
} else {
out_node = current_node;
Expand All @@ -198,16 +213,17 @@ std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) {

class ConvertWeightCompressedConv1x1ToMatmulTest
: public TransformationTestsF,
public WithParamInterface<std::tuple<bool, bool, bool, bool, bool, bool, std::string>> {
public WithParamInterface<std::tuple<bool, bool, bool, bool, bool, bool, bool, std::string>> {
public:
static std::string get_test_case_name(
const testing::TestParamInfo<std::tuple<bool, bool, bool, bool, bool, bool, std::string>>& obj) {
const testing::TestParamInfo<std::tuple<bool, bool, bool, bool, bool, bool, bool, std::string>>& obj) {
const auto& [with_group_quant,
with_zp,
with_bias,
with_convert,
with_param_weight,
with_act_new_reshape,
with_batched_input,
activation_op_type] = obj.param;

std::ostringstream result;
Expand All @@ -217,6 +233,7 @@ class ConvertWeightCompressedConv1x1ToMatmulTest
result << "with_convert=" << with_convert << "_";
result << "with_param_weight=" << with_param_weight << "_";
result << "with_act_new_reshape=" << with_act_new_reshape << "_";
result << "with_batched_input=" << with_batched_input << "_";
result << "activation_op_type=" << activation_op_type;
return result.str();
}
Expand All @@ -230,13 +247,15 @@ class ConvertWeightCompressedConv1x1ToMatmulTest
with_convert,
with_param_weight,
with_act_new_reshape,
with_batched_input,
activation_op_type] = GetParam();
Conv1x1ToMatmulTestParams params{with_group_quant,
with_zp,
with_bias,
with_convert,
with_param_weight,
with_act_new_reshape,
with_batched_input,
activation_op_type};
model = gen_model(params);
model_ref = gen_model_ref(params);
Expand All @@ -254,6 +273,7 @@ INSTANTIATE_TEST_SUITE_P(TransformationTests,
::testing::Bool(),
::testing::Bool(),
::testing::Bool(),
::testing::Bool(),
::testing::Values("Transpose", "Reshape")),
ConvertWeightCompressedConv1x1ToMatmulTest::get_test_case_name);

Expand Down
Loading