-
Notifications
You must be signed in to change notification settings - Fork 3.2k
[GPU] Add squeeze/unsqueeze to the compressed conv1x1 transformation #34957
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
fa9e9b2
2edd1d6
1b2f139
fb7c1db
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -233,6 +233,22 @@ ov::pass::ConvertWeightCompressedConv1x1ToMatmul::ConvertWeightCompressedConv1x1 | |
| } | ||
| } | ||
|
|
||
| // If the activation has a static leading dimension of 1, squeeze it. | ||
| // This is done to allow pre-selection of OCL implementations for non-IMMAD devices, reducing memory pressure. | ||
| bool squeeze_activation = false; | ||
| if (activation->get_output_partial_shape(0)[0].is_static() && activation->get_output_partial_shape(0)[0] == 1) { | ||
| squeeze_activation = true; | ||
| auto shape_out = activation->get_output_partial_shape(0); | ||
| auto squeeze_const = | ||
| std::make_shared<ov::op::v0::Constant>(ov::element::i64, | ||
| ov::Shape{3}, | ||
| std::vector<int64_t>{1, -1, shape_out[-1].get_length()}); | ||
| auto squeeze = std::make_shared<ov::op::v1::Reshape>(activation, squeeze_const, false); | ||
| ov::copy_runtime_info(activation, squeeze); | ||
| squeeze->set_friendly_name(activation->get_friendly_name() + "_squeeze"); | ||
| activation = squeeze; | ||
| } | ||
|
|
||
| auto matmul = std::make_shared<ov::op::v0::MatMul>(activation, scaled_weight, false, true); | ||
| ov::copy_runtime_info(conv1x1, matmul); | ||
| std::shared_ptr<Node> matmul_out; | ||
|
|
@@ -258,6 +274,18 @@ ov::pass::ConvertWeightCompressedConv1x1ToMatmul::ConvertWeightCompressedConv1x1 | |
| matmul_out = matmul; | ||
| } | ||
|
|
||
| if (squeeze_activation) { | ||
| auto shape_out = matmul_out->get_output_partial_shape(0); | ||
| auto unsqueeze_const = | ||
| std::make_shared<ov::op::v0::Constant>(ov::element::i64, | ||
| ov::Shape{4}, | ||
| std::vector<int64_t>{1, 1, -1, shape_out[-1].get_length()}); | ||
| auto unsqueeze = std::make_shared<ov::op::v1::Reshape>(matmul_out, unsqueeze_const, false); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You're inserting a Reshape, but calling it "unsqueeze". Is this how it should be?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The squeeze/unsqueeze operations are represented by reshapes here to avoid adding new node types at this stage in the pipeline. |
||
| ov::copy_runtime_info(matmul_out, unsqueeze); | ||
| unsqueeze->set_friendly_name(matmul_out->get_friendly_name() + "_unsqueeze"); | ||
| matmul_out = unsqueeze; | ||
| } | ||
|
|
||
| if (reshape_out) { | ||
| if (convert_out) { | ||
| auto convert_final = convert_out->clone_with_new_inputs({matmul_out}); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -41,20 +41,24 @@ struct Conv1x1ToMatmulTestParams { | |
| bool with_convert; | ||
| bool with_param_weight; | ||
| bool with_act_new_reshape; | ||
| bool with_batched_input; | ||
| std::string activation_op_type; | ||
| }; | ||
|
|
||
| std::shared_ptr<ov::Model> gen_model(const Conv1x1ToMatmulTestParams& p) { | ||
| int input_batch = p.with_batched_input ? 4 : 1; | ||
| auto input = std::make_shared<ov::opset1::Parameter>( | ||
| ov::element::f16, | ||
| (p.activation_op_type == "Reshape" && p.with_act_new_reshape) ? ov::Shape{1, 1, 2, 5} : ov::Shape{1, 1, 1, 10}); | ||
| (p.activation_op_type == "Reshape" && p.with_act_new_reshape) | ||
| ? ov::Shape{(size_t)input_batch, 1, 2, 5} | ||
| : ov::Shape{(size_t)input_batch, 1, 1, 10}); | ||
|
|
||
| std::shared_ptr<ov::Node> act_node; | ||
| if (p.activation_op_type == "Transpose") { | ||
| auto transpose_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {0, 3, 1, 2}); | ||
| act_node = std::make_shared<ov::opset1::Transpose>(input, transpose_const); | ||
| } else { | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 10, 1, 1}); | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {input_batch, 10, 1, 1}); | ||
| act_node = std::make_shared<ov::opset1::Reshape>(input, reshape_const, false); | ||
| } | ||
|
|
||
|
|
@@ -118,17 +122,20 @@ std::shared_ptr<ov::Model> gen_model(const Conv1x1ToMatmulTestParams& p) { | |
| auto transpose_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {0, 2, 3, 1}); | ||
| out_node = std::make_shared<ov::opset1::Transpose>(current_node, transpose_const); | ||
| } else { | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 1, 1, 15}); | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {input_batch, 1, 1, 15}); | ||
| out_node = std::make_shared<ov::opset1::Reshape>(current_node, reshape_const, false); | ||
| } | ||
|
|
||
| return std::make_shared<ov::Model>(ov::OutputVector{out_node}, params); | ||
| } | ||
|
|
||
| std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) { | ||
| int input_batch = p.with_batched_input ? 4 : 1; | ||
| auto input = std::make_shared<ov::opset1::Parameter>( | ||
| ov::element::f16, | ||
| (p.activation_op_type == "Reshape" && p.with_act_new_reshape) ? ov::Shape{1, 1, 2, 5} : ov::Shape{1, 1, 1, 10}); | ||
| (p.activation_op_type == "Reshape" && p.with_act_new_reshape) | ||
| ? ov::Shape{(size_t)input_batch, 1, 2, 5} | ||
| : ov::Shape{(size_t)input_batch, 1, 1, 10}); | ||
|
|
||
| std::shared_ptr<ov::Node> weights_node; | ||
| ov::ParameterVector params = {input}; | ||
|
|
@@ -170,23 +177,31 @@ std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) { | |
|
|
||
| std::shared_ptr<ov::Node> act_node = input; | ||
| if (p.activation_op_type == "Reshape" && p.with_act_new_reshape) { | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, 1, 10}); | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, input_batch, 10}); | ||
| act_node = std::make_shared<ov::opset1::Reshape>(input, reshape_const, false); | ||
| } | ||
| if (input_batch == 1 || (p.activation_op_type == "Reshape" && p.with_act_new_reshape)) { | ||
| auto squeeze_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{3}, {1, input_batch, 10}); | ||
| act_node = std::make_shared<ov::opset1::Reshape>(act_node, squeeze_const, false); | ||
| } | ||
| auto matmul = std::make_shared<ov::op::v0::MatMul>(act_node, mul, false, true); | ||
| current_node = matmul; | ||
|
|
||
| if (p.with_bias) { | ||
| auto bias_const = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 1, 1, 15}, {1}); | ||
| current_node = std::make_shared<ov::opset1::Add>(matmul, bias_const); | ||
| current_node = std::make_shared<ov::opset1::Add>(current_node, bias_const); | ||
| } | ||
| if (input_batch == 1 || (p.activation_op_type == "Reshape" && p.with_act_new_reshape)) { | ||
| auto unsqueeze_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, input_batch, 15}); | ||
| current_node = std::make_shared<ov::opset1::Reshape>(current_node, unsqueeze_const, false); | ||
| } | ||
|
Comment on lines
178
to
197
|
||
| if (p.with_convert) { | ||
| current_node = std::make_shared<ov::op::v0::Convert>(current_node, ov::element::f32); | ||
| } | ||
|
|
||
| std::shared_ptr<ov::Node> out_node; | ||
| if (p.activation_op_type == "Reshape") { | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 1, 1, 15}); | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {input_batch, 1, 1, 15}); | ||
| out_node = std::make_shared<ov::opset1::Reshape>(current_node, reshape_const, false); | ||
| } else { | ||
| out_node = current_node; | ||
|
|
@@ -198,16 +213,17 @@ std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) { | |
|
|
||
| class ConvertWeightCompressedConv1x1ToMatmulTest | ||
| : public TransformationTestsF, | ||
| public WithParamInterface<std::tuple<bool, bool, bool, bool, bool, bool, std::string>> { | ||
| public WithParamInterface<std::tuple<bool, bool, bool, bool, bool, bool, bool, std::string>> { | ||
| public: | ||
| static std::string get_test_case_name( | ||
| const testing::TestParamInfo<std::tuple<bool, bool, bool, bool, bool, bool, std::string>>& obj) { | ||
| const testing::TestParamInfo<std::tuple<bool, bool, bool, bool, bool, bool, bool, std::string>>& obj) { | ||
| const auto& [with_group_quant, | ||
| with_zp, | ||
| with_bias, | ||
| with_convert, | ||
| with_param_weight, | ||
| with_act_new_reshape, | ||
| with_batched_input, | ||
| activation_op_type] = obj.param; | ||
|
|
||
| std::ostringstream result; | ||
|
|
@@ -217,6 +233,7 @@ class ConvertWeightCompressedConv1x1ToMatmulTest | |
| result << "with_convert=" << with_convert << "_"; | ||
| result << "with_param_weight=" << with_param_weight << "_"; | ||
| result << "with_act_new_reshape=" << with_act_new_reshape << "_"; | ||
| result << "with_batched_input=" << with_batched_input << "_"; | ||
| result << "activation_op_type=" << activation_op_type; | ||
| return result.str(); | ||
| } | ||
|
|
@@ -230,13 +247,15 @@ class ConvertWeightCompressedConv1x1ToMatmulTest | |
| with_convert, | ||
| with_param_weight, | ||
| with_act_new_reshape, | ||
| with_batched_input, | ||
| activation_op_type] = GetParam(); | ||
| Conv1x1ToMatmulTestParams params{with_group_quant, | ||
| with_zp, | ||
| with_bias, | ||
| with_convert, | ||
| with_param_weight, | ||
| with_act_new_reshape, | ||
| with_batched_input, | ||
| activation_op_type}; | ||
| model = gen_model(params); | ||
| model_ref = gen_model_ref(params); | ||
|
|
@@ -254,6 +273,7 @@ INSTANTIATE_TEST_SUITE_P(TransformationTests, | |
| ::testing::Bool(), | ||
| ::testing::Bool(), | ||
| ::testing::Bool(), | ||
| ::testing::Bool(), | ||
| ::testing::Values("Transpose", "Reshape")), | ||
| ConvertWeightCompressedConv1x1ToMatmulTest::get_test_case_name); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[BLOCKER] The new squeeze/unsqueeze reshape constants rely on `shape_out[-1].get_length()`. If the last dimension is dynamic (e.g., the input has a dynamic channel/feature dim), `get_length()` throws and the transformation will crash during pass execution. Consider building the reshape pattern without requiring a static last dim (e.g., use `special_zero=true` with a `0` in the last position to copy the input's last dimension), and apply the same fix to the unsqueeze block as well.