-
Notifications
You must be signed in to change notification settings - Fork 3.2k
[GPU] Add squeeze/unsqueeze to the compressed conv1x1 transformation #34957
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
fa9e9b2
2edd1d6
1b2f139
fb7c1db
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -233,6 +233,22 @@ ov::pass::ConvertWeightCompressedConv1x1ToMatmul::ConvertWeightCompressedConv1x1 | |
| } | ||
| } | ||
|
|
||
| // If the activation has a static leading dimension of 1, squeeze it. | ||
| // This is done to allow pre-selection of OCL implementations for non-IMMAD devices, reducing memory pressure. | ||
| bool squeeze_activation = false; | ||
| if (activation->get_output_partial_shape(0)[0].is_static() && activation->get_output_partial_shape(0)[0] == 1) { | ||
| squeeze_activation = true; | ||
| auto shape_out = activation->get_output_partial_shape(0); | ||
| auto squeeze_const = | ||
| std::make_shared<ov::op::v0::Constant>(ov::element::i64, | ||
| ov::Shape{3}, | ||
| std::vector<int64_t>{1, -1, shape_out[-1].get_length()}); | ||
| auto squeeze = std::make_shared<ov::op::v1::Reshape>(activation, squeeze_const, false); | ||
| ov::copy_runtime_info(activation, squeeze); | ||
| squeeze->set_friendly_name(activation->get_friendly_name() + "_squeeze"); | ||
| activation = squeeze; | ||
| } | ||
|
|
||
| auto matmul = std::make_shared<ov::op::v0::MatMul>(activation, scaled_weight, false, true); | ||
| ov::copy_runtime_info(conv1x1, matmul); | ||
| std::shared_ptr<Node> matmul_out; | ||
|
|
@@ -258,6 +274,18 @@ ov::pass::ConvertWeightCompressedConv1x1ToMatmul::ConvertWeightCompressedConv1x1 | |
| matmul_out = matmul; | ||
| } | ||
|
|
||
| if (squeeze_activation) { | ||
| auto shape_out = matmul_out->get_output_partial_shape(0); | ||
| auto unsqueeze_const = | ||
| std::make_shared<ov::op::v0::Constant>(ov::element::i64, | ||
| ov::Shape{4}, | ||
| std::vector<int64_t>{1, 1, -1, shape_out[-1].get_length()}); | ||
| auto unsqueeze = std::make_shared<ov::op::v1::Reshape>(matmul_out, unsqueeze_const, false); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You're inserting a Reshape, but calling it "unsqueeze". Is this how it should be?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The squeeze/unsqueeze operations are represented by reshapes here to avoid adding new node types at this stage in the pipeline. |
||
| ov::copy_runtime_info(matmul_out, unsqueeze); | ||
| unsqueeze->set_friendly_name(matmul_out->get_friendly_name() + "_unsqueeze"); | ||
| matmul_out = unsqueeze; | ||
| } | ||
|
|
||
| if (reshape_out) { | ||
| if (convert_out) { | ||
| auto convert_final = convert_out->clone_with_new_inputs({matmul_out}); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -41,20 +41,24 @@ struct Conv1x1ToMatmulTestParams { | |
| bool with_convert; | ||
| bool with_param_weight; | ||
| bool with_act_new_reshape; | ||
| bool with_batched_input; | ||
| std::string activation_op_type; | ||
| }; | ||
|
|
||
| std::shared_ptr<ov::Model> gen_model(const Conv1x1ToMatmulTestParams& p) { | ||
| int input_batch = p.with_batched_input ? 4 : 1; | ||
| auto input = std::make_shared<ov::opset1::Parameter>( | ||
| ov::element::f16, | ||
| (p.activation_op_type == "Reshape" && p.with_act_new_reshape) ? ov::Shape{1, 1, 2, 5} : ov::Shape{1, 1, 1, 10}); | ||
| (p.activation_op_type == "Reshape" && p.with_act_new_reshape) | ||
| ? ov::Shape{(size_t)input_batch, 1, 2, 5} | ||
| : ov::Shape{(size_t)input_batch, 1, 1, 10}); | ||
|
|
||
| std::shared_ptr<ov::Node> act_node; | ||
| if (p.activation_op_type == "Transpose") { | ||
| auto transpose_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {0, 3, 1, 2}); | ||
| act_node = std::make_shared<ov::opset1::Transpose>(input, transpose_const); | ||
| } else { | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 10, 1, 1}); | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {input_batch, 10, 1, 1}); | ||
| act_node = std::make_shared<ov::opset1::Reshape>(input, reshape_const, false); | ||
| } | ||
|
|
||
|
|
@@ -118,17 +122,20 @@ std::shared_ptr<ov::Model> gen_model(const Conv1x1ToMatmulTestParams& p) { | |
| auto transpose_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {0, 2, 3, 1}); | ||
| out_node = std::make_shared<ov::opset1::Transpose>(current_node, transpose_const); | ||
| } else { | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 1, 1, 15}); | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {input_batch, 1, 1, 15}); | ||
| out_node = std::make_shared<ov::opset1::Reshape>(current_node, reshape_const, false); | ||
| } | ||
|
|
||
| return std::make_shared<ov::Model>(ov::OutputVector{out_node}, params); | ||
| } | ||
|
|
||
| std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) { | ||
| int input_batch = p.with_batched_input ? 4 : 1; | ||
| auto input = std::make_shared<ov::opset1::Parameter>( | ||
| ov::element::f16, | ||
| (p.activation_op_type == "Reshape" && p.with_act_new_reshape) ? ov::Shape{1, 1, 2, 5} : ov::Shape{1, 1, 1, 10}); | ||
| (p.activation_op_type == "Reshape" && p.with_act_new_reshape) | ||
| ? ov::Shape{(size_t)input_batch, 1, 2, 5} | ||
| : ov::Shape{(size_t)input_batch, 1, 1, 10}); | ||
|
|
||
| std::shared_ptr<ov::Node> weights_node; | ||
| ov::ParameterVector params = {input}; | ||
|
|
@@ -170,23 +177,31 @@ std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) { | |
|
|
||
| std::shared_ptr<ov::Node> act_node = input; | ||
| if (p.activation_op_type == "Reshape" && p.with_act_new_reshape) { | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, 1, 10}); | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, input_batch, 10}); | ||
| act_node = std::make_shared<ov::opset1::Reshape>(input, reshape_const, false); | ||
| } | ||
| if (input_batch == 1 || (p.activation_op_type == "Reshape" && p.with_act_new_reshape)) { | ||
| auto squeeze_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{3}, {1, input_batch, 10}); | ||
| act_node = std::make_shared<ov::opset1::Reshape>(act_node, squeeze_const, false); | ||
| } | ||
| auto matmul = std::make_shared<ov::op::v0::MatMul>(act_node, mul, false, true); | ||
| current_node = matmul; | ||
|
|
||
| if (p.with_bias) { | ||
| auto bias_const = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 1, 1, 15}, {1}); | ||
| current_node = std::make_shared<ov::opset1::Add>(matmul, bias_const); | ||
| current_node = std::make_shared<ov::opset1::Add>(current_node, bias_const); | ||
| } | ||
| if (input_batch == 1 || (p.activation_op_type == "Reshape" && p.with_act_new_reshape)) { | ||
| auto unsqueeze_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, input_batch, 15}); | ||
| current_node = std::make_shared<ov::opset1::Reshape>(current_node, unsqueeze_const, false); | ||
| } | ||
|
Comment on lines
178
to
197
|
||
| if (p.with_convert) { | ||
| current_node = std::make_shared<ov::op::v0::Convert>(current_node, ov::element::f32); | ||
| } | ||
|
|
||
| std::shared_ptr<ov::Node> out_node; | ||
| if (p.activation_op_type == "Reshape") { | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 1, 1, 15}); | ||
| auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {input_batch, 1, 1, 15}); | ||
| out_node = std::make_shared<ov::opset1::Reshape>(current_node, reshape_const, false); | ||
| } else { | ||
| out_node = current_node; | ||
|
|
@@ -198,16 +213,17 @@ std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) { | |
|
|
||
| class ConvertWeightCompressedConv1x1ToMatmulTest | ||
| : public TransformationTestsF, | ||
| public WithParamInterface<std::tuple<bool, bool, bool, bool, bool, bool, std::string>> { | ||
| public WithParamInterface<std::tuple<bool, bool, bool, bool, bool, bool, bool, std::string>> { | ||
| public: | ||
| static std::string get_test_case_name( | ||
| const testing::TestParamInfo<std::tuple<bool, bool, bool, bool, bool, bool, std::string>>& obj) { | ||
| const testing::TestParamInfo<std::tuple<bool, bool, bool, bool, bool, bool, bool, std::string>>& obj) { | ||
| const auto& [with_group_quant, | ||
| with_zp, | ||
| with_bias, | ||
| with_convert, | ||
| with_param_weight, | ||
| with_act_new_reshape, | ||
| with_batched_input, | ||
| activation_op_type] = obj.param; | ||
|
|
||
| std::ostringstream result; | ||
|
|
@@ -217,6 +233,7 @@ class ConvertWeightCompressedConv1x1ToMatmulTest | |
| result << "with_convert=" << with_convert << "_"; | ||
| result << "with_param_weight=" << with_param_weight << "_"; | ||
| result << "with_act_new_reshape=" << with_act_new_reshape << "_"; | ||
| result << "with_batched_input=" << with_batched_input << "_"; | ||
| result << "activation_op_type=" << activation_op_type; | ||
| return result.str(); | ||
| } | ||
|
|
@@ -230,13 +247,15 @@ class ConvertWeightCompressedConv1x1ToMatmulTest | |
| with_convert, | ||
| with_param_weight, | ||
| with_act_new_reshape, | ||
| with_batched_input, | ||
| activation_op_type] = GetParam(); | ||
| Conv1x1ToMatmulTestParams params{with_group_quant, | ||
| with_zp, | ||
| with_bias, | ||
| with_convert, | ||
| with_param_weight, | ||
| with_act_new_reshape, | ||
| with_batched_input, | ||
| activation_op_type}; | ||
| model = gen_model(params); | ||
| model_ref = gen_model_ref(params); | ||
|
|
@@ -254,6 +273,7 @@ INSTANTIATE_TEST_SUITE_P(TransformationTests, | |
| ::testing::Bool(), | ||
| ::testing::Bool(), | ||
| ::testing::Bool(), | ||
| ::testing::Bool(), | ||
| ::testing::Values("Transpose", "Reshape")), | ||
| ConvertWeightCompressedConv1x1ToMatmulTest::get_test_case_name); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[BLOCKER] The new squeeze/unsqueeze reshape constants rely on `shape_out[-1].get_length()`. If the last dimension is dynamic (e.g., the input has a dynamic channel/feature dim), `get_length()` throws and the transformation will crash during pass execution. Consider building the reshape pattern without requiring a static last dim (e.g., use `special_zero=true` with a `0` in the last position to copy the input's last dimension), and apply the same fix to the unsqueeze block as well.