Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,54 @@ class WeightsSeparationTests : public ov::test::behavior::OVPluginTestBase,
return std::make_shared<ov::Model>(ov::OutputVector{add}, parameter_vector);
}

// This is a special test model with multiple constants using the same weights buffer.
// Builds a model in which two distinct Constant nodes reference the very same
// weights buffer (same bin offset), exercising weightless-cache deduplication.
std::shared_ptr<ov::Model> createTestModelWeightlessWithDuplicateConstants() {
    constexpr auto precision = element::f32;
    const std::vector<float> commonBuffer{1.0f, 2.0f, 3.0f, 4.0f};

    // Two inputs with transposed-looking shapes so each branch needs its own
    // Constant node, even though both constants carry identical data.
    auto paramA = std::make_shared<op::v0::Parameter>(precision, Shape{1, 1, 4});
    paramA->set_friendly_name("input1");
    auto paramB = std::make_shared<op::v0::Parameter>(precision, Shape{1, 4, 1});
    paramB->set_friendly_name("input2");

    auto constA = std::make_shared<op::v0::Constant>(precision, Shape{1, 1, 4}, commonBuffer);
    constA->set_friendly_name("weights");
    auto mulA = std::make_shared<op::v1::Multiply>(paramA, constA);
    mulA->set_friendly_name("multiply1");

    auto constB = std::make_shared<op::v0::Constant>(precision, Shape{1, 4, 1}, commonBuffer);
    constB->set_friendly_name("weights_new_shape");
    auto mulB = std::make_shared<op::v1::Multiply>(paramB, constB);
    mulB->set_friendly_name("multiply2");

    // Reshape the first branch so the two branches become addable.
    auto reshapePattern = std::make_shared<op::v0::Constant>(element::i64, Shape{3}, std::vector<int64_t>{1, 4, 1});
    reshapePattern->set_friendly_name("reshapeWeights");
    auto reshaped = std::make_shared<op::v1::Reshape>(mulA, reshapePattern, false);
    reshaped->set_friendly_name("reshape");

    auto sum = std::make_shared<op::v1::Add>(reshaped, mulB);
    sum->set_friendly_name("add");

    // Note: if this offset is changed, compiled_model->export_model() =>
    // core->import_model() would fail as the weightless bin offset is
    // "reset" through this boundary; right now this is by design.
    constexpr size_t sharedBinOffset = 0;
    const auto markWeightless = [sharedBinOffset](const std::shared_ptr<op::v0::Constant>& constant) {
        constant->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()] =
            ov::WeightlessCacheAttribute(constant->get_byte_size(), sharedBinOffset, constant->get_element_type());
    };
    markWeightless(constA);
    markWeightless(constB);

    auto model =
        std::make_shared<Model>(OutputVector{sum}, ParameterVector{paramA, paramB}, "duplicate_weights_model");
    ov::util::set_tensors_names(AUTO, *model, {}, {{0, {"add"}}});
    return model;
}

Tensor from_stream(std::istream& stream, const size_t size) {
ov::Tensor t(element::from<char>(), Shape{size});
stream.read(t.data<char>(), size);
Expand Down Expand Up @@ -559,6 +607,38 @@ void WeightsSeparationTests::runCorrectInferenceResultIfCannotCompileAsWeightles
OV_ASSERT_NO_THROW(utils::compare(expected, output));
}

// Verifies the full weightless round trip for a model whose constants share one
// weights buffer: compile -> export -> import (with weights_path) -> infer.
TEST_P(WeightsSeparationTests, CorrectInferenceResultWeightlessWithDuplicateConstants) {
model = createTestModelWeightlessWithDuplicateConstants();
configuration.insert(ov::intel_npu::weightless_blob(true));

// Serialize the model so the .bin file exists on disk for the later import;
// the weightless flow resolves constants from this file via weights_path.
model_path = ov::util::path_join({utils::getCurrentWorkingDir(), utils::generateTestFilePrefix()}).string();
ov::serialize(model, model_path + ".xml", model_path + ".bin");

// compilation should succeed
OV_ASSERT_NO_THROW(compiled_model = core->compile_model(model, target_device, configuration));
ASSERT_TRUE(compiled_model);

std::stringstream export_stream;
compiled_model.export_model(export_stream);

// weights_path is added only for import: the imported (weightless) blob must
// read the actual weights back from the serialized .bin file.
configuration.insert(ov::weights_path(model_path + ".bin"));
OV_ASSERT_NO_THROW(compiled_model = core->import_model(export_stream, target_device, configuration));
ASSERT_TRUE(compiled_model);

// inference should also succeed
const ov::Tensor input1 = utils::create_tensor(element::f32, Shape{1, 1, 4}, std::vector<float>(4, -2.0f));
const ov::Tensor input2 = utils::create_tensor(element::f32, Shape{1, 4, 1}, std::vector<float>(4, 0.0f));
OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request());
OV_ASSERT_NO_THROW(inference_request.set_tensor("input1", input1));
OV_ASSERT_NO_THROW(inference_request.set_tensor("input2", input2));
OV_ASSERT_NO_THROW(inference_request.infer());

// Branch 2 contributes zeros (input2 is all 0), so the result is just
// input1 * weights = -2 * {1,2,3,4}, reshaped to {1,4,1}.
const ov::Tensor expected =
utils::create_tensor(element::f32, Shape{1, 4, 1}, std::vector<float>{-2.0f, -4.0f, -6.0f, -8.0f});
const ov::Tensor output = inference_request.get_tensor("add");
OV_ASSERT_NO_THROW(utils::compare(expected, output));
}

// Alias: one-shot variant reuses the same fixture; behavior differences come
// from the instantiation parameters, not the fixture type itself.
using WeightsSeparationOneShotTests = WeightsSeparationTests;

/**
Expand Down
Loading