Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,54 @@ class WeightsSeparationTests : public ov::test::behavior::OVPluginTestBase,
return std::make_shared<ov::Model>(ov::OutputVector{add}, parameter_vector);
}

// This is a special test model with multiple constants using the same weights buffer.
// Builds a model in which two distinct Constant nodes reference the very same
// weights buffer (same bin offset), exercising weightless-cache deduplication.
std::shared_ptr<ov::Model> createTestModelWeightlessWithDuplicateConstants() {
    constexpr auto precision = element::f32;
    const std::vector<float> commonBuffer{1.0f, 2.0f, 3.0f, 4.0f};

    // Two inputs with transposed-looking shapes so each branch needs its own
    // Constant node, even though both constants carry identical data.
    auto paramA = std::make_shared<op::v0::Parameter>(precision, Shape{1, 1, 4});
    paramA->set_friendly_name("input1");
    auto paramB = std::make_shared<op::v0::Parameter>(precision, Shape{1, 4, 1});
    paramB->set_friendly_name("input2");

    auto constA = std::make_shared<op::v0::Constant>(precision, Shape{1, 1, 4}, commonBuffer);
    constA->set_friendly_name("weights");
    auto mulA = std::make_shared<op::v1::Multiply>(paramA, constA);
    mulA->set_friendly_name("multiply1");

    auto constB = std::make_shared<op::v0::Constant>(precision, Shape{1, 4, 1}, commonBuffer);
    constB->set_friendly_name("weights_new_shape");
    auto mulB = std::make_shared<op::v1::Multiply>(paramB, constB);
    mulB->set_friendly_name("multiply2");

    // Reshape the first branch so the two branches become addable.
    auto reshapePattern = std::make_shared<op::v0::Constant>(element::i64, Shape{3}, std::vector<int64_t>{1, 4, 1});
    reshapePattern->set_friendly_name("reshapeWeights");
    auto reshaped = std::make_shared<op::v1::Reshape>(mulA, reshapePattern, false);
    reshaped->set_friendly_name("reshape");

    auto sum = std::make_shared<op::v1::Add>(reshaped, mulB);
    sum->set_friendly_name("add");

    // Note: if this offset is changed, compiled_model->export_model() =>
    // core->import_model() would fail as the weightless bin offset is
    // "reset" through this boundary; right now this is by design.
    constexpr size_t sharedBinOffset = 0;
    const auto markWeightless = [sharedBinOffset](const std::shared_ptr<op::v0::Constant>& constant) {
        constant->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()] =
            ov::WeightlessCacheAttribute(constant->get_byte_size(), sharedBinOffset, constant->get_element_type());
    };
    markWeightless(constA);
    markWeightless(constB);

    auto model =
        std::make_shared<Model>(OutputVector{sum}, ParameterVector{paramA, paramB}, "duplicate_weights_model");
    ov::util::set_tensors_names(AUTO, *model, {}, {{0, {"add"}}});
    return model;
}

Tensor from_stream(std::istream& stream, const size_t size) {
ov::Tensor t(element::from<char>(), Shape{size});
stream.read(t.data<char>(), size);
Expand Down Expand Up @@ -559,6 +607,38 @@ void WeightsSeparationTests::runCorrectInferenceResultIfCannotCompileAsWeightles
OV_ASSERT_NO_THROW(utils::compare(expected, output));
}

// Verifies the full weightless round trip for a model whose constants share one
// weights buffer: compile -> export -> import (with weights_path) -> infer.
TEST_P(WeightsSeparationTests, CorrectInferenceResultWeightlessWithDuplicateConstants) {
model = createTestModelWeightlessWithDuplicateConstants();
configuration.insert(ov::intel_npu::weightless_blob(true));

// Serialize the model so the .bin file exists on disk for the later import;
// the weightless flow resolves constants from this file via weights_path.
model_path = ov::util::path_join({utils::getCurrentWorkingDir(), utils::generateTestFilePrefix()}).string();
ov::serialize(model, model_path + ".xml", model_path + ".bin");

// compilation should succeed
OV_ASSERT_NO_THROW(compiled_model = core->compile_model(model, target_device, configuration));
ASSERT_TRUE(compiled_model);

std::stringstream export_stream;
compiled_model.export_model(export_stream);

// weights_path is added only for import: the imported (weightless) blob must
// read the actual weights back from the serialized .bin file.
configuration.insert(ov::weights_path(model_path + ".bin"));
OV_ASSERT_NO_THROW(compiled_model = core->import_model(export_stream, target_device, configuration));
ASSERT_TRUE(compiled_model);

// inference should also succeed
const ov::Tensor input1 = utils::create_tensor(element::f32, Shape{1, 1, 4}, std::vector<float>(4, -2.0f));
const ov::Tensor input2 = utils::create_tensor(element::f32, Shape{1, 4, 1}, std::vector<float>(4, 0.0f));
OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request());
OV_ASSERT_NO_THROW(inference_request.set_tensor("input1", input1));
OV_ASSERT_NO_THROW(inference_request.set_tensor("input2", input2));
OV_ASSERT_NO_THROW(inference_request.infer());

// Branch 2 contributes zeros (input2 is all 0), so the result is just
// input1 * weights = -2 * {1,2,3,4}, reshaped to {1,4,1}.
const ov::Tensor expected =
utils::create_tensor(element::f32, Shape{1, 4, 1}, std::vector<float>{-2.0f, -4.0f, -6.0f, -8.0f});
const ov::Tensor output = inference_request.get_tensor("add");
OV_ASSERT_NO_THROW(utils::compare(expected, output));
}

// Alias: one-shot variant reuses the same fixture; behavior differences come
// from the instantiation parameters, not the fixture type itself.
using WeightsSeparationOneShotTests = WeightsSeparationTests;

/**
Expand Down
Loading