Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

#include "intel_npu/common/icompiled_model.hpp"
#include "intel_npu/common/igraph.hpp"
#include "intel_npu/common/network_metadata.hpp"
#include "intel_npu/common/npu.hpp"
#include "intel_npu/network_metadata.hpp"
#include "intel_npu/utils/logger/logger.hpp"
#include "intel_npu/utils/zero/zero_tensor.hpp"
#include "zero_pipeline.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include <vector>

#include "intel_npu/common/filtered_config.hpp"
#include "intel_npu/network_metadata.hpp"
#include "intel_npu/common/network_metadata.hpp"
#include "intel_npu/utils/zero/zero_wrappers.hpp"
#include "openvino/runtime/itensor.hpp"
#include "openvino/runtime/profiling_info.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,26 +158,4 @@ struct NetworkMetadata final {
void bindRelatedDescriptors();
};

/**
 * @struct NetworkDescription
 * @brief Result object handed back by the compiler. It bundles the description of the network
 * (inputs, outputs, name) with the compiled network stored in a format the device can execute.
 */
struct NetworkDescription final {
    // The type is move-only: copying is disabled so the compiled blob is never duplicated.
    NetworkDescription(const NetworkDescription&) = delete;
    NetworkDescription& operator=(const NetworkDescription&) = delete;

    NetworkDescription(NetworkDescription&&) = default;
    NetworkDescription& operator=(NetworkDescription&&) = default;

    ~NetworkDescription() = default;

    /**
     * @brief Takes ownership of both the compiled blob tensor and its metadata.
     * @param compiledNetWorkTensor tensor holding the compiled network, moved into this object
     * @param metadata network metadata, moved into this object
     */
    NetworkDescription(ov::Tensor&& compiledNetWorkTensor, NetworkMetadata&& metadata)
        : metadata(std::move(metadata)),
          compiledNetworkTensor(std::move(compiledNetWorkTensor)) {}

    // Declaration order is kept as-is: it fixes the member initialization order used above.
    NetworkMetadata metadata;
    ov::Tensor compiledNetworkTensor;
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "intel_npu/network_metadata.hpp"
#include "intel_npu/common/network_metadata.hpp"

namespace intel_npu {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#include "compiler.h"
#include "intel_npu/common/filtered_config.hpp"
#include "intel_npu/network_metadata.hpp"
#include "intel_npu/common/network_metadata.hpp"
#include "openvino/core/except.hpp"

namespace intel_npu {
Expand All @@ -26,18 +26,18 @@ class VCLCompilerImpl final : public std::enable_shared_from_this<VCLCompilerImp
* @param model a shared pointer to the OpenVINO model to be compiled
* @param config a reference to NPUConfig containing plugin config options
* including config options related to compilation
* @return a shared pointer on an object implementing NetworkDescription interface
* @return an ov::Tensor object containing the blob of the compiled model
*/
NetworkDescription compile(const std::shared_ptr<const ov::Model>& model, const FilteredConfig& config) const;
ov::Tensor compile(const std::shared_ptr<const ov::Model>& model, const FilteredConfig& config) const;

/**
* @brief Compiles the model, weights separation enabled. All init schedules along with the main one are compiled in
* the same scope.
* @return A "NetworkDescription" object for each init schedule, followed by another one corresponding to the main
* @return An ov::Tensor object for each init schedule, followed by another one corresponding to the main
* part.
*/
std::vector<std::shared_ptr<NetworkDescription>> compileWsOneShot(const std::shared_ptr<ov::Model>& model,
const FilteredConfig& config) const;
std::vector<ov::Tensor> compileWsOneShot(const std::shared_ptr<ov::Model>& model,
const FilteredConfig& config) const;
/**
* @brief Sequential compilation of Init(s) and Main
*
Expand All @@ -52,9 +52,9 @@ class VCLCompilerImpl final : public std::enable_shared_from_this<VCLCompilerImp
* Compiler should somehow understand which Init (or Main) to return
* Plugin does not know the total number of Init schedules
*/
NetworkDescription compileWsIterative(const std::shared_ptr<ov::Model>& model,
const FilteredConfig& config,
size_t callNumber) const;
ov::Tensor compileWsIterative(const std::shared_ptr<ov::Model>& model,
const FilteredConfig& config,
size_t callNumber) const;
/**
* @brief Returns information about supported layers of the network passed
* @param model The model to be queried
Expand Down Expand Up @@ -102,9 +102,9 @@ class VCLCompilerImpl final : public std::enable_shared_from_this<VCLCompilerImp
* "WeightlessCacheAttribute" may be stored within the serialized model if requested.
* @note Storing the "WeightlessCacheAttribute" is necessary if the "weights separation" flow is being used.
*/
NetworkDescription compile(const std::shared_ptr<const ov::Model>& model,
const FilteredConfig& config,
const bool storeWeightlessCacheAttributeFlag) const;
ov::Tensor compile(const std::shared_ptr<const ov::Model>& model,
const FilteredConfig& config,
const bool storeWeightlessCacheAttributeFlag) const;

vcl_log_handle_t _logHandle = nullptr;
vcl_compiler_handle_t _compilerHandle = nullptr;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include <ze_graph_ext.h>

#include "intel_npu/common/idynamic_graph.hpp"
#include "intel_npu/network_metadata.hpp"
#include "intel_npu/common/network_metadata.hpp"
#include "intel_npu/utils/zero/zero_init.hpp"
#include "npu_vm_runtime_api.hpp"
#include "openvino/runtime/so_ptr.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ class WeightlessGraph final : public Graph {
std::vector<std::unique_ptr<CommandList>> _initsCommandLists;
std::vector<std::unique_ptr<Fence>> _initsFences;
std::shared_ptr<CommandQueue> _initsCommandQueue;
uint32_t _initsCommandQueueGroupOrdinal = 0;

/**
* @brief Tensors holding the L0 buffers corresponding to the inputs of the main schedule.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#pragma once

#include "intel_npu/network_metadata.hpp"
#include "intel_npu/common/network_metadata.hpp"

namespace intel_npu {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include <unordered_set>
#include <vector>

#include "intel_npu/network_metadata.hpp"
#include "intel_npu/common/network_metadata.hpp"
#include "intel_npu/utils/logger/logger.hpp"
#include "intel_npu/utils/zero/zero_init.hpp"
#include "model_serializer.hpp"
Expand Down
36 changes: 13 additions & 23 deletions src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -372,14 +372,13 @@ std::shared_ptr<void> VCLCompilerImpl::getLinkedLibrary() const {
return VCLApi::getInstance()->getLibrary();
}

NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr<const ov::Model>& model,
const FilteredConfig& config) const {
ov::Tensor VCLCompilerImpl::compile(const std::shared_ptr<const ov::Model>& model, const FilteredConfig& config) const {
return compile(model, config, false);
}

NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr<const ov::Model>& model,
const FilteredConfig& config,
const bool storeWeightlessCacheAttributeFlag) const {
ov::Tensor VCLCompilerImpl::compile(const std::shared_ptr<const ov::Model>& model,
const FilteredConfig& config,
const bool storeWeightlessCacheAttributeFlag) const {
_logger.debug("compile start");

/// Check the linked vcl version whether supported in plugin
Expand Down Expand Up @@ -454,22 +453,17 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr<const ov::Mode
allocator.m_size,
static_cast<void*>(allocator.m_allocated));

// Use empty metadata as VCL does not support metadata extraction
NetworkMetadata metadata;

_logger.debug("compile end, blob size:%d", allocator.m_size);
return NetworkDescription(make_tensor_from_aligned_addr(allocator.m_allocated, allocator.m_size),
std::move(metadata));
return make_tensor_from_aligned_addr(allocator.m_allocated, allocator.m_size);
} else {
OPENVINO_THROW("Not supported VCL version: %d.%d, please use VCL 6.1 or later",
_vclVersion.major,
_vclVersion.minor);
}
}

std::vector<std::shared_ptr<NetworkDescription>> VCLCompilerImpl::compileWsOneShot(
const std::shared_ptr<ov::Model>& model,
const FilteredConfig& config) const {
std::vector<ov::Tensor> VCLCompilerImpl::compileWsOneShot(const std::shared_ptr<ov::Model>& model,
const FilteredConfig& config) const {
_logger.debug("compileWsOneShot start");

/// Check the linked vcl version whether supported in plugin
Expand Down Expand Up @@ -529,20 +523,16 @@ std::vector<std::shared_ptr<NetworkDescription>> VCLCompilerImpl::compileWsOneSh
OPENVINO_THROW("Failed to create VCL executable, blobCount is zero");
}

std::vector<std::shared_ptr<NetworkDescription>> networkDescrs;
std::vector<ov::Tensor> initMainTensors;
for (auto& blob : allocator.m_info) {
// Use empty metadata as VCL does not support metadata extraction
NetworkMetadata metadata;
networkDescrs.emplace_back(
std::make_shared<NetworkDescription>(make_tensor_from_aligned_addr(blob.first, blob.second),
std::move(metadata)));
initMainTensors.emplace_back(make_tensor_from_aligned_addr(blob.first, blob.second));
}
return networkDescrs;
return initMainTensors;
}

NetworkDescription VCLCompilerImpl::compileWsIterative(const std::shared_ptr<ov::Model>& model,
const FilteredConfig& config,
size_t callNumber) const {
ov::Tensor VCLCompilerImpl::compileWsIterative(const std::shared_ptr<ov::Model>& model,
const FilteredConfig& config,
size_t callNumber) const {
_logger.debug("compileWsIterative start");
FilteredConfig updatedConfig = config;
updatedConfig.update({{ov::intel_npu::ws_compile_call_number.name(), std::to_string(callNumber)}});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,9 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con
OV_ITT_TASK_CHAIN(COMPILE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "compile");

_logger.debug("compile start");
auto networkDesc = _compiler->compile(model, config);
auto tensor = _compiler->compile(model, config);
_logger.debug("compile end");

ov::Tensor tensor;
tensor = std::move(networkDesc.compiledNetworkTensor);

if (config.get<COMPILATION_MODE>() == "HostCompile") {
// metadata will be obtained in initialize() of DynamicGraph
_logger.debug("Use dynamicGraph to hold blob for HostCompile mode!");
Expand Down Expand Up @@ -133,20 +130,17 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(std::shared_ptr<ov::Mod

switch (localConfig.get<SEPARATE_WEIGHTS_VERSION>()) {
case ov::intel_npu::WSVersion::ONE_SHOT: {
std::vector<std::shared_ptr<NetworkDescription>> initMainNetworkDescriptions =
_compiler->compileWsOneShot(model, localConfig);
std::vector<ov::Tensor> initMainTensors = _compiler->compileWsOneShot(model, localConfig);

std::shared_ptr<NetworkDescription> mainNetworkDescription = initMainNetworkDescriptions.back();
initMainNetworkDescriptions.pop_back();
if (initMainNetworkDescriptions.empty()) {
auto tensorMain = initMainTensors.back();
initMainTensors.pop_back();
if (initMainTensors.empty()) {
_logger.warning("NPU compiler did not produce any init schedules. "
"This likely means that the compiled model blob has weights inside even "
"though weightless compilation was requested.");
}

std::vector<std::shared_ptr<NetworkDescription>> initNetworkDescriptions =
std::move(initMainNetworkDescriptions);
tensorMain = std::move(mainNetworkDescription->compiledNetworkTensor);
tensorsInits = std::move(initMainTensors);

if (_zeGraphExt) {
// Depending on the config, we may get an error when trying to
Expand All @@ -164,13 +158,9 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(std::shared_ptr<ov::Mod
"No driver is found, zeGraphExt is nullptr, so metadata is empty. Only exports are available");
}

initGraphDescriptors.reserve(initNetworkDescriptions.size());
tensorsInits.reserve(initNetworkDescriptions.size());
initNetworkMetadata.reserve(initNetworkDescriptions.size());
for (auto& networkDesc : initNetworkDescriptions) {
ov::Tensor tensor;
tensor = std::move(networkDesc->compiledNetworkTensor);

initGraphDescriptors.reserve(tensorsInits.size());
initNetworkMetadata.reserve(tensorsInits.size());
for (const auto& tensor : tensorsInits) {
GraphDescriptor initGraphDesc;
NetworkMetadata initNetworkMeta;
if (_zeGraphExt) {
Expand Down Expand Up @@ -203,11 +193,7 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(std::shared_ptr<ov::Mod
std::shared_ptr<ov::Model> targetModel = model;
size_t i = 0;

while (auto networkDescription =
std::make_shared<NetworkDescription>(_compiler->compileWsIterative(targetModel, localConfig, i++))) {
ov::Tensor tensor;
tensor = std::move(networkDescription->compiledNetworkTensor);

while (auto tensor = _compiler->compileWsIterative(targetModel, localConfig, i++)) {
GraphDescriptor graphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size());
NetworkMetadata networkMetadata = _zeGraphExt->getNetworkMeta(graphDesc);

Expand Down
Loading