diff --git a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp index d21aed69607238..75c497aa5c10fd 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp @@ -6,8 +6,8 @@ #include "intel_npu/common/icompiled_model.hpp" #include "intel_npu/common/igraph.hpp" +#include "intel_npu/common/network_metadata.hpp" #include "intel_npu/common/npu.hpp" -#include "intel_npu/network_metadata.hpp" #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_tensor.hpp" #include "zero_pipeline.hpp" diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index 2c477d061c7ec7..cbce2ebe143e35 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -10,7 +10,7 @@ #include #include "intel_npu/common/filtered_config.hpp" -#include "intel_npu/network_metadata.hpp" +#include "intel_npu/common/network_metadata.hpp" #include "intel_npu/utils/zero/zero_wrappers.hpp" #include "openvino/runtime/itensor.hpp" #include "openvino/runtime/profiling_info.hpp" diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/network_metadata.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/network_metadata.hpp similarity index 87% rename from src/plugins/intel_npu/src/al/include/intel_npu/network_metadata.hpp rename to src/plugins/intel_npu/src/common/include/intel_npu/common/network_metadata.hpp index 6133f1efac69b2..e64fe319d5afb9 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/network_metadata.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/network_metadata.hpp @@ -158,26 +158,4 @@ struct NetworkMetadata final { void bindRelatedDescriptors(); }; -/** - * @struct NetworkDescription - * @brief The object returned by the compiler - * to provide such information about a network as description of inputs and outputs, - * name and compiled network in a format executable by device - */ -struct NetworkDescription final { - NetworkDescription(ov::Tensor&& compiledNetWorkTensor, NetworkMetadata&& metadata) - : metadata(std::move(metadata)), - compiledNetworkTensor(std::move(compiledNetWorkTensor)) {} - // Force move semantics to prevent blob copies - NetworkDescription(const NetworkDescription&) = delete; - NetworkDescription(NetworkDescription&&) = default; - NetworkDescription& operator=(const NetworkDescription&) = delete; - NetworkDescription& operator=(NetworkDescription&&) = default; - ~NetworkDescription() = default; - - NetworkMetadata metadata; - - ov::Tensor compiledNetworkTensor; -}; - } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/al/src/network_metadata.cpp b/src/plugins/intel_npu/src/common/src/network_metadata.cpp similarity index 97% rename from src/plugins/intel_npu/src/al/src/network_metadata.cpp rename to src/plugins/intel_npu/src/common/src/network_metadata.cpp index 5d59a6afee3d12..b27475fd69057b 100644 --- a/src/plugins/intel_npu/src/al/src/network_metadata.cpp +++ b/src/plugins/intel_npu/src/common/src/network_metadata.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "intel_npu/network_metadata.hpp" +#include "intel_npu/common/network_metadata.hpp" namespace intel_npu { diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp index e4242dc01770ff..ef43e128027ee3 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp @@ -9,8 +9,11 @@ #include "compiler.h" #include "intel_npu/common/filtered_config.hpp" -#include "intel_npu/network_metadata.hpp" #include "openvino/core/except.hpp" +#include "openvino/core/model.hpp" +#include "openvino/runtime/common.hpp" +#include "openvino/runtime/profiling_info.hpp" +#include "openvino/runtime/tensor.hpp" namespace intel_npu { @@ -26,18 +29,18 @@ class VCLCompilerImpl final : public std::enable_shared_from_this& model, const FilteredConfig& config) const; + ov::Tensor compile(const std::shared_ptr& model, const FilteredConfig& config) const; /** * @brief Compiles the model, weights separation enabled. All init schedules along with the main one are compiled in * the same scope. - * @return A "NetworkDescription" object for each init schedule, followed by another one corresponding to the main + * @return An ov::Tensor object for each init schedule, followed by another one corresponding to the main * part. */ - std::vector> compileWsOneShot(const std::shared_ptr& model, - const FilteredConfig& config) const; + std::vector compileWsOneShot(const std::shared_ptr& model, + const FilteredConfig& config) const; /** * @brief Sequential compilation of Init(s) and Main * @@ -52,9 +55,9 @@ class VCLCompilerImpl final : public std::enable_shared_from_this& model, - const FilteredConfig& config, - size_t callNumber) const; + ov::Tensor compileWsIterative(const std::shared_ptr& model, + const FilteredConfig& config, + size_t callNumber) const; /** * @brief Returns information about supported layers of the network passed * @param model The model to be queried @@ -64,17 +67,6 @@ class VCLCompilerImpl final : public std::enable_shared_from_this& model, const FilteredConfig& config) const; - /** - * @brief Parses already compiled network to extract meta information: - * inputs and outputs descriptions - * @param network compiled network represented as a vector of char - * @param config a reference to NPUConfig containing plugin config options - * Note: compilation options will be ignored, - * since the network is already compiled - * @return a shared pointer on an object implementing NetworkDescription interface - */ - NetworkMetadata parse(const std::vector& network, const FilteredConfig& config) const; - /** * @brief Returns the compiler version * @return composite uint32_t value of compiler version. @@ -102,9 +94,9 @@ class VCLCompilerImpl final : public std::enable_shared_from_this& model, - const FilteredConfig& config, - const bool storeWeightlessCacheAttributeFlag) const; + ov::Tensor compile(const std::shared_ptr& model, + const FilteredConfig& config, + const bool storeWeightlessCacheAttributeFlag) const; vcl_log_handle_t _logHandle = nullptr; vcl_compiler_handle_t _compilerHandle = nullptr; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/dynamic_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/dynamic_graph.hpp index 5ca8f99baacf59..def759ce7a7e1e 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/dynamic_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/dynamic_graph.hpp @@ -7,7 +7,7 @@ #include #include "intel_npu/common/idynamic_graph.hpp" -#include "intel_npu/network_metadata.hpp" +#include "intel_npu/common/network_metadata.hpp" #include "intel_npu/utils/zero/zero_init.hpp" #include "npu_vm_runtime_api.hpp" #include "openvino/runtime/so_ptr.hpp" diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/weightless_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/weightless_graph.hpp index bbfc23af484893..1b39c3927d04c2 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/weightless_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/weightless_graph.hpp @@ -111,7 +111,6 @@ class WeightlessGraph final : public Graph { std::vector> _initsCommandLists; std::vector> _initsFences; std::shared_ptr _initsCommandQueue; - uint32_t _initsCommandQueueGroupOrdinal = 0; /** * @brief Tensors holding the L0 buffers corresponding to the inputs of the main schedule. diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp index 13cc5e013162ad..605da73453e9dd 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp @@ -4,7 +4,7 @@ #pragma once -#include "intel_npu/network_metadata.hpp" +#include "intel_npu/common/network_metadata.hpp" namespace intel_npu { diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index 2252a0b699304d..b819a7390ff97d 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -13,7 +13,7 @@ #include #include -#include "intel_npu/network_metadata.hpp" +#include "intel_npu/common/network_metadata.hpp" #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" #include "model_serializer.hpp" diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp index 47ee6af72c6614..787f6f0213668a 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp @@ -372,14 +372,13 @@ std::shared_ptr VCLCompilerImpl::getLinkedLibrary() const { return VCLApi::getInstance()->getLibrary(); } -NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, - const FilteredConfig& config) const { +ov::Tensor VCLCompilerImpl::compile(const std::shared_ptr& model, const FilteredConfig& config) const { return compile(model, config, false); } -NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, - const FilteredConfig& config, - const bool storeWeightlessCacheAttributeFlag) const { +ov::Tensor VCLCompilerImpl::compile(const std::shared_ptr& model, + const FilteredConfig& config, + const bool storeWeightlessCacheAttributeFlag) const { _logger.debug("compile start"); /// Check the linked vcl version whether supported in plugin @@ -454,12 +453,8 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr(allocator.m_allocated)); - // Use empty metadata as VCL does not support metadata extraction - NetworkMetadata metadata; - _logger.debug("compile end, blob size:%d", allocator.m_size); - return NetworkDescription(make_tensor_from_aligned_addr(allocator.m_allocated, allocator.m_size), - std::move(metadata)); + return make_tensor_from_aligned_addr(allocator.m_allocated, allocator.m_size); } else { OPENVINO_THROW("Not supported VCL version: %d.%d, please use VCL 6.1 or later", _vclVersion.major, @@ -467,9 +462,8 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr> VCLCompilerImpl::compileWsOneShot( - const std::shared_ptr& model, - const FilteredConfig& config) const { +std::vector VCLCompilerImpl::compileWsOneShot(const std::shared_ptr& model, + const FilteredConfig& config) const { _logger.debug("compileWsOneShot start"); /// Check the linked vcl version whether supported in plugin @@ -529,32 +523,22 @@ std::vector> VCLCompilerImpl::compileWsOneSh OPENVINO_THROW("Failed to create VCL executable, blobCount is zero"); } - std::vector> networkDescrs; + std::vector initMainTensors; for (auto& blob : allocator.m_info) { - // Use empty metadata as VCL does not support metadata extraction - NetworkMetadata metadata; - networkDescrs.emplace_back( - std::make_shared(make_tensor_from_aligned_addr(blob.first, blob.second), - std::move(metadata))); + initMainTensors.emplace_back(make_tensor_from_aligned_addr(blob.first, blob.second)); } - return networkDescrs; + return initMainTensors; } -NetworkDescription VCLCompilerImpl::compileWsIterative(const std::shared_ptr& model, - const FilteredConfig& config, - size_t callNumber) const { +ov::Tensor VCLCompilerImpl::compileWsIterative(const std::shared_ptr& model, + const FilteredConfig& config, + size_t callNumber) const { _logger.debug("compileWsIterative start"); FilteredConfig updatedConfig = config; updatedConfig.update({{ov::intel_npu::ws_compile_call_number.name(), std::to_string(callNumber)}}); return compile(model, updatedConfig, true); } -intel_npu::NetworkMetadata VCLCompilerImpl::parse(const std::vector& network, - const FilteredConfig& config) const { - // VCL returns empty metadata. In plugin adapter, use driver metadata instead. - OPENVINO_THROW_NOT_IMPLEMENTED("VCL does not support parse."); -} - std::vector VCLCompilerImpl::process_profiling_output(const std::vector& profData, const std::vector& network) const { _logger.debug("process_profiling_output start"); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 41dc870e84d02c..8e857f9e081eca 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -64,12 +64,9 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrcompile(model, config); + auto tensor = _compiler->compile(model, config); _logger.debug("compile end"); - ov::Tensor tensor; - tensor = std::move(networkDesc.compiledNetworkTensor); - if (config.get() == "HostCompile") { // metadata will be obtained in initialze() of DynamicGraph _logger.debug("Use dynamicGraph to hold blob for HostCompile mode!"); @@ -133,20 +130,17 @@ std::shared_ptr PluginCompilerAdapter::compileWS(std::shared_ptr()) { case ov::intel_npu::WSVersion::ONE_SHOT: { - std::vector> initMainNetworkDescriptions = - _compiler->compileWsOneShot(model, localConfig); + std::vector initMainTensors = _compiler->compileWsOneShot(model, localConfig); - std::shared_ptr mainNetworkDescription = initMainNetworkDescriptions.back(); - initMainNetworkDescriptions.pop_back(); - if (initMainNetworkDescriptions.empty()) { + tensorMain = initMainTensors.back(); + initMainTensors.pop_back(); + if (initMainTensors.empty()) { _logger.warning("NPU compiler did not produce any init schedules. " "This likely means that the compiled model blob has weights inside even " "though weightless compilation was requested."); } - std::vector> initNetworkDescriptions = - std::move(initMainNetworkDescriptions); - tensorMain = std::move(mainNetworkDescription->compiledNetworkTensor); + tensorsInits = std::move(initMainTensors); if (_zeGraphExt) { // Depending on the config, we may get an error when trying to @@ -164,13 +158,9 @@ std::shared_ptr PluginCompilerAdapter::compileWS(std::shared_ptrcompiledNetworkTensor); - + initGraphDescriptors.reserve(tensorsInits.size()); + initNetworkMetadata.reserve(tensorsInits.size()); + for (const auto& tensor : tensorsInits) { GraphDescriptor initGraphDesc; NetworkMetadata initNetworkMeta; if (_zeGraphExt) { @@ -189,7 +179,6 @@ std::shared_ptr PluginCompilerAdapter::compileWS(std::shared_ptr PluginCompilerAdapter::compileWS(std::shared_ptr targetModel = model; size_t i = 0; - while (auto networkDescription = - std::make_shared(_compiler->compileWsIterative(targetModel, localConfig, i++))) { - ov::Tensor tensor; - tensor = std::move(networkDescription->compiledNetworkTensor); - + while (auto tensor = _compiler->compileWsIterative(targetModel, localConfig, i++)) { GraphDescriptor graphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size()); NetworkMetadata networkMetadata = _zeGraphExt->getNetworkMeta(graphDesc);