Skip to content

Commit cba95d5

Browse files
committed
Move context control from dynamic pipeline to dynamic graph
Signed-off-by: Xin Wang <xin1.wang@intel.com>
1 parent 903dac0 commit cba95d5

File tree

7 files changed

+292
-272
lines changed

7 files changed

+292
-272
lines changed

src/plugins/intel_npu/src/backend/include/zero_dynamic_pipeline.hpp

Lines changed: 5 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,6 @@
1010
namespace intel_npu {
1111

1212
class DynamicPipeline final : public IPipeline {
13-
enum ReuseCmdListMode {
14-
ENABLE_EXECUTION_CONTEXT_CREATION,
15-
ENABLE_REUSE_WITH_MUTABLE_COMMANDLIST,
16-
ENABLE_REUSE_WITHOUT_MUTATING_COMMANDLIST,
17-
DISABLE_EXECUTION_CONTEXT_CREATION
18-
};
19-
2013
struct PipelinedCommandLists {
2114
mutable IDynamicGraph::GraphArguments _binding;
2215

@@ -61,37 +54,14 @@ class DynamicPipeline final : public IPipeline {
6154
const ov::Shape& shapes) {
6255
if (arg_index < _binding._inputs.size()) {
6356
_binding._inputs[arg_index].setArg(arg_value);
64-
// Only store the valid shape dimensions
65-
for (int64_t i = 0; i < _binding._inputs[arg_index]._dimsCount; i++) {
66-
_binding._inputs[arg_index]._sizes[i] = shapes[i];
67-
}
68-
69-
if (!strides.empty()) {
70-
for (int64_t i = 0; i < _binding._inputs[arg_index]._dimsCount; i++) {
71-
_binding._inputs[arg_index]._strides[i] = strides[i];
72-
}
73-
} else {
74-
// Need stride based on element but not byte, calc from shape
75-
_binding._inputs[arg_index].updateStride();
76-
}
57+
_binding._inputs[arg_index].setSize(shapes);
58+
_binding._inputs[arg_index].setStrides(strides);
7759
} else {
7860
size_t output_index = static_cast<size_t>(arg_index) - _binding._inputs.size();
7961
if (output_index < _binding._outputs.size()) {
8062
_binding._outputs[output_index].setArg(arg_value);
81-
82-
// Only store the valid shape dimensions
83-
for (int64_t i = 0; i < _binding._outputs[output_index]._dimsCount; i++) {
84-
_binding._outputs[output_index]._sizes[i] = shapes[i];
85-
}
86-
87-
if (!strides.empty()) {
88-
for (int64_t i = 0; i < _binding._outputs[output_index]._dimsCount; i++) {
89-
_binding._outputs[output_index]._strides[i] = strides[i];
90-
}
91-
} else {
92-
// Need stride based on element but not byte, calc from shape
93-
_binding._outputs[output_index].updateStride();
94-
}
63+
_binding._outputs[output_index].setSize(shapes);
64+
_binding._outputs[output_index].setStrides(strides);
9565
}
9666
}
9767
}
@@ -101,12 +71,6 @@ class DynamicPipeline final : public IPipeline {
10171
cmd_list->reset();
10272
}
10373
}
104-
105-
void closeCommandList() {
106-
for (auto& cmd_list : _commandLists) {
107-
cmd_list->close();
108-
}
109-
}
11074
};
11175

11276
public:
@@ -119,7 +83,7 @@ class DynamicPipeline final : public IPipeline {
11983

12084
DynamicPipeline(const DynamicPipeline&) = delete;
12185
DynamicPipeline& operator=(const DynamicPipeline&) = delete;
122-
~DynamicPipeline() override;
86+
~DynamicPipeline() override = default;
12387

12488
void push() override;
12589
void pull() override;
@@ -134,8 +98,6 @@ class DynamicPipeline final : public IPipeline {
13498

13599
private:
136100
std::vector<std::unique_ptr<PipelinedCommandLists>> _command_lists;
137-
std::vector<void*> _executionContexts;
138-
ReuseCmdListMode _reuseCmdListMode = ENABLE_EXECUTION_CONTEXT_CREATION;
139101
};
140102

141103
} // namespace intel_npu

src/plugins/intel_npu/src/backend/src/zero_dynamic_infer_request.cpp

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ void ZeroDynamicInferRequest::update_command_list_for_tensor(SyncInferRequest::F
4040
auto& levelZeroTensor =
4141
foundPort.is_input() ? get_level_zero_input(foundPort.idx) : _levelZeroOutputTensors.at(foundPort.idx);
4242

43+
auto originallevelZeroTensor = levelZeroTensor;
44+
4345
try {
4446
_logger.debug("set_tensor - create zero tensor");
4547
OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "create zero tensor");
@@ -72,7 +74,8 @@ void ZeroDynamicInferRequest::update_command_list_for_tensor(SyncInferRequest::F
7274
OPENVINO_ASSERT(levelZeroTensor->data(), "Empty buffer");
7375

7476
OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "update_graph_arguments");
75-
if (levelZeroTensor->get_byte_size() != tensor->get_byte_size()) {
77+
if (originallevelZeroTensor != nullptr && originallevelZeroTensor->get_shape() != tensor->get_shape()) {
78+
_logger.debug("set_tensor - update graph arguments with user tensor pointer since shape is changed");
7679
_pipeline->update_graph_arguments(foundPort.is_input()
7780
? _metadata.inputs.at(foundPort.idx).indexUsedByDriver
7881
: _metadata.outputs.at(foundPort.idx).indexUsedByDriver,
@@ -81,6 +84,8 @@ void ZeroDynamicInferRequest::update_command_list_for_tensor(SyncInferRequest::F
8184
_isTensorChanged = true;
8285
} else {
8386
// This L0 tensor shall have same info with user tensor
87+
_logger.debug(
88+
"set_tensor - update graph arguments without user tensor pointer since shape is not changed");
8489
_pipeline->update_graph_arguments(foundPort.is_input()
8590
? _metadata.inputs.at(foundPort.idx).indexUsedByDriver
8691
: _metadata.outputs.at(foundPort.idx).indexUsedByDriver,
@@ -106,6 +111,7 @@ void ZeroDynamicInferRequest::update_command_list_for_tensors(SyncInferRequest::
106111
get_level_zero_inputs(foundPort.idx).resize(tensors.size());
107112

108113
for (size_t i = 0; i < tensors.size(); i++) {
114+
auto originalLevelZeroTensor = get_level_zero_input(foundPort.idx, i);
109115
try {
110116
_logger.debug("set_tensors - create zero tensor");
111117
OV_ITT_TASK_NEXT(ZERO_SET_TENSORS, "create zero tensor");
@@ -124,11 +130,22 @@ void ZeroDynamicInferRequest::update_command_list_for_tensors(SyncInferRequest::
124130
if (_pipelineIsCreated && !_dynamicBatchValueChanged) {
125131
OPENVINO_ASSERT(get_level_zero_input(foundPort.idx, i)->data(), "Empty buffer");
126132
OV_ITT_TASK_NEXT(ZERO_SET_TENSORS, "updateCommandList");
127-
_pipeline->update_graph_arguments(_metadata.inputs.at(foundPort.idx).indexUsedByDriver,
128-
get_level_zero_input(foundPort.idx, i),
129-
i,
130-
tensors.at(i)._ptr);
131-
_isTensorChanged = true;
133+
if (originalLevelZeroTensor != nullptr &&
134+
originalLevelZeroTensor->get_shape() != tensors.at(i)->get_shape()) {
135+
_logger.debug(
136+
"set_tensors - update graph arguments with user tensor pointer since shape is changed");
137+
_pipeline->update_graph_arguments(_metadata.inputs.at(foundPort.idx).indexUsedByDriver,
138+
get_level_zero_input(foundPort.idx, i),
139+
i,
140+
tensors.at(i)._ptr);
141+
_isTensorChanged = true;
142+
} else {
143+
_logger.debug(
144+
"set_tensors - update graph arguments without user tensor pointer since shape is not changed");
145+
_pipeline->update_graph_arguments(_metadata.inputs.at(foundPort.idx).indexUsedByDriver,
146+
get_level_zero_input(foundPort.idx, i),
147+
i);
148+
}
132149
}
133150
}
134151
}

src/plugins/intel_npu/src/backend/src/zero_dynamic_pipeline.cpp

Lines changed: 13 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,6 @@ DynamicPipeline::DynamicPipeline(const std::shared_ptr<ZeroInitStructsHolder>& i
5656
intel_npu::IDynamicGraph* dynamicGraph = dynamic_cast<intel_npu::IDynamicGraph*>(graph.get());
5757
OPENVINO_ASSERT(dynamicGraph != nullptr, "Failed to cast graph to IDynamicGraph");
5858

59-
auto reuseCmdList = getenv("ENABLED_HOST_COMPILE_REUSE_CMDLIST");
60-
if (reuseCmdList != nullptr) {
61-
if (std::string(reuseCmdList) == "1")
62-
_reuseCmdListMode = ENABLE_REUSE_WITH_MUTABLE_COMMANDLIST;
63-
else if (std::string(reuseCmdList) == "2")
64-
_reuseCmdListMode = ENABLE_REUSE_WITHOUT_MUTATING_COMMANDLIST;
65-
else {
66-
_reuseCmdListMode = DISABLE_EXECUTION_CONTEXT_CREATION;
67-
}
68-
}
69-
7059
if (!_sync_output_with_fences) {
7160
_event_pool = std::make_shared<EventPool>(_init_structs->getDevice(),
7261
_init_structs->getContext(),
@@ -86,13 +75,6 @@ DynamicPipeline::DynamicPipeline(const std::shared_ptr<ZeroInitStructsHolder>& i
8675
_command_lists.emplace_back(std::make_unique<PipelinedCommandLists>(num_of_subgraphs, _init_structs));
8776
}
8877

89-
_executionContexts.resize(_batch_size, nullptr);
90-
if (_reuseCmdListMode != DISABLE_EXECUTION_CONTEXT_CREATION) {
91-
for (size_t i = 0; i < _batch_size; i++) {
92-
_executionContexts[i] = dynamicGraph->create_execution_context();
93-
}
94-
}
95-
9678
if (_sync_output_with_fences) {
9779
_fences.reserve(_batch_size);
9880

@@ -193,19 +175,8 @@ DynamicPipeline::DynamicPipeline(const std::shared_ptr<ZeroInitStructsHolder>& i
193175
_logger.debug("DynamicPipeline - initialization completed");
194176
}
195177

196-
DynamicPipeline::~DynamicPipeline() {
197-
auto dynamicGraph = dynamic_cast<intel_npu::IDynamicGraph*>(_graph.get());
198-
for (auto executionContext : _executionContexts) {
199-
if (executionContext) {
200-
dynamicGraph->destroy_execution_context(executionContext);
201-
}
202-
}
203-
_executionContexts.clear();
204-
}
205-
206178
void DynamicPipeline::push() {
207179
_logger.debug("DynamicPipeline - push() started");
208-
static bool isFirst = true;
209180

210181
auto* dynamicGraph = dynamic_cast<IDynamicGraph*>(_graph.get());
211182
OPENVINO_ASSERT(dynamicGraph != nullptr, "Failed to cast graph to IDynamicGraph");
@@ -233,44 +204,13 @@ void DynamicPipeline::push() {
233204
}
234205
}
235206

236-
if (_reuseCmdListMode == ENABLE_EXECUTION_CONTEXT_CREATION ||
237-
_reuseCmdListMode == DISABLE_EXECUTION_CONTEXT_CREATION || isFirst) {
238-
command_lists->resetCommandList();
239-
dynamicGraph->execute(_init_structs,
240-
command_lists->getBinding(),
241-
command_lists->getHandles(),
242-
commandQueueHandle,
243-
fence,
244-
event,
245-
nullptr,
246-
_executionContexts.at(i));
247-
isFirst = false;
248-
} else {
249-
auto& cmdLists = command_lists->_commandListHandles;
250-
if (_reuseCmdListMode == ENABLE_REUSE_WITH_MUTABLE_COMMANDLIST) {
251-
uint64_t numArgs = graphArguments._inputs.size() + graphArguments._outputs.size();
252-
253-
// tentatively populates all updated arguments.
254-
std::vector<uint64_t> argIndexArray(numArgs);
255-
for (uint64_t i = 0; i < numArgs; ++i) {
256-
argIndexArray[i] = i;
257-
}
258-
dynamic_cast<IDynamicGraph*>(_graph.get())
259-
->update_mutable_commandlist(_init_structs, command_lists->getBinding(), argIndexArray);
260-
// according to spec, CloseCommandList should be called after
261-
// UpdateMutableCommandList is called.
262-
command_lists->closeCommandList();
263-
}
264-
265-
auto cmdQueue = _graph->get_command_queue();
266-
auto result = zeCommandQueueExecuteCommandLists(cmdQueue->handle(),
267-
static_cast<uint32_t>(cmdLists.size()),
268-
cmdLists.data(),
269-
fence);
270-
if (result != ZE_RESULT_SUCCESS) {
271-
OPENVINO_THROW("Failed to submit command lists");
272-
}
273-
}
207+
dynamicGraph->execute(_init_structs,
208+
graphArguments,
209+
command_lists->getHandles(),
210+
commandQueueHandle,
211+
fence,
212+
event,
213+
nullptr);
274214
}
275215

276216
_logger.debug("push - completed");
@@ -304,8 +244,7 @@ void DynamicPipeline::reset() const {
304244
_events.at(i)->reset();
305245
}
306246
}
307-
308-
_logger.debug("reset - completed");
247+
_logger.debug("DynamicPipeline - reset() completed");
309248
}
310249

311250
void DynamicPipeline::update_graph_arguments(uint32_t index,
@@ -355,19 +294,11 @@ void DynamicPipeline::update_graph_arguments(uint32_t index,
355294
"Command list index is higher than the number of Command lists ",
356295
batch_index);
357296

358-
if (tensor->get_element_type().bitwidth() < 8 || tensor->is_continuous() || tensor->get_strides().empty()) {
359-
_command_lists.at(batch_index)
360-
->updateMutableCommandList(index,
361-
zeroTensor->data(),
362-
get_strides(tensor->get_strides(), elementSize),
363-
tensor->get_shape());
364-
} else {
365-
_command_lists.at(batch_index)
366-
->updateMutableCommandList(index,
367-
zeroTensor->data(),
368-
get_strides(tensor->get_strides(), elementSize),
369-
tensor->get_shape());
370-
}
297+
_command_lists.at(batch_index)
298+
->updateMutableCommandList(index,
299+
zeroTensor->data(),
300+
get_strides(tensor->get_strides(), elementSize),
301+
tensor->get_shape());
371302
}
372303

373304
} // namespace intel_npu

src/plugins/intel_npu/src/common/include/intel_npu/common/idynamic_graph.hpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ namespace intel_npu {
1111

1212
class IDynamicGraph : public IGraph {
1313
public:
14-
typedef void* execution_context_handle_t;
1514
struct MemRefType {
1615
const void* _basePtr;
1716
const void* _data;
@@ -20,6 +19,9 @@ class IDynamicGraph : public IGraph {
2019
std::vector<int64_t> _strides;
2120
int64_t _dimsCount;
2221
std::shared_ptr<void> _impl;
22+
bool _ptrUpdated = false;
23+
bool _shapeUpdated = false;
24+
bool _strideUpdated = false;
2325

2426
MemRefType() : _basePtr(nullptr), _data(nullptr), _offset(0), _sizes(), _strides(), _dimsCount(0) {}
2527

@@ -38,6 +40,7 @@ class IDynamicGraph : public IGraph {
3840

3941
void setArg(const void* arg);
4042
void setSize(const ov::Shape& shape);
43+
void setStrides(const ov::Strides& strides);
4144
void set(const void* basePtr, int64_t offset, std::shared_ptr<ov::ITensor> tensor);
4245
void updateStride();
4346
bool compare(const MemRefType& memref);
@@ -66,21 +69,14 @@ class IDynamicGraph : public IGraph {
6669
ze_command_queue_handle_t commandQueue,
6770
ze_fence_handle_t inferenceFence,
6871
ze_event_handle_t event,
69-
ze_graph_profiling_pool_handle_t profiling,
70-
execution_context_handle_t executionContext);
72+
ze_graph_profiling_pool_handle_t profiling);
7173

7274
virtual void getBinding(GraphArguments& args);
7375

7476
virtual uint64_t get_num_subgraphs() const;
7577

7678
virtual void predict_output_shape(std::vector<MemRefType>& inputDescriptors,
7779
std::vector<MemRefType>& outputDescriptors);
78-
79-
virtual execution_context_handle_t create_execution_context();
80-
virtual void destroy_execution_context(execution_context_handle_t);
81-
virtual void update_mutable_commandlist(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
82-
GraphArguments& args,
83-
const std::vector<uint64_t>& argIndexArray);
8480
};
8581

8682
} // namespace intel_npu

0 commit comments

Comments (0)