Skip to content

Commit cba95d5

Browse files
committed
Move context control from dynamic pipeline to dynamic graph
Signed-off-by: Xin Wang <xin1.wang@intel.com>
1 parent 903dac0 commit cba95d5

File tree

7 files changed

+292
-272
lines changed

7 files changed

+292
-272
lines changed

src/plugins/intel_npu/src/backend/include/zero_dynamic_pipeline.hpp

Lines changed: 5 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,6 @@
1010
namespace intel_npu {
1111

1212
class DynamicPipeline final : public IPipeline {
13-
enum ReuseCmdListMode {
14-
ENABLE_EXECUTION_CONTEXT_CREATION,
15-
ENABLE_REUSE_WITH_MUTABLE_COMMANDLIST,
16-
ENABLE_REUSE_WITHOUT_MUTATING_COMMANDLIST,
17-
DISABLE_EXECUTION_CONTEXT_CREATION
18-
};
19-
2013
struct PipelinedCommandLists {
2114
mutable IDynamicGraph::GraphArguments _binding;
2215

@@ -61,37 +54,14 @@ class DynamicPipeline final : public IPipeline {
6154
const ov::Shape& shapes) {
6255
if (arg_index < _binding._inputs.size()) {
6356
_binding._inputs[arg_index].setArg(arg_value);
64-
// Only store the valid shape dimensions
65-
for (int64_t i = 0; i < _binding._inputs[arg_index]._dimsCount; i++) {
66-
_binding._inputs[arg_index]._sizes[i] = shapes[i];
67-
}
68-
69-
if (!strides.empty()) {
70-
for (int64_t i = 0; i < _binding._inputs[arg_index]._dimsCount; i++) {
71-
_binding._inputs[arg_index]._strides[i] = strides[i];
72-
}
73-
} else {
74-
// Need stride based on element but not byte, calc from shape
75-
_binding._inputs[arg_index].updateStride();
76-
}
57+
_binding._inputs[arg_index].setSize(shapes);
58+
_binding._inputs[arg_index].setStrides(strides);
7759
} else {
7860
size_t output_index = static_cast<size_t>(arg_index) - _binding._inputs.size();
7961
if (output_index < _binding._outputs.size()) {
8062
_binding._outputs[output_index].setArg(arg_value);
81-
82-
// Only store the valid shape dimensions
83-
for (int64_t i = 0; i < _binding._outputs[output_index]._dimsCount; i++) {
84-
_binding._outputs[output_index]._sizes[i] = shapes[i];
85-
}
86-
87-
if (!strides.empty()) {
88-
for (int64_t i = 0; i < _binding._outputs[output_index]._dimsCount; i++) {
89-
_binding._outputs[output_index]._strides[i] = strides[i];
90-
}
91-
} else {
92-
// Need stride based on element but not byte, calc from shape
93-
_binding._outputs[output_index].updateStride();
94-
}
63+
_binding._outputs[output_index].setSize(shapes);
64+
_binding._outputs[output_index].setStrides(strides);
9565
}
9666
}
9767
}
@@ -101,12 +71,6 @@ class DynamicPipeline final : public IPipeline {
10171
cmd_list->reset();
10272
}
10373
}
104-
105-
void closeCommandList() {
106-
for (auto& cmd_list : _commandLists) {
107-
cmd_list->close();
108-
}
109-
}
11074
};
11175

11276
public:
@@ -119,7 +83,7 @@ class DynamicPipeline final : public IPipeline {
11983

12084
DynamicPipeline(const DynamicPipeline&) = delete;
12185
DynamicPipeline& operator=(const DynamicPipeline&) = delete;
122-
~DynamicPipeline() override;
86+
~DynamicPipeline() override = default;
12387

12488
void push() override;
12589
void pull() override;
@@ -134,8 +98,6 @@ class DynamicPipeline final : public IPipeline {
13498

13599
private:
136100
std::vector<std::unique_ptr<PipelinedCommandLists>> _command_lists;
137-
std::vector<void*> _executionContexts;
138-
ReuseCmdListMode _reuseCmdListMode = ENABLE_EXECUTION_CONTEXT_CREATION;
139101
};
140102

141103
} // namespace intel_npu

src/plugins/intel_npu/src/backend/src/zero_dynamic_infer_request.cpp

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ void ZeroDynamicInferRequest::update_command_list_for_tensor(SyncInferRequest::F
4040
auto& levelZeroTensor =
4141
foundPort.is_input() ? get_level_zero_input(foundPort.idx) : _levelZeroOutputTensors.at(foundPort.idx);
4242

43+
auto originallevelZeroTensor = levelZeroTensor;
44+
4345
try {
4446
_logger.debug("set_tensor - create zero tensor");
4547
OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "create zero tensor");
@@ -72,7 +74,8 @@ void ZeroDynamicInferRequest::update_command_list_for_tensor(SyncInferRequest::F
7274
OPENVINO_ASSERT(levelZeroTensor->data(), "Empty buffer");
7375

7476
OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "update_graph_arguments");
75-
if (levelZeroTensor->get_byte_size() != tensor->get_byte_size()) {
77+
if (originallevelZeroTensor != nullptr && originallevelZeroTensor->get_shape() != tensor->get_shape()) {
78+
_logger.debug("set_tensor - update graph arguments with user tensor pointer since shape is changed");
7679
_pipeline->update_graph_arguments(foundPort.is_input()
7780
? _metadata.inputs.at(foundPort.idx).indexUsedByDriver
7881
: _metadata.outputs.at(foundPort.idx).indexUsedByDriver,
@@ -81,6 +84,8 @@ void ZeroDynamicInferRequest::update_command_list_for_tensor(SyncInferRequest::F
8184
_isTensorChanged = true;
8285
} else {
8386
// This L0 tensor shall have same info with user tensor
87+
_logger.debug(
88+
"set_tensor - update graph arguments without user tensor pointer since shape is not changed");
8489
_pipeline->update_graph_arguments(foundPort.is_input()
8590
? _metadata.inputs.at(foundPort.idx).indexUsedByDriver
8691
: _metadata.outputs.at(foundPort.idx).indexUsedByDriver,
@@ -106,6 +111,7 @@ void ZeroDynamicInferRequest::update_command_list_for_tensors(SyncInferRequest::
106111
get_level_zero_inputs(foundPort.idx).resize(tensors.size());
107112

108113
for (size_t i = 0; i < tensors.size(); i++) {
114+
auto originalLevelZeroTensor = get_level_zero_input(foundPort.idx, i);
109115
try {
110116
_logger.debug("set_tensors - create zero tensor");
111117
OV_ITT_TASK_NEXT(ZERO_SET_TENSORS, "create zero tensor");
@@ -124,11 +130,22 @@ void ZeroDynamicInferRequest::update_command_list_for_tensors(SyncInferRequest::
124130
if (_pipelineIsCreated && !_dynamicBatchValueChanged) {
125131
OPENVINO_ASSERT(get_level_zero_input(foundPort.idx, i)->data(), "Empty buffer");
126132
OV_ITT_TASK_NEXT(ZERO_SET_TENSORS, "updateCommandList");
127-
_pipeline->update_graph_arguments(_metadata.inputs.at(foundPort.idx).indexUsedByDriver,
128-
get_level_zero_input(foundPort.idx, i),
129-
i,
130-
tensors.at(i)._ptr);
131-
_isTensorChanged = true;
133+
if (originalLevelZeroTensor != nullptr &&
134+
originalLevelZeroTensor->get_shape() != tensors.at(i)->get_shape()) {
135+
_logger.debug(
136+
"set_tensors - update graph arguments with user tensor pointer since shape is changed");
137+
_pipeline->update_graph_arguments(_metadata.inputs.at(foundPort.idx).indexUsedByDriver,
138+
get_level_zero_input(foundPort.idx, i),
139+
i,
140+
tensors.at(i)._ptr);
141+
_isTensorChanged = true;
142+
} else {
143+
_logger.debug(
144+
"set_tensors - update graph arguments without user tensor pointer since shape is not changed");
145+
_pipeline->update_graph_arguments(_metadata.inputs.at(foundPort.idx).indexUsedByDriver,
146+
get_level_zero_input(foundPort.idx, i),
147+
i);
148+
}
132149
}
133150
}
134151
}

src/plugins/intel_npu/src/backend/src/zero_dynamic_pipeline.cpp

Lines changed: 13 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,6 @@ DynamicPipeline::DynamicPipeline(const std::shared_ptr<ZeroInitStructsHolder>& i
5656
intel_npu::IDynamicGraph* dynamicGraph = dynamic_cast<intel_npu::IDynamicGraph*>(graph.get());
5757
OPENVINO_ASSERT(dynamicGraph != nullptr, "Failed to cast graph to IDynamicGraph");
5858

59-
auto reuseCmdList = getenv("ENABLED_HOST_COMPILE_REUSE_CMDLIST");
60-
if (reuseCmdList != nullptr) {
61-
if (std::string(reuseCmdList) == "1")
62-
_reuseCmdListMode = ENABLE_REUSE_WITH_MUTABLE_COMMANDLIST;
63-
else if (std::string(reuseCmdList) == "2")
64-
_reuseCmdListMode = ENABLE_REUSE_WITHOUT_MUTATING_COMMANDLIST;
65-
else {
66-
_reuseCmdListMode = DISABLE_EXECUTION_CONTEXT_CREATION;
67-
}
68-
}
69-
7059
if (!_sync_output_with_fences) {
7160
_event_pool = std::make_shared<EventPool>(_init_structs->getDevice(),
7261
_init_structs->getContext(),
@@ -86,13 +75,6 @@ DynamicPipeline::DynamicPipeline(const std::shared_ptr<ZeroInitStructsHolder>& i
8675
_command_lists.emplace_back(std::make_unique<PipelinedCommandLists>(num_of_subgraphs, _init_structs));
8776
}
8877

89-
_executionContexts.resize(_batch_size, nullptr);
90-
if (_reuseCmdListMode != DISABLE_EXECUTION_CONTEXT_CREATION) {
91-
for (size_t i = 0; i < _batch_size; i++) {
92-
_executionContexts[i] = dynamicGraph->create_execution_context();
93-
}
94-
}
95-
9678
if (_sync_output_with_fences) {
9779
_fences.reserve(_batch_size);
9880

@@ -193,19 +175,8 @@ DynamicPipeline::DynamicPipeline(const std::shared_ptr<ZeroInitStructsHolder>& i
193175
_logger.debug("DynamicPipeline - initialization completed");
194176
}
195177

196-
DynamicPipeline::~DynamicPipeline() {
197-
auto dynamicGraph = dynamic_cast<intel_npu::IDynamicGraph*>(_graph.get());
198-
for (auto executionContext : _executionContexts) {
199-
if (executionContext) {
200-
dynamicGraph->destroy_execution_context(executionContext);
201-
}
202-
}
203-
_executionContexts.clear();
204-
}
205-
206178
void DynamicPipeline::push() {
207179
_logger.debug("DynamicPipeline - push() started");
208-
static bool isFirst = true;
209180

210181
auto* dynamicGraph = dynamic_cast<IDynamicGraph*>(_graph.get());
211182
OPENVINO_ASSERT(dynamicGraph != nullptr, "Failed to cast graph to IDynamicGraph");
@@ -233,44 +204,13 @@ void DynamicPipeline::push() {
233204
}
234205
}
235206

236-
if (_reuseCmdListMode == ENABLE_EXECUTION_CONTEXT_CREATION ||
237-
_reuseCmdListMode == DISABLE_EXECUTION_CONTEXT_CREATION || isFirst) {
238-
command_lists->resetCommandList();
239-
dynamicGraph->execute(_init_structs,
240-
command_lists->getBinding(),
241-
command_lists->getHandles(),
242-
commandQueueHandle,
243-
fence,
244-
event,
245-
nullptr,
246-
_executionContexts.at(i));
247-
isFirst = false;
248-
} else {
249-
auto& cmdLists = command_lists->_commandListHandles;
250-
if (_reuseCmdListMode == ENABLE_REUSE_WITH_MUTABLE_COMMANDLIST) {
251-
uint64_t numArgs = graphArguments._inputs.size() + graphArguments._outputs.size();
252-
253-
// tentatively populates all updated arguments.
254-
std::vector<uint64_t> argIndexArray(numArgs);
255-
for (uint64_t i = 0; i < numArgs; ++i) {
256-
argIndexArray[i] = i;
257-
}
258-
dynamic_cast<IDynamicGraph*>(_graph.get())
259-
->update_mutable_commandlist(_init_structs, command_lists->getBinding(), argIndexArray);
260-
// according to spec, CloseCommandList should be called after
261-
// UpdateMutableCommandList is called.
262-
command_lists->closeCommandList();
263-
}
264-
265-
auto cmdQueue = _graph->get_command_queue();
266-
auto result = zeCommandQueueExecuteCommandLists(cmdQueue->handle(),
267-
static_cast<uint32_t>(cmdLists.size()),
268-
cmdLists.data(),
269-
fence);
270-
if (result != ZE_RESULT_SUCCESS) {
271-
OPENVINO_THROW("Failed to submit command lists");
272-
}
273-
}
207+
dynamicGraph->execute(_init_structs,
208+
graphArguments,
209+
command_lists->getHandles(),
210+
commandQueueHandle,
211+
fence,
212+
event,
213+
nullptr);
274214
}
275215

276216
_logger.debug("push - completed");
@@ -304,8 +244,7 @@ void DynamicPipeline::reset() const {
304244
_events.at(i)->reset();
305245
}
306246
}
307-
308-
_logger.debug("reset - completed");
247+
_logger.debug("DynamicPipeline - reset() completed");
309248
}
310249

311250
void DynamicPipeline::update_graph_arguments(uint32_t index,
@@ -355,19 +294,11 @@ void DynamicPipeline::update_graph_arguments(uint32_t index,
355294
"Command list index is higher than the number of Command lists ",
356295
batch_index);
357296

358-
if (tensor->get_element_type().bitwidth() < 8 || tensor->is_continuous() || tensor->get_strides().empty()) {
359-
_command_lists.at(batch_index)
360-
->updateMutableCommandList(index,
361-
zeroTensor->data(),
362-
get_strides(tensor->get_strides(), elementSize),
363-
tensor->get_shape());
364-
} else {
365-
_command_lists.at(batch_index)
366-
->updateMutableCommandList(index,
367-
zeroTensor->data(),
368-
get_strides(tensor->get_strides(), elementSize),
369-
tensor->get_shape());
370-
}
297+
_command_lists.at(batch_index)
298+
->updateMutableCommandList(index,
299+
zeroTensor->data(),
300+
get_strides(tensor->get_strides(), elementSize),
301+
tensor->get_shape());
371302
}
372303

373304
} // namespace intel_npu

src/plugins/intel_npu/src/common/include/intel_npu/common/idynamic_graph.hpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ namespace intel_npu {
1111

1212
class IDynamicGraph : public IGraph {
1313
public:
14-
typedef void* execution_context_handle_t;
1514
struct MemRefType {
1615
const void* _basePtr;
1716
const void* _data;
@@ -20,6 +19,9 @@ class IDynamicGraph : public IGraph {
2019
std::vector<int64_t> _strides;
2120
int64_t _dimsCount;
2221
std::shared_ptr<void> _impl;
22+
bool _ptrUpdated = false;
23+
bool _shapeUpdated = false;
24+
bool _strideUpdated = false;
2325

2426
MemRefType() : _basePtr(nullptr), _data(nullptr), _offset(0), _sizes(), _strides(), _dimsCount(0) {}
2527

@@ -38,6 +40,7 @@ class IDynamicGraph : public IGraph {
3840

3941
void setArg(const void* arg);
4042
void setSize(const ov::Shape& shape);
43+
void setStrides(const ov::Strides& strides);
4144
void set(const void* basePtr, int64_t offset, std::shared_ptr<ov::ITensor> tensor);
4245
void updateStride();
4346
bool compare(const MemRefType& memref);
@@ -66,21 +69,14 @@ class IDynamicGraph : public IGraph {
6669
ze_command_queue_handle_t commandQueue,
6770
ze_fence_handle_t inferenceFence,
6871
ze_event_handle_t event,
69-
ze_graph_profiling_pool_handle_t profiling,
70-
execution_context_handle_t executionContext);
72+
ze_graph_profiling_pool_handle_t profiling);
7173

7274
virtual void getBinding(GraphArguments& args);
7375

7476
virtual uint64_t get_num_subgraphs() const;
7577

7678
virtual void predict_output_shape(std::vector<MemRefType>& inputDescriptors,
7779
std::vector<MemRefType>& outputDescriptors);
78-
79-
virtual execution_context_handle_t create_execution_context();
80-
virtual void destroy_execution_context(execution_context_handle_t);
81-
virtual void update_mutable_commandlist(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
82-
GraphArguments& args,
83-
const std::vector<uint64_t>& argIndexArray);
8480
};
8581

8682
} // namespace intel_npu

0 commit comments

Comments (0)