Skip to content

Commit 6b65316

Browse files
authored
[OV][ITT][GPU Plugin] Enable default ITT markers for inference and op submission (#33313)
- Enables default ITT markers for higher level operations such as inference pass, op preparation and submission - Follows the same guidelines to standardize the conventions for namespaces: ov::phases::gpu::inference ov::op::gpu - Supports both synchronous and asynchronous operations Enabling default GPU ITT markers using standard convention - Part 3 This PR is the **third** of a series of PRs to standardize the ITT markers in OpenVINO that will be enabled by default through host-side instrumentation. 1. The first PR addresses the enhancements required in ITT and the framework to support the creation and propagation of IDs when asynchronous execution is in play [PR#33639](#33639). 2. The second PR will standardize ITT markers in the CPU and enhance support to include asynchronous execution [PR#33312](#33312). 3. This **third** PR will enable default markers for the GPU plugin to allow visibility into inference pass begin/end and operator preparation and submission within each inference, following the standardized conventions described in 1 and 2. 4. The final PR will extend the same host-side markers to NPU execution, capturing the inference span and pipeline activity. Summary of the current PR (PR#3): Uses the same convention standardized in [PR#33639](#33639) and ensures the namespaces for GPU plugin activity fall under: ov::phases::gpu::inference ov::op::gpu Details: GPU support is enabled with default ITT markers that support synchronous and asynchronous execution. This PR ensures a standardized convention is followed in the namespaces used. Tickets: [CVS-179230](https://jira.devtools.intel.com/browse/CVS-179230) @isanghao Please review this as you are generally aware of what was discussed --------- Signed-off-by: Vasanth Tovinkere <vasanth.tovinkere@intel.com>
1 parent 91f33ea commit 6b65316

File tree

6 files changed

+21
-5
lines changed

6 files changed

+21
-5
lines changed

src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ class CompiledModel : public ov::ICompiledModel {
6161
RemoteContextImpl::Ptr get_context_impl() const {
6262
return m_context;
6363
}
64+
65+
// Helper function to return the model name for ITT tracing
66+
std::string_view get_model_name() const {
67+
return m_model_name;
68+
}
69+
6470
const std::vector<std::shared_ptr<Graph>>& get_graphs() const;
6571
std::shared_ptr<Graph> get_graph(size_t n) const;
6672

src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ class SyncInferRequest : public ov::ISyncInferRequest {
118118
void init_mappings();
119119
bool is_batched_input(const ov::Output<const ov::Node>& port) const;
120120
uint64_t total_output_bytes = 0;
121-
};
121+
// Variable to hold the inference request string with compiled model name
122+
// to prevent this string being constructed for each inference call
123+
std::string m_itt_infer_request_str;};
122124

123125
} // namespace ov::intel_gpu

src/plugins/intel_gpu/include/intel_gpu/runtime/itt.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
namespace ov::intel_gpu {
1515
namespace itt {
1616
namespace domains {
17+
// Domain namespace to define GPU Inference phase tasks
18+
OV_ITT_DOMAIN(intel_gpu_inference, "ov::phases::gpu::inference");
19+
// Domain namespace for all of the operators
20+
OV_ITT_DOMAIN(intel_gpu_op, "ov::op::gpu");
1721
OV_ITT_DOMAIN(intel_gpu_plugin);
1822
} // namespace domains
1923
} // namespace itt

src/plugins/intel_gpu/src/graph/network.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -761,6 +761,7 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
761761

762762
for (auto& inst : _exec_order) {
763763
NODE_DEBUG(*inst);
764+
OV_ITT_SCOPED_TASK_BASE(ov::intel_gpu::itt::domains::intel_gpu_op, openvino::itt::handle(inst->id()));
764765

765766
inst->reset_events();
766767

src/plugins/intel_gpu/src/graph/primitive_inst.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1993,7 +1993,7 @@ void primitive_inst::reset_flags() {
19931993
}
19941994

19951995
void primitive_inst::prepare_primitive() {
1996-
OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("primitive_inst::execute: " + id()));
1996+
OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle(id() + "::prepare"));
19971997
const auto& primitive_id = id();
19981998
if (!_has_valid_input) {
19991999
// For unfused network with dynamic_quantization, we may have empty/unused input
@@ -2184,6 +2184,7 @@ void primitive_inst::prepare_primitive() {
21842184
}
21852185

21862186
void primitive_inst::execute() {
2187+
OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle(id() + "::execute"));
21872188
GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::inference);
21882189
if (get_flag(ExecutionFlags::SKIP)) {
21892190
set_out_event(get_network().get_stream().aggregate_events(_impl_params->dep_events));

src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,17 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr<const CompiledModel>& c
9393
, m_context(std::static_pointer_cast<RemoteContextImpl>(compiled_model->get_context_impl()))
9494
, m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_shape_predictor_settings()))
9595
, m_enable_profiling(m_graph->get_config().get_enable_profiling())
96-
, m_use_external_queue(m_graph->use_external_queue()) {
96+
, m_use_external_queue(m_graph->use_external_queue())
97+
, m_itt_infer_request_str("SyncInferenceGPU::infer::" + std::string(compiled_model->get_model_name())) {
9798
init_mappings();
9899
allocate_inputs();
99100
allocate_outputs();
100101
allocate_states();
101102
}
102103

103104
void SyncInferRequest::infer() {
104-
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::infer");
105+
// String can be constructed once in the constructor
106+
OV_ITT_SCOPED_TASK_BASE(itt::domains::intel_gpu_inference, m_itt_infer_request_str.c_str());
105107
setup_stream_graph();
106108
std::lock_guard<std::mutex> lk(m_graph->get_mutex());
107109
enqueue();
@@ -308,7 +310,7 @@ void SyncInferRequest::enqueue() {
308310
}
309311

310312
void SyncInferRequest::wait() {
311-
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::wait");
313+
OV_ITT_SCOPED_TASK_BASE(itt::domains::intel_gpu_inference, "SyncInferenceGPU::wait");
312314
OPENVINO_ASSERT(!m_internal_outputs.empty(), "[GPU] Inference was not started!\n");
313315

314316
int64_t sync_total_time = 0;

0 commit comments

Comments
 (0)