Skip to content

Commit 6c03c4f

Browse files
sndmitrievbader
authored andcommitted
[SYCL] Add profiling info for host execution
Signed-off-by: Sergey Dmitriev <serguei.n.dmitriev@intel.com>
1 parent 6bcf548 commit 6c03c4f

File tree

5 files changed

+71
-17
lines changed

5 files changed

+71
-17
lines changed

sycl/include/CL/sycl/detail/cg.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ class NDRDescT {
134134
class HostKernelBase {
135135
public:
136136
// The method executes lambda stored using NDRange passed.
137-
virtual void call(const NDRDescT &NDRDesc) = 0;
137+
virtual void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) = 0;
138138
// Return pointer to the lambda object.
139139
// Used to extract captured variables.
140140
virtual char *getPtr() = 0;
@@ -149,7 +149,7 @@ class HostKernel : public HostKernelBase {
149149

150150
public:
151151
HostKernel(KernelType Kernel) : MKernel(Kernel) {}
152-
void call(const NDRDescT &NDRDesc) override {
152+
void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override {
153153
// adjust ND range for serial host:
154154
NDRDescT AdjustedRange;
155155
bool Adjust = false;
@@ -167,7 +167,11 @@ class HostKernel : public HostKernelBase {
167167
Adjust = true;
168168
}
169169
const NDRDescT &R = Adjust ? AdjustedRange : NDRDesc;
170+
if (HPI)
171+
HPI->start();
170172
runOnHost(R);
173+
if (HPI)
174+
HPI->end();
171175
}
172176

173177
char *getPtr() override { return reinterpret_cast<char *>(&MKernel); }

sycl/include/CL/sycl/detail/event_impl.hpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,26 @@ class context;
2121
namespace detail {
2222
class context_impl;
2323
using ContextImplPtr = std::shared_ptr<cl::sycl::detail::context_impl>;
24+
class queue_impl;
25+
26+
// Profiling info for the host execution.
27+
class HostProfilingInfo {
28+
cl_ulong StartTime = 0;
29+
cl_ulong EndTime = 0;
30+
31+
public:
32+
cl_ulong getStartTime() const { return StartTime; }
33+
cl_ulong getEndTime() const { return EndTime; }
34+
35+
void start();
36+
void end();
37+
};
2438

2539
class event_impl {
2640
public:
2741
event_impl() = default;
2842
event_impl(cl_event CLEvent, const context &SyclContext);
43+
event_impl(std::shared_ptr<cl::sycl::detail::queue_impl> Queue);
2944

3045
// Threat all devices that don't support interoperability as host devices to
3146
// avoid attempts to call method get on such events.
@@ -65,11 +80,16 @@ class event_impl {
6580

6681
void setCommand(void *Command) { m_Command = Command; }
6782

83+
HostProfilingInfo *getHostProfilingInfo() {
84+
return m_HostProfilingInfo.get();
85+
}
86+
6887
private:
6988
RT::PiEvent m_Event = nullptr;
7089
ContextImplPtr m_Context;
7190
bool m_OpenCLInterop = false;
7291
bool m_HostEvent = true;
92+
std::unique_ptr<HostProfilingInfo> m_HostProfilingInfo;
7393
void *m_Command = nullptr;
7494
};
7595

sycl/source/detail/event_impl.cpp

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#include <CL/sycl/detail/queue_impl.hpp>
1212
#include <CL/sycl/detail/scheduler/scheduler.hpp>
1313

14+
#include <chrono>
15+
1416
namespace cl {
1517
namespace sycl {
1618
namespace detail {
@@ -81,6 +83,15 @@ event_impl::event_impl(cl_event CLEvent, const context &SyclContext)
8183
PI_CALL(RT::piEventRetain(m_Event));
8284
}
8385

86+
event_impl::event_impl(std::shared_ptr<cl::sycl::detail::queue_impl> Queue) {
87+
if (Queue->is_host() &&
88+
Queue->has_property<property::queue::enable_profiling>()) {
89+
m_HostProfilingInfo.reset(new HostProfilingInfo());
90+
if (!m_HostProfilingInfo)
91+
throw runtime_error("Out of host memory");
92+
}
93+
}
94+
8495
void event_impl::wait(
8596
std::shared_ptr<cl::sycl::detail::event_impl> Self) const {
8697

@@ -110,8 +121,9 @@ event_impl::get_profiling_info<info::event_profiling::command_submit>() const {
110121
return get_event_profiling_info<
111122
info::event_profiling::command_submit>::_(this->getHandleRef());
112123
}
113-
assert(!"Not implemented for host device.");
114-
return (cl_ulong)0;
124+
if (!m_HostProfilingInfo)
125+
throw invalid_object_error("Profiling info is not available.");
126+
return m_HostProfilingInfo->getStartTime();
115127
}
116128

117129
template <>
@@ -121,8 +133,9 @@ event_impl::get_profiling_info<info::event_profiling::command_start>() const {
121133
return get_event_profiling_info<info::event_profiling::command_start>::_(
122134
this->getHandleRef());
123135
}
124-
assert(!"Not implemented for host device.");
125-
return (cl_ulong)0;
136+
if (!m_HostProfilingInfo)
137+
throw invalid_object_error("Profiling info is not available.");
138+
return m_HostProfilingInfo->getStartTime();
126139
}
127140

128141
template <>
@@ -132,17 +145,17 @@ event_impl::get_profiling_info<info::event_profiling::command_end>() const {
132145
return get_event_profiling_info<info::event_profiling::command_end>::_(
133146
this->getHandleRef());
134147
}
135-
assert(!"Not implemented for host device.");
136-
return (cl_ulong)0;
148+
if (!m_HostProfilingInfo)
149+
throw invalid_object_error("Profiling info is not available.");
150+
return m_HostProfilingInfo->getEndTime();
137151
}
138152

139153
template <> cl_uint event_impl::get_info<info::event::reference_count>() const {
140154
if (!m_HostEvent) {
141155
return get_event_info<info::event::reference_count>::_(
142156
this->getHandleRef());
143157
}
144-
assert(!"Not implemented for host device.");
145-
return (cl_ulong)0;
158+
return 0;
146159
}
147160

148161
template <>
@@ -152,10 +165,18 @@ event_impl::get_info<info::event::command_execution_status>() const {
152165
return get_event_info<info::event::command_execution_status>::_(
153166
this->getHandleRef());
154167
}
155-
assert(!"Not implemented for host device.");
156168
return info::event_command_status::complete;
157169
}
158170

171+
static uint64_t getTimestamp() {
172+
auto ts = std::chrono::high_resolution_clock::now().time_since_epoch();
173+
return std::chrono::duration_cast<std::chrono::nanoseconds>(ts).count();
174+
}
175+
176+
void HostProfilingInfo::start() { StartTime = getTimestamp(); }
177+
178+
void HostProfilingInfo::end() { EndTime = getTimestamp(); }
179+
159180
} // namespace detail
160181
} // namespace sycl
161182
} // namespace cl

sycl/source/detail/scheduler/commands.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ std::vector<RT::PiEvent> Command::prepareEvents(ContextImplPtr Context) {
125125
Command::Command(CommandType Type, QueueImplPtr Queue, bool UseExclusiveQueue)
126126
: MQueue(std::move(Queue)), MUseExclusiveQueue(UseExclusiveQueue),
127127
MType(Type), MEnqueued(false) {
128-
MEvent.reset(new detail::event_impl());
128+
MEvent.reset(new detail::event_impl(MQueue));
129129
MEvent->setCommand(this);
130130
MEvent->setContextImpl(detail::getSyclObjImpl(MQueue->get_context()));
131131
}
@@ -550,7 +550,7 @@ void DispatchNativeKernel(void *Blob) {
550550
void **NextArg = (void **)Blob + 1;
551551
for (detail::Requirement *Req : HostTask->MRequirements)
552552
Req->MData = *(NextArg++);
553-
HostTask->MHostKernel->call(HostTask->MNDRDesc);
553+
HostTask->MHostKernel->call(HostTask->MNDRDesc, nullptr);
554554
}
555555

556556
cl_int ExecCGCommand::enqueueImp() {
@@ -702,7 +702,8 @@ cl_int ExecCGCommand::enqueueImp() {
702702
}
703703
if (!RawEvents.empty())
704704
PI_CALL(RT::piEventsWait(RawEvents.size(), &RawEvents[0]));
705-
ExecKernel->MHostKernel->call(NDRDesc);
705+
ExecKernel->MHostKernel->call(NDRDesc,
706+
getEvent()->getHostProfilingInfo());
706707
return CL_SUCCESS;
707708
}
708709

sycl/test/basic_tests/event_profiling_info.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
// RUN: %clangxx -fsycl %s -o %t.out
22
//
3-
// Profiling info is not supported on host device so far.
4-
//
3+
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
54
// RUN: %CPU_RUN_PLACEHOLDER %t.out
65
// RUN: %GPU_RUN_PLACEHOLDER %t.out
76
// RUN: %ACC_RUN_PLACEHOLDER %t.out
@@ -14,6 +13,7 @@
1413
//===----------------------------------------------------------------------===//
1514

1615
#include <CL/sycl.hpp>
16+
#include <cassert>
1717

1818
using namespace cl;
1919

@@ -25,7 +25,15 @@ int main() {
2525
CGH.single_task<class EmptyKernel>([=]() {});
2626
});
2727

28-
Event.get_profiling_info<sycl::info::event_profiling::command_start>();
28+
auto Submit =
29+
Event.get_profiling_info<sycl::info::event_profiling::command_submit>();
30+
auto Start =
31+
Event.get_profiling_info<sycl::info::event_profiling::command_start>();
32+
auto End =
33+
Event.get_profiling_info<sycl::info::event_profiling::command_end>();
34+
35+
assert(Submit <= Start);
36+
assert(Start <= End);
2937

3038
bool Fail = sycl::info::event_command_status::complete !=
3139
Event.get_info<sycl::info::event::command_execution_status>();

0 commit comments

Comments
 (0)