Skip to content

Commit ef666b1

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web' (#1)
2 parents 97d52a1 + 835a05d commit ef666b1

File tree

11 files changed

+723
-271
lines changed

11 files changed

+723
-271
lines changed

sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

Lines changed: 228 additions & 70 deletions
Large diffs are not rendered by default.

sycl/include/CL/sycl/detail/queue_impl.hpp

Lines changed: 198 additions & 95 deletions
Large diffs are not rendered by default.

sycl/include/CL/sycl/info/info_desc.hpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,7 @@ template <typename T, T param> class param_traits {};
276276

277277
#include <CL/sycl/info/program_traits.def>
278278

279-
PARAM_TRAITS_SPEC(queue, reference_count, cl_uint)
280-
PARAM_TRAITS_SPEC(queue, context, cl::sycl::context)
281-
PARAM_TRAITS_SPEC(queue, device, cl::sycl::device)
279+
#include <CL/sycl/info/queue_traits.def>
282280

283281
#undef PARAM_TRAITS_SPEC
284282
#undef PARAM_TRAITS_SPEC_WITH_INPUT
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
PARAM_TRAITS_SPEC(queue, reference_count, cl_uint)
2+
PARAM_TRAITS_SPEC(queue, context, cl::sycl::context)
3+
PARAM_TRAITS_SPEC(queue, device, cl::sycl::device)
4+

sycl/include/CL/sycl/property_list.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ template <class T> class PropertyHolder {
7878

7979
const T &getProp() const {
8080
assert(true == m_Initialized && "Property was not set!");
81-
return *(T *)m_Mem;
81+
return *(const T *)m_Mem;
8282
}
8383
bool isInitialized() const { return m_Initialized; }
8484

sycl/include/CL/sycl/queue.hpp

Lines changed: 197 additions & 78 deletions
Large diffs are not rendered by default.

sycl/source/detail/queue_impl.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ namespace detail {
2222
template <> cl_uint queue_impl::get_info<info::queue::reference_count>() const {
2323
RT::PiResult result = PI_SUCCESS;
2424
if (!is_host())
25-
PI_CALL(piQueueGetInfo)(m_CommandQueue, PI_QUEUE_INFO_REFERENCE_COUNT,
25+
PI_CALL(piQueueGetInfo)(MCommandQueue, PI_QUEUE_INFO_REFERENCE_COUNT,
2626
sizeof(result), &result, nullptr);
2727
return result;
2828
}
@@ -35,7 +35,7 @@ template <> device queue_impl::get_info<info::queue::device>() const {
3535
return get_device();
3636
}
3737

38-
event queue_impl::memset(std::shared_ptr<detail::queue_impl> Impl, void *Ptr,
38+
event queue_impl::memset(shared_ptr_class<detail::queue_impl> Impl, void *Ptr,
3939
int Value, size_t Count) {
4040
context Context = get_context();
4141
RT::PiEvent Event = nullptr;
@@ -47,7 +47,7 @@ event queue_impl::memset(std::shared_ptr<detail::queue_impl> Impl, void *Ptr,
4747
return event(pi::cast<cl_event>(Event), Context);
4848
}
4949

50-
event queue_impl::memcpy(std::shared_ptr<detail::queue_impl> Impl, void *Dest,
50+
event queue_impl::memcpy(shared_ptr_class<detail::queue_impl> Impl, void *Dest,
5151
const void *Src, size_t Count) {
5252
context Context = get_context();
5353
RT::PiEvent Event = nullptr;

sycl/source/detail/scheduler/graph_builder.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue,
123123
MemObject->MRecord.reset(new MemObjRecord{/*MAllocaCommands*/ {},
124124
/*MReadLeaves*/ {},
125125
/*MWriteLeaves*/ {},
126-
Queue->get_context_impl(),
126+
Queue->getContextImplPtr(),
127127
/*MMemModified*/ false});
128128

129129
MMemObjs.push_back(MemObject);
@@ -162,7 +162,7 @@ void Scheduler::GraphBuilder::AddNodeToLeaves(MemObjRecord *Record,
162162
UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd(
163163
MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue) {
164164
AllocaCommandBase *AllocaCmd =
165-
findAllocaForReq(Record, Req, Queue->get_context_impl());
165+
findAllocaForReq(Record, Req, Queue->getContextImplPtr());
166166
assert(AllocaCmd && "There must be alloca for requirement!");
167167
UpdateHostRequirementCommand *UpdateCommand =
168168
new UpdateHostRequirementCommand(Queue, *Req, AllocaCmd, &Req->MData);
@@ -171,7 +171,7 @@ UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd(
171171
const Requirement *StoredReq = UpdateCommand->getRequirement();
172172

173173
std::set<Command *> Deps =
174-
findDepsForReq(Record, Req, Queue->get_context_impl());
174+
findDepsForReq(Record, Req, Queue->getContextImplPtr());
175175
for (Command *Dep : Deps) {
176176
UpdateCommand->addDep(DepDesc{Dep, StoredReq, AllocaCmd});
177177
Dep->addUser(UpdateCommand);
@@ -218,7 +218,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove(MemObjRecord *Record,
218218
throw runtime_error("Out of host memory");
219219

220220
std::set<Command *> Deps =
221-
findDepsForReq(Record, Req, Queue->get_context_impl());
221+
findDepsForReq(Record, Req, Queue->getContextImplPtr());
222222
Deps.insert(AllocaCmdDst);
223223
// Get parent allocation of sub buffer to perform full copy of whole buffer
224224
if (IsSuitableSubReq(Req)) {
@@ -237,7 +237,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove(MemObjRecord *Record,
237237
// current context, need to find a parent alloca command for it (it must be
238238
// there)
239239
auto IsSuitableAlloca = [Record, Req](AllocaCommandBase *AllocaCmd) {
240-
bool Res = sameCtx(AllocaCmd->getQueue()->get_context_impl(),
240+
bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(),
241241
Record->MCurContext) &&
242242
// Looking for a parent buffer alloca command
243243
AllocaCmd->getType() == Command::CommandType::ALLOCA;
@@ -279,7 +279,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove(MemObjRecord *Record,
279279
}
280280
UpdateLeaves(Deps, Record, access::mode::read_write);
281281
AddNodeToLeaves(Record, NewCmd, access::mode::read_write);
282-
Record->MCurContext = Queue->get_context_impl();
282+
Record->MCurContext = Queue->getContextImplPtr();
283283
return NewCmd;
284284
}
285285

@@ -298,7 +298,7 @@ Command *Scheduler::GraphBuilder::addCopyBack(Requirement *Req) {
298298
return nullptr;
299299

300300
std::set<Command *> Deps =
301-
findDepsForReq(Record, Req, HostQueue->get_context_impl());
301+
findDepsForReq(Record, Req, HostQueue->getContextImplPtr());
302302
AllocaCommandBase *SrcAllocaCmd =
303303
findAllocaForReq(Record, Req, Record->MCurContext);
304304

@@ -336,7 +336,7 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req) {
336336
AllocaCommandBase *HostAllocaCmd =
337337
getOrCreateAllocaForReq(Record, Req, HostQueue);
338338

339-
if (!sameCtx(HostAllocaCmd->getQueue()->get_context_impl(),
339+
if (!sameCtx(HostAllocaCmd->getQueue()->getContextImplPtr(),
340340
Record->MCurContext))
341341
insertMemoryMove(Record, Req, HostQueue);
342342

@@ -418,7 +418,7 @@ Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, Requirement *Req,
418418
// Going through copying memory between contexts is not supported.
419419
if (Dep.MDepCommand)
420420
CanBypassDep &=
421-
sameCtx(Context, Dep.MDepCommand->getQueue()->get_context_impl());
421+
sameCtx(Context, Dep.MDepCommand->getQueue()->getContextImplPtr());
422422

423423
if (!CanBypassDep) {
424424
RetDeps.insert(DepCmd);
@@ -441,7 +441,7 @@ Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, Requirement *Req,
441441
AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq(
442442
MemObjRecord *Record, Requirement *Req, const ContextImplPtr &Context) {
443443
auto IsSuitableAlloca = [&Context, Req](AllocaCommandBase *AllocaCmd) {
444-
bool Res = sameCtx(AllocaCmd->getQueue()->get_context_impl(), Context);
444+
bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(), Context);
445445
if (IsSuitableSubReq(Req)) {
446446
const Requirement *TmpReq = AllocaCmd->getRequirement();
447447
Res &= TmpReq->MOffsetInBytes == Req->MOffsetInBytes;
@@ -462,7 +462,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
462462
MemObjRecord *Record, Requirement *Req, QueueImplPtr Queue) {
463463

464464
AllocaCommandBase *AllocaCmd =
465-
findAllocaForReq(Record, Req, Queue->get_context_impl());
465+
findAllocaForReq(Record, Req, Queue->getContextImplPtr());
466466

467467
if (!AllocaCmd) {
468468
if (IsSuitableSubReq(Req)) {
@@ -477,7 +477,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
477477
auto *ParentAlloca =
478478
getOrCreateAllocaForReq(Record, &ParentRequirement, Queue);
479479
AllocaCmd = new AllocaSubBufCommand(Queue, *Req, ParentAlloca);
480-
UpdateLeaves(findDepsForReq(Record, Req, Queue->get_context_impl()),
480+
UpdateLeaves(findDepsForReq(Record, Req, Queue->getContextImplPtr()),
481481
Record, access::mode::read_write);
482482
} else {
483483

@@ -530,7 +530,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
530530
AllocaCmd->MIsActive = false;
531531
} else {
532532
LinkedAllocaCmd->MIsActive = false;
533-
Record->MCurContext = Queue->get_context_impl();
533+
Record->MCurContext = Queue->getContextImplPtr();
534534
}
535535
}
536536
}
@@ -576,7 +576,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,
576576
AllocaCommandBase *AllocaCmd = getOrCreateAllocaForReq(Record, Req, Queue);
577577
// If there is alloca command we need to check if the latest memory is in
578578
// required context.
579-
if (!sameCtx(Queue->get_context_impl(), Record->MCurContext)) {
579+
if (!sameCtx(Queue->getContextImplPtr(), Record->MCurContext)) {
580580
// Cannot directly copy memory from OpenCL device to OpenCL device -
581581
// create two copies: device->host and host->device.
582582
if (!Queue->is_host() && !Record->MCurContext->is_host())
@@ -585,7 +585,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr<detail::CG> CommandGroup,
585585
insertMemoryMove(Record, Req, Queue);
586586
}
587587
std::set<Command *> Deps =
588-
findDepsForReq(Record, Req, Queue->get_context_impl());
588+
findDepsForReq(Record, Req, Queue->getContextImplPtr());
589589

590590
for (Command *Dep : Deps)
591591
NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd});

sycl/source/detail/usm/usm_impl.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <CL/sycl/detail/aligned_allocator.hpp>
1111
#include <CL/sycl/detail/os_util.hpp>
1212
#include <CL/sycl/detail/pi.hpp>
13+
#include <CL/sycl/detail/queue_impl.hpp>
1314
#include <CL/sycl/device.hpp>
1415
#include <CL/sycl/usm.hpp>
1516

sycl/source/ordered_queue.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,8 @@ ordered_queue::ordered_queue(cl_command_queue clQueue,
5050
throw runtime_error(
5151
"Failed to build a sycl ordered queue from a cl OOO queue.");
5252

53-
impl =
54-
std::make_shared<detail::queue_impl>(clQueue,
55-
detail::getSyclObjImpl(syclContext), asyncHandler);
53+
impl = std::make_shared<detail::queue_impl>(
54+
m_CommandQueue, detail::getSyclObjImpl(syclContext), asyncHandler);
5655
}
5756

5857
} // namespace sycl

sycl/source/queue.cpp

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include <CL/sycl/detail/queue_impl.hpp>
10+
#include <CL/sycl/event.hpp>
911
#include <CL/sycl/exception_list.hpp>
12+
#include <CL/sycl/handler.hpp>
1013
#include <CL/sycl/queue.hpp>
14+
#include <CL/sycl/stl.hpp>
1115

1216
#include <algorithm>
1317

@@ -37,10 +41,76 @@ queue::queue(const device &syclDevice, const async_handler &asyncHandler,
3741

3842
queue::queue(cl_command_queue clQueue, const context &syclContext,
3943
const async_handler &asyncHandler) {
40-
impl =
41-
std::make_shared<detail::queue_impl>(clQueue,
42-
detail::getSyclObjImpl(syclContext), asyncHandler);
44+
impl = std::make_shared<detail::queue_impl>(
45+
detail::pi::cast<detail::RT::PiQueue>(clQueue),
46+
detail::getSyclObjImpl(syclContext), asyncHandler);
47+
}
48+
49+
queue::queue(const context &syclContext, const device_selector &deviceSelector,
50+
const property_list &propList)
51+
: queue(syclContext, deviceSelector,
52+
detail::getSyclObjImpl(syclContext)->get_async_handler(),
53+
propList) {}
54+
55+
cl_command_queue queue::get() const { return impl->get(); }
56+
57+
context queue::get_context() const { return impl->get_context(); }
58+
59+
device queue::get_device() const { return impl->get_device(); }
60+
61+
bool queue::is_host() const { return impl->is_host(); }
62+
63+
void queue::wait() { impl->wait(); }
64+
65+
void queue::wait_and_throw() { impl->wait_and_throw(); }
66+
67+
void queue::throw_asynchronous() { impl->throw_asynchronous(); }
68+
69+
event queue::memset(void *ptr, int value, size_t count) {
70+
return impl->memset(impl, ptr, value, count);
71+
}
72+
73+
event queue::memcpy(void *dest, const void *src, size_t count) {
74+
return impl->memcpy(impl, dest, src, count);
75+
}
76+
77+
event queue::mem_advise(const void *ptr, size_t length, int advice) {
78+
return impl->mem_advise(ptr, length, advice);
79+
}
80+
81+
event queue::submit_impl(function_class<void(handler &)> CGH) {
82+
return impl->submit(CGH, impl);
83+
}
84+
85+
event queue::submit_impl(function_class<void(handler &)> CGH,
86+
queue secondQueue) {
87+
return impl->submit(CGH, impl, secondQueue.impl);
88+
}
89+
90+
template <info::queue param>
91+
typename info::param_traits<info::queue, param>::return_type
92+
queue::get_info() const {
93+
return impl->get_info<param>();
4394
}
4495

96+
#define PARAM_TRAITS_SPEC(param_type, param, ret_type) \
97+
template ret_type queue::get_info<info::param_type::param>() const;
98+
99+
#include <CL/sycl/info/queue_traits.def>
100+
101+
#undef PARAM_TRAITS_SPEC
102+
103+
template <typename propertyT> bool queue::has_property() const {
104+
return impl->has_property<propertyT>();
105+
}
106+
107+
template <typename propertyT> propertyT queue::get_property() const {
108+
return impl->get_property<propertyT>();
109+
}
110+
111+
template bool queue::has_property<property::queue::enable_profiling>() const;
112+
template property::queue::enable_profiling
113+
queue::get_property<property::queue::enable_profiling>() const;
114+
45115
} // namespace sycl
46116
} // namespace cl

0 commit comments

Comments
 (0)