diff --git a/sycl/include/sycl/detail/sycl_mem_obj_t.hpp b/sycl/include/sycl/detail/sycl_mem_obj_t.hpp index 6d6a852ce619a..8b731ce516854 100644 --- a/sycl/include/sycl/detail/sycl_mem_obj_t.hpp +++ b/sycl/include/sycl/detail/sycl_mem_obj_t.hpp @@ -312,6 +312,8 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { bool isInterop() const; + bool isHostPointerReadOnly() const { return MHostPtrReadOnly; } + protected: // An allocateMem helper that determines which host ptr to use void determineHostPtr(const ContextImplPtr &Context, bool InitFromUserData, diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 5e104bb23b99e..c9dbe5aeeca28 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -276,16 +276,12 @@ void *MemoryManager::allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, bool HostPtrReadOnly, size_t Size, const sycl::property_list &) { - // Can return user pointer directly if it points to writable memory. - if (UserPtr && HostPtrReadOnly == false) + // Can return user pointer directly if it is not a nullptr. + if (UserPtr) return UserPtr; - void *NewMem = MemObj->allocateHostMem(); - // Need to initialize new memory if user provides pointer to read only - // memory. - if (UserPtr && HostPtrReadOnly == true) - std::memcpy((char *)NewMem, (char *)UserPtr, Size); - return NewMem; + return MemObj->allocateHostMem(); + ; } void *MemoryManager::allocateInteropMemObject( @@ -312,8 +308,7 @@ static RT::PiMemFlags getMemObjCreationFlags(void *UserPtr, RT::PiMemFlags Result = HostPtrReadOnly ? PI_MEM_ACCESS_READ_ONLY : PI_MEM_FLAGS_ACCESS_RW; if (UserPtr) - Result |= HostPtrReadOnly ? PI_MEM_FLAGS_HOST_PTR_COPY - : PI_MEM_FLAGS_HOST_PTR_USE; + Result |= PI_MEM_FLAGS_HOST_PTR_USE; return Result; } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index b18067bb0bf30..a121e09082c54 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -833,10 +833,11 @@ const char *Command::getBlockReason() const { AllocaCommandBase::AllocaCommandBase(CommandType Type, QueueImplPtr Queue, Requirement Req, - AllocaCommandBase *LinkedAllocaCmd) + AllocaCommandBase *LinkedAllocaCmd, + bool IsConst) : Command(Type, Queue), MLinkedAllocaCmd(LinkedAllocaCmd), - MIsLeaderAlloca(nullptr == LinkedAllocaCmd), MRequirement(std::move(Req)), - MReleaseCmd(Queue, this) { + MIsLeaderAlloca(nullptr == LinkedAllocaCmd), MIsConst(IsConst), + MRequirement(std::move(Req)), MReleaseCmd(Queue, this) { MRequirement.MAccessMode = access::mode::read_write; emitInstrumentationDataProxy(); } @@ -868,9 +869,9 @@ bool AllocaCommandBase::supportsPostEnqueueCleanup() const { return false; } AllocaCommand::AllocaCommand(QueueImplPtr Queue, Requirement Req, bool InitFromUserData, - AllocaCommandBase *LinkedAllocaCmd) + AllocaCommandBase *LinkedAllocaCmd, bool IsConst) : AllocaCommandBase(CommandType::ALLOCA, std::move(Queue), std::move(Req), - LinkedAllocaCmd), + LinkedAllocaCmd, IsConst), MInitFromUserData(InitFromUserData) { // Node event must be created before the dependent edge is added to this node, // so this call must be before the addDep() call. @@ -949,7 +950,7 @@ AllocaSubBufCommand::AllocaSubBufCommand(QueueImplPtr Queue, Requirement Req, std::vector &ToCleanUp) : AllocaCommandBase(CommandType::ALLOCA_SUB_BUF, std::move(Queue), std::move(Req), - /*LinkedAllocaCmd*/ nullptr), + /*LinkedAllocaCmd*/ nullptr, /*IsConst*/ false), MParentAlloca(ParentAlloca) { // Node event must be created before the dependent edge // is added to this node, so this call must be before diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index b1c65612dfc46..5dbbf9160b445 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -364,7 +364,7 @@ class ReleaseCommand : public Command { class AllocaCommandBase : public Command { public: AllocaCommandBase(CommandType Type, QueueImplPtr Queue, Requirement Req, - AllocaCommandBase *LinkedAllocaCmd); + AllocaCommandBase *LinkedAllocaCmd, bool IsConst); ReleaseCommand *getReleaseCmd() { return &MReleaseCmd; } @@ -394,6 +394,8 @@ class AllocaCommandBase : public Command { /// Indicates that the command owns memory allocation in case of connected /// alloca command. bool MIsLeaderAlloca = true; + // Indicates that the data in this allocation must not be modified + bool MIsConst = false; protected: Requirement MRequirement; @@ -406,7 +408,8 @@ class AllocaCommand : public AllocaCommandBase { public: AllocaCommand(QueueImplPtr Queue, Requirement Req, bool InitFromUserData = true, - AllocaCommandBase *LinkedAllocaCmd = nullptr); + AllocaCommandBase *LinkedAllocaCmd = nullptr, + bool IsConst = false); void *getMemAllocation() const final { return MMemAllocation; } void printDot(std::ostream &Stream) const final; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 81283028d7c96..8fa6a54991d94 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -627,17 +627,18 @@ DepDesc Scheduler::GraphBuilder::findDepForRecord(Command *Cmd, // The function searches for the alloca command matching context and // requirement. -AllocaCommandBase * -Scheduler::GraphBuilder::findAllocaForReq(MemObjRecord *Record, - const Requirement *Req, - const ContextImplPtr &Context) { - auto IsSuitableAlloca = [&Context, Req](AllocaCommandBase *AllocaCmd) { +AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq( + MemObjRecord *Record, const Requirement *Req, const ContextImplPtr &Context, + bool AllowConst) { + auto IsSuitableAlloca = [&Context, Req, + AllowConst](AllocaCommandBase *AllocaCmd) { bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(), Context); if (IsSuitableSubReq(Req)) { const Requirement *TmpReq = AllocaCmd->getRequirement(); Res &= AllocaCmd->getType() == Command::CommandType::ALLOCA_SUB_BUF; Res &= TmpReq->MOffsetInBytes == Req->MOffsetInBytes; Res &= TmpReq->MSYCLMemObj->getSize() == Req->MSYCLMemObj->getSize(); + Res &= AllowConst || !AllocaCmd->MIsConst; } return Res; }; @@ -668,8 +669,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( MemObjRecord *Record, const Requirement *Req, QueueImplPtr Queue, std::vector &ToEnqueue) { - AllocaCommandBase *AllocaCmd = - findAllocaForReq(Record, Req, Queue->getContextImplPtr()); + AllocaCommandBase *AllocaCmd = findAllocaForReq( + Record, Req, Queue->getContextImplPtr(), /*AllowConst=*/false); if (!AllocaCmd) { std::vector ToCleanUp; @@ -722,7 +723,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( Scheduler::getInstance().getDefaultHostQueue(); AllocaCommand *HostAllocaCmd = new AllocaCommand( DefaultHostQueue, FullReq, true /* InitFromUserData */, - nullptr /* LinkedAllocaCmd */); + nullptr /* LinkedAllocaCmd */, + MemObj->isHostPointerReadOnly() /* IsConst */); Record->MAllocaCommands.push_back(HostAllocaCmd); Record->MWriteLeaves.push_back(HostAllocaCmd, ToEnqueue); ++(HostAllocaCmd->MLeafCounter); @@ -754,8 +756,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( Queue->is_host() ? checkHostUnifiedMemory(Record->MCurContext) : HostUnifiedMemory; if (PinnedHostMemory || HostUnifiedMemoryOnNonHostDevice) { - AllocaCommandBase *LinkedAllocaCmdCand = - findAllocaForReq(Record, Req, Record->MCurContext); + AllocaCommandBase *LinkedAllocaCmdCand = findAllocaForReq( + Record, Req, Record->MCurContext, /*AllowConst=*/false); // Cannot setup link if candidate is linked already if (LinkedAllocaCmdCand && diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 657db109b5f18..ed7890b5dc412 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -618,7 +618,8 @@ class Scheduler { /// Searches for suitable alloca in memory record. AllocaCommandBase *findAllocaForReq(MemObjRecord *Record, const Requirement *Req, - const ContextImplPtr &Context); + const ContextImplPtr &Context, + bool AllowConst = true); friend class Command;