diff --git a/sycl/include/CL/sycl/detail/circular_buffer.hpp b/sycl/include/CL/sycl/detail/circular_buffer.hpp new file mode 100644 index 0000000000000..49879a050c5d8 --- /dev/null +++ b/sycl/include/CL/sycl/detail/circular_buffer.hpp @@ -0,0 +1,98 @@ +//==---------------- circular_buffer.hpp - Circular buffer -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#include +#include + +__SYCL_INLINE namespace cl { +namespace sycl { +namespace detail { + +// A partial implementation of a circular buffer: once its capacity is full, +// new data overwrites the old. +template class CircularBuffer { +public: + explicit CircularBuffer(size_t Capacity) : MCapacity{Capacity} {}; + + using value_type = T; + using pointer = T *; + using const_pointer = const T *; + using reference = T &; + using const_reference = const T &; + + using iterator = typename std::deque::iterator; + using const_iterator = typename std::deque::const_iterator; + + iterator begin() { return MValues.begin(); } + + const_iterator begin() const { return MValues.begin(); } + + iterator end() { return MValues.end(); } + + const_iterator end() const { return MValues.end(); } + + reference front() { return MValues.front(); } + + const_reference front() const { return MValues.front(); } + + reference back() { return MValues.back(); } + + const_reference back() const { return MValues.back(); } + + reference operator[](size_t Idx) { return MValues[Idx]; } + + const_reference operator[](size_t Idx) const { return MValues[Idx]; } + + size_t size() const { return MValues.size(); } + + size_t capacity() const { return MCapacity; } + + bool empty() const { return MValues.empty(); }; + + bool full() const { return MValues.size() == MCapacity; }; + + void push_back(T Val) { + if (MValues.size() == MCapacity) + MValues.pop_front(); + MValues.push_back(std::move(Val)); + } + + void push_front(T Val) { + if (MValues.size() == MCapacity) + MValues.pop_back(); + MValues.push_front(std::move(Val)); + } + + void pop_back() { MValues.pop_back(); } + + void pop_front() { MValues.pop_front(); } + + void erase(const_iterator Pos) { MValues.erase(Pos); } + + void erase(const_iterator First, const_iterator Last) { + MValues.erase(First, Last); + } + + void clear() { MValues.clear(); } + +private: + // Deque is used as the underlying container for double-ended push/pop + // operations and built-in iterator support. Frequent memory allocations + // and deallocations are a concern, switching to an array/vector might be a + // worthwhile optimization. + std::deque MValues; + const size_t MCapacity; +}; + +} // namespace detail +} // namespace sycl +} // namespace cl diff --git a/sycl/include/CL/sycl/detail/scheduler/scheduler.hpp b/sycl/include/CL/sycl/detail/scheduler/scheduler.hpp index d936732e4566f..191aeb17ef465 100644 --- a/sycl/include/CL/sycl/detail/scheduler/scheduler.hpp +++ b/sycl/include/CL/sycl/detail/scheduler/scheduler.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include @@ -32,21 +33,25 @@ using ContextImplPtr = std::shared_ptr; // The MemObjRecord is created for each memory object used in command // groups. There should be only one MemObjRecord for SYCL memory object. struct MemObjRecord { + MemObjRecord(ContextImplPtr CurContext, size_t LeafLimit) + : MReadLeaves{LeafLimit}, MWriteLeaves{LeafLimit}, MCurContext{ + CurContext} {} + // Contains all allocation commands for the memory object. std::vector MAllocaCommands; // Contains latest read only commands working with memory object. - std::vector MReadLeaves; + CircularBuffer MReadLeaves; // Contains latest write commands working with memory object. - std::vector MWriteLeaves; + CircularBuffer MWriteLeaves; // The context which has the latest state of the memory object. ContextImplPtr MCurContext; // The flag indicates that the content of the memory object was/will be // modified. Used while deciding if copy back needed. - bool MMemModified; + bool MMemModified = false; }; class Scheduler { @@ -165,6 +170,9 @@ class Scheduler { std::set findDepsForReq(MemObjRecord *Record, Requirement *Req, const ContextImplPtr &Context); + // Finds a command dependency corresponding to the record + DepDesc findDepForRecord(Command *Cmd, MemObjRecord *Record); + // Searches for suitable alloca in memory record. AllocaCommandBase *findAllocaForReq(MemObjRecord *Record, Requirement *Req, const ContextImplPtr &Context); diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index df761b5fad610..6940b4cb9de8b 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -120,11 +120,9 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue, if (nullptr != Record) return Record; - MemObject->MRecord.reset(new MemObjRecord{/*MAllocaCommands*/ {}, - /*MReadLeaves*/ {}, - /*MWriteLeaves*/ {}, - Queue->getContextImplPtr(), - /*MMemModified*/ false}); + const size_t LeafLimit = 8; + MemObject->MRecord.reset( + new MemObjRecord{Queue->getContextImplPtr(), LeafLimit}); MMemObjs.push_back(MemObject); return MemObject->MRecord.get(); @@ -153,10 +151,22 @@ void Scheduler::GraphBuilder::UpdateLeaves(const std::set &Cmds, void Scheduler::GraphBuilder::AddNodeToLeaves(MemObjRecord *Record, Command *Cmd, access::mode AccessMode) { - if (AccessMode == access::mode::read) - Record->MReadLeaves.push_back(Cmd); - else - Record->MWriteLeaves.push_back(Cmd); + CircularBuffer &Leaves{AccessMode == access::mode::read + ? Record->MReadLeaves + : Record->MWriteLeaves}; + if (Leaves.full()) { + Command *OldLeaf = Leaves.front(); + // TODO this is a workaround for duplicate leaves, remove once fixed + if (OldLeaf == Cmd) + return; + // Add the old leaf as a dependency for the new one by duplicating one of + // the requirements for the current record + DepDesc Dep = findDepForRecord(Cmd, Record); + Dep.MDepCommand = OldLeaf; + Cmd->addDep(Dep); + OldLeaf->addUser(Cmd); + } + Leaves.push_back(Cmd); } UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( @@ -389,9 +399,8 @@ Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, Requirement *Req, std::set Visited; const bool ReadOnlyReq = Req->MAccessMode == access::mode::read; - std::vector ToAnalyze; - - ToAnalyze = Record->MWriteLeaves; + std::vector ToAnalyze{Record->MWriteLeaves.begin(), + Record->MWriteLeaves.end()}; if (!ReadOnlyReq) ToAnalyze.insert(ToAnalyze.begin(), Record->MReadLeaves.begin(), @@ -436,6 +445,19 @@ Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, Requirement *Req, return RetDeps; } +// A helper function for finding a command dependency on a specific memory +// object +DepDesc Scheduler::GraphBuilder::findDepForRecord(Command *Cmd, + MemObjRecord *Record) { + for (const DepDesc &DD : Cmd->MDeps) { + if (getMemObjRecord(DD.MDepRequirement->MSYCLMemObj) == Record) { + return DD; + } + } + assert(false && "No dependency found for a leaf of the record"); + return {nullptr, nullptr, nullptr}; +} + // The function searches for the alloca command matching context and // requirement. AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq( diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 8e3fe9a4cbcfe..c3352907948fa 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -146,7 +146,7 @@ EventImplPtr Scheduler::addHostAccessor(Requirement *Req) { void Scheduler::releaseHostAccessor(Requirement *Req) { Req->MBlockedCmd->MCanEnqueue = true; MemObjRecord* Record = Req->MSYCLMemObj->MRecord.get(); - auto EnqueueLeaves = [](std::vector &Leaves) { + auto EnqueueLeaves = [](CircularBuffer &Leaves) { for (Command *Cmd : Leaves) { EnqueueResultT Res; bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); diff --git a/sycl/test/basic_tests/circular_buffer.cpp b/sycl/test/basic_tests/circular_buffer.cpp new file mode 100644 index 0000000000000..53387df0936a1 --- /dev/null +++ b/sycl/test/basic_tests/circular_buffer.cpp @@ -0,0 +1,47 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %t.out + +#include + +#include +#include +#include + +// This test contains basic checks for cl::sycl::detail::CircularBuffer +void checkEquality(const cl::sycl::detail::CircularBuffer &CB, + const std::vector &V) { + assert(std::equal(CB.begin(), CB.end(), V.begin())); +} + +int main() { + const size_t Capacity = 6; + cl::sycl::detail::CircularBuffer CB{Capacity}; + assert(CB.capacity() == Capacity); + assert(CB.empty()); + + int nextValue = 0; + for (; nextValue < Capacity; ++nextValue) { + assert(CB.size() == nextValue); + CB.push_back(nextValue); + } + assert(CB.full() && CB.size() == CB.capacity()); + checkEquality(CB, {0, 1, 2, 3, 4, 5}); + + CB.push_back(nextValue++); + checkEquality(CB, {1, 2, 3, 4, 5, 6}); + CB.push_front(nextValue++); + checkEquality(CB, {7, 1, 2, 3, 4, 5}); + + assert(CB.front() == 7); + assert(CB.back() == 5); + + CB.erase(CB.begin() + 2); + checkEquality(CB, {7, 1, 3, 4, 5}); + CB.erase(CB.begin(), CB.begin() + 2); + checkEquality(CB, {3, 4, 5}); + + CB.pop_back(); + checkEquality(CB, {3, 4}); + CB.pop_front(); + checkEquality(CB, {4}); +} diff --git a/sycl/test/scheduler/LeafLimit.cpp b/sycl/test/scheduler/LeafLimit.cpp new file mode 100644 index 0000000000000..f4019a457a9d3 --- /dev/null +++ b/sycl/test/scheduler/LeafLimit.cpp @@ -0,0 +1,92 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %t.out +#include + +#include +#include + +// This test checks the leaf limit imposed on the execution graph + +using namespace cl::sycl; + +class FakeCommand : public detail::Command { +public: + FakeCommand(detail::QueueImplPtr Queue, detail::Requirement Req) + : Command{detail::Command::ALLOCA, Queue}, MRequirement{std::move(Req)} {} + + void printDot(std::ostream &Stream) const override {} + + const detail::Requirement *getRequirement() const final { + return &MRequirement; + }; + + cl_int enqueueImp() override { return MRetVal; } + + cl_int MRetVal = CL_SUCCESS; + +protected: + detail::Requirement MRequirement; +}; + +class TestScheduler : public detail::Scheduler { +public: + void AddNodeToLeaves(detail::MemObjRecord *Rec, detail::Command *Cmd, + access::mode Mode) { + return MGraphBuilder.AddNodeToLeaves(Rec, Cmd, Mode); + } + + detail::MemObjRecord * + getOrInsertMemObjRecord(const detail::QueueImplPtr &Queue, + detail::Requirement *Req) { + return MGraphBuilder.getOrInsertMemObjRecord(Queue, Req); + } +}; + +int main() { + TestScheduler TS; + queue Queue; + buffer Buf(range<1>(1)); + detail::Requirement FakeReq{{0, 0, 0}, + {0, 0, 0}, + {0, 0, 0}, + access::mode::read_write, + detail::getSyclObjImpl(Buf).get(), + 0, + 0, + 0}; + FakeCommand *FakeDepCmd = + new FakeCommand(detail::getSyclObjImpl(Queue), FakeReq); + detail::MemObjRecord *Rec = + TS.getOrInsertMemObjRecord(detail::getSyclObjImpl(Queue), &FakeReq); + + // Create commands that will be added as leaves exceeding the limit by 1 + std::vector LeavesToAdd; + for (size_t i = 0; i < Rec->MWriteLeaves.capacity() + 1; ++i) { + LeavesToAdd.push_back( + new FakeCommand(detail::getSyclObjImpl(Queue), FakeReq)); + } + // Create edges: all soon-to-be leaves are direct users of FakeDep + for (auto Leaf : LeavesToAdd) { + FakeDepCmd->addUser(Leaf); + Leaf->addDep(detail::DepDesc{FakeDepCmd, Leaf->getRequirement(), nullptr}); + } + // Add edges as leaves and exceed the leaf limit + for (auto LeafPtr : LeavesToAdd) { + TS.AddNodeToLeaves(Rec, LeafPtr, access::mode::read_write); + } + // Check that the oldest leaf has been removed from the leaf list + // and added as a dependency of the newest one instead + const detail::CircularBuffer &Leaves = Rec->MWriteLeaves; + assert(std::find(Leaves.begin(), Leaves.end(), LeavesToAdd.front()) == + Leaves.end()); + for (size_t i = 1; i < LeavesToAdd.size(); ++i) { + assert(std::find(Leaves.begin(), Leaves.end(), LeavesToAdd[i]) != + Leaves.end()); + } + FakeCommand *OldestLeaf = LeavesToAdd.front(); + FakeCommand *NewestLeaf = LeavesToAdd.back(); + assert(OldestLeaf->MUsers.size() == 1); + assert(OldestLeaf->MUsers[0] == NewestLeaf); + assert(NewestLeaf->MDeps.size() == 2); + assert(NewestLeaf->MDeps[1].MDepCommand == OldestLeaf); +}