diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index 046358b3f157c..502077247e127 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -249,6 +249,7 @@ variables in production code. | `SYCL_PI_LEVEL_ZERO_USE_MULTIPLE_COMMANDLIST_BARRIERS` | Integer | When set to a positive value enables use of multiple Level Zero commandlists when submitting barriers. Default is 1. | | `SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_FILL` | Integer | When set to a positive value enables use of a copy engine for memory fill operations. Default is 0. | | `SYCL_PI_LEVEL_ZERO_SINGLE_ROOT_DEVICE_BUFFER_MIGRATION` | Integer | When set to "0" tells to use single root-device allocation for all devices in a context where all devices have same root. Otherwise performs regular buffer migration. Default is 1. | +| `SYCL_PI_LEVEL_ZERO_REUSE_DISCARDED_EVENTS` | Integer | When set to a positive value enables the mode when discarded Level Zero events are reset and reused in scope of the same in-order queue based on the dependency chain between commands. Default is 1. | ## Debugging variables for CUDA Plugin diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 252e437497b45..62b3d6cde9d4d 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -96,6 +96,16 @@ static const bool DisableEventsCaching = [] { return std::stoi(DisableEventsCachingFlag) != 0; }(); +// This is an experimental option that allows reset and reuse of uncompleted +// events in the in-order queue with discard_events property. +static const bool ReuseDiscardedEvents = [] { + const char *ReuseDiscardedEventsFlag = + std::getenv("SYCL_PI_LEVEL_ZERO_REUSE_DISCARDED_EVENTS"); + if (!ReuseDiscardedEventsFlag) + return true; + return std::stoi(ReuseDiscardedEventsFlag) > 0; +}(); + // This class encapsulates actions taken along with a call to Level Zero API. 
class ZeCall { private: @@ -649,6 +659,39 @@ ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *ZeName, if (!(condition)) \ return error; +bool _pi_queue::doReuseDiscardedEvents() { + return ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents(); +} + +pi_result _pi_queue::resetDiscardedEvent(pi_command_list_ptr_t CommandList) { + if (LastCommandEvent && LastCommandEvent->IsDiscarded) { + ZE_CALL(zeCommandListAppendBarrier, + (CommandList->first, nullptr, 1, &(LastCommandEvent->ZeEvent))); + ZE_CALL(zeCommandListAppendEventReset, + (CommandList->first, LastCommandEvent->ZeEvent)); + + // Create new pi_event but with the same ze_event_handle_t. We are going + // to use this pi_event for the next command with discarded event. + pi_event PiEvent; + try { + PiEvent = new _pi_event(LastCommandEvent->ZeEvent, + LastCommandEvent->ZeEventPool, Context, + PI_COMMAND_TYPE_USER, true); + } catch (const std::bad_alloc &) { + return PI_ERROR_OUT_OF_HOST_MEMORY; + } catch (...) { + return PI_ERROR_UNKNOWN; + } + + if (LastCommandEvent->isHostVisible()) + PiEvent->HostVisibleEvent = PiEvent; + + PI_CALL(addEventToQueueCache(PiEvent)); + } + + return PI_SUCCESS; +} + // This helper function creates a pi_event and associate a pi_queue. // Note that the caller of this function must have acquired lock on the Queue // that is passed in. @@ -667,10 +710,23 @@ inline static pi_result createEventAndAssociateQueue( if (!ForceHostVisible) ForceHostVisible = DeviceEventsSetting == AllHostVisible; - PI_CALL(EventCreate(Queue->Context, Queue, ForceHostVisible, Event)); + + // If event is discarded then try to get event from the queue cache. + *Event = + IsInternal ? 
Queue->getEventFromQueueCache(ForceHostVisible) : nullptr; + + if (*Event == nullptr) + PI_CALL(EventCreate(Queue->Context, Queue, ForceHostVisible, Event)); (*Event)->Queue = Queue; (*Event)->CommandType = CommandType; + (*Event)->IsDiscarded = IsInternal; + // Discarded event doesn't own ze_event, it is used by multiple pi_event + // objects. We destroy corresponding ze_event by releasing events from the + // events cache at queue destruction. Event in the cache owns the Level Zero + // event. + if (IsInternal) + (*Event)->OwnZeEvent = false; // Append this Event to the CommandList, if any if (CommandList != Queue->CommandListMap.end()) { @@ -699,6 +755,48 @@ inline static pi_result createEventAndAssociateQueue( return PI_SUCCESS; } +pi_result _pi_queue::signalEventFromCmdListIfLastEventDiscarded( + pi_command_list_ptr_t CommandList) { + // We signal new event at the end of command list only if we have queue with + // discard_events property and the last command event is discarded. + if (!(doReuseDiscardedEvents() && LastCommandEvent && + LastCommandEvent->IsDiscarded)) + return PI_SUCCESS; + + pi_event Event; + PI_CALL(createEventAndAssociateQueue( + this, &Event, PI_COMMAND_TYPE_USER, CommandList, + /* IsDiscarded */ false, /* ForceHostVisible */ false)) + PI_CALL(piEventReleaseInternal(Event)); + LastCommandEvent = Event; + + ZE_CALL(zeCommandListAppendSignalEvent, (CommandList->first, Event->ZeEvent)); + return PI_SUCCESS; +} + +pi_event _pi_queue::getEventFromQueueCache(bool HostVisible) { + auto Cache = HostVisible ? &EventCaches[0] : &EventCaches[1]; + + // If we don't have any events, return nullptr. + // If we have only a single event then it was used by the last command and we + // can't use it now because we have to enforce round robin between two events. + if (Cache->size() < 2) + return nullptr; + + // If there are two events then return an event from the beginning of the list + // since event of the last command is added to the end of the list. 
+ auto It = Cache->begin(); + pi_event RetEvent = *It; + Cache->erase(It); + return RetEvent; +} + +pi_result _pi_queue::addEventToQueueCache(pi_event Event) { + auto Cache = Event->isHostVisible() ? &EventCaches[0] : &EventCaches[1]; + Cache->emplace_back(Event); + return PI_SUCCESS; +} + pi_result _pi_device::initialize(int SubSubDeviceOrdinal, int SubSubDeviceIndex) { uint32_t numQueueGroups = 0; @@ -1319,6 +1417,7 @@ pi_result _pi_context::getAvailableCommandList( // Immediate commandlists have been pre-allocated and are always available. if (Queue->Device->useImmediateCommandLists()) { CommandList = Queue->getQueueGroup(UseCopyEngine).getImmCmdList(); + PI_CALL(Queue->insertStartBarrierIfDiscardEventsMode(CommandList)); if (auto Res = Queue->insertActiveBarriers(CommandList, UseCopyEngine)) return Res; return PI_SUCCESS; @@ -1334,6 +1433,7 @@ pi_result _pi_context::getAvailableCommandList( (!ForcedCmdQueue || *ForcedCmdQueue == CommandBatch.OpenCommandList->second.ZeQueue)) { CommandList = CommandBatch.OpenCommandList; + PI_CALL(Queue->insertStartBarrierIfDiscardEventsMode(CommandList)); return PI_SUCCESS; } // If this command isn't allowed to be batched or doesn't match the forced @@ -1401,6 +1501,8 @@ pi_result _pi_context::getAvailableCommandList( .first; } ZeCommandListCache.erase(ZeCommandListIt); + if (auto Res = Queue->insertStartBarrierIfDiscardEventsMode(CommandList)) + return Res; if (auto Res = Queue->insertActiveBarriers(CommandList, UseCopyEngine)) return Res; return PI_SUCCESS; @@ -1428,6 +1530,8 @@ pi_result _pi_context::getAvailableCommandList( true /* QueueLocked */); CommandList = it; CommandList->second.ZeFenceInUse = true; + if (auto Res = Queue->insertStartBarrierIfDiscardEventsMode(CommandList)) + return Res; return PI_SUCCESS; } } @@ -1470,6 +1574,7 @@ _pi_queue::createCommandList(bool UseCopyEngine, std::pair( ZeCommandList, {ZeFence, false, ZeCommandQueue, QueueGroupOrdinal})); + 
PI_CALL(insertStartBarrierIfDiscardEventsMode(CommandList)); PI_CALL(insertActiveBarriers(CommandList, UseCopyEngine)); return PI_SUCCESS; } @@ -1571,9 +1676,19 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList, bool CurrentlyEmpty = !PrintPiTrace && this->LastCommandEvent == nullptr; // The list can be empty if command-list only contains signals of proxy - // events. - if (!CommandList->second.EventList.empty()) + // events. It is possible that executeCommandList is called twice for the same + // command list without new appended command. We don't want to process the + // same last command event twice that's why additionally check that new + // command was appended to the command list. + if (!CommandList->second.EventList.empty() && + this->LastCommandEvent != CommandList->second.EventList.back()) { this->LastCommandEvent = CommandList->second.EventList.back(); + if (doReuseDiscardedEvents()) { + PI_CALL(resetDiscardedEvent(CommandList)); + } + } + + this->LastUsedCommandList = CommandList; if (!Device->useImmediateCommandLists()) { // Batch if allowed to, but don't batch if we know there are no kernels @@ -1678,21 +1793,45 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList, // after createEventAndAssociateQueue ref count is 2 and then +1 for // each event in the EventList. PI_CALL(piEventReleaseInternal(HostVisibleEvent)); - PI_CALL(piEventReleaseInternal(HostVisibleEvent)); - // Indicate no cleanup is needed for this PI event as it is special. - HostVisibleEvent->CleanedUp = true; + if (doReuseDiscardedEvents()) { + // If we have in-order queue with discarded events then we want to + // treat this event as regular event. We insert a barrier in the next + // command list to wait for this event. + LastCommandEvent = HostVisibleEvent; + } else { + // For all other queues treat this as a special event and indicate no + // cleanup is needed. + // TODO: always treat this host event as a regular event.
+ PI_CALL(piEventReleaseInternal(HostVisibleEvent)); + HostVisibleEvent->CleanedUp = true; + } // Finally set to signal the host-visible event at the end of the // command-list after a barrier that waits for all commands // completion. - ZE_CALL(zeCommandListAppendBarrier, - (CommandList->first, HostVisibleEvent->ZeEvent, 0, nullptr)); + if (doReuseDiscardedEvents() && LastCommandEvent && + LastCommandEvent->IsDiscarded) { + // If the last event is discarded then we already have a barrier + // inserted, so just signal the event. + ZE_CALL(zeCommandListAppendSignalEvent, + (CommandList->first, HostVisibleEvent->ZeEvent)); + } else { + ZE_CALL(zeCommandListAppendBarrier, + (CommandList->first, HostVisibleEvent->ZeEvent, 0, nullptr)); + } + } else { + // If we don't have host visible proxy then signal event if needed. + this->signalEventFromCmdListIfLastEventDiscarded(CommandList); } + } else { + // If we don't have host visible proxy then signal event if needed. + this->signalEventFromCmdListIfLastEventDiscarded(CommandList); } // Close the command list and have it ready for dispatch. ZE_CALL(zeCommandListClose, (CommandList->first)); + this->LastUsedCommandList = CommandListMap.end(); // Offload command list to the GPU for asynchronous execution auto ZeCommandList = CommandList->first; auto ZeResult = ZE_CALL_NOCHECK( @@ -1729,12 +1868,15 @@ bool _pi_queue::isBatchingAllowed(bool IsCopy) const { // Return the index of the next queue to use based on a // round robin strategy and the queue group ordinal.
uint32_t _pi_queue::pi_queue_group_t::getQueueIndex(uint32_t *QueueGroupOrdinal, - uint32_t *QueueIndex) { - + uint32_t *QueueIndex, + bool QueryOnly) { auto CurrentIndex = NextIndex; - ++NextIndex; - if (NextIndex > UpperIndex) - NextIndex = LowerIndex; + + if (!QueryOnly) { + ++NextIndex; + if (NextIndex > UpperIndex) + NextIndex = LowerIndex; + } // Find out the right queue group ordinal (first queue might be "main" or // "link") @@ -1881,6 +2023,19 @@ pi_command_list_ptr_t _pi_queue::eventOpenCommandList(pi_event Event) { return CommandListMap.end(); } +pi_result _pi_queue::insertStartBarrierIfDiscardEventsMode( + pi_command_list_ptr_t &CmdList) { + // If current command list is different from the last command list then insert + // a barrier waiting for the last command event. + if (doReuseDiscardedEvents() && CmdList != LastUsedCommandList && + LastCommandEvent) { + ZE_CALL(zeCommandListAppendBarrier, + (CmdList->first, nullptr, 1, &(LastCommandEvent->ZeEvent))); + LastCommandEvent = nullptr; + } + return PI_SUCCESS; +} + pi_result _pi_queue::insertActiveBarriers(pi_command_list_ptr_t &CmdList, bool UseCopyEngine) { // Early exit if there are no active barriers. @@ -1938,8 +2093,54 @@ pi_result _pi_ze_event_list_t::createAndRetainPiZeEventList( this->ZeEventList = nullptr; this->PiEventList = nullptr; + if (CurQueue->isInOrderQueue() && CurQueue->LastCommandEvent != nullptr) { + if (CurQueue->Device->useImmediateCommandLists()) { + if (ReuseDiscardedEvents && CurQueue->isDiscardEvents()) { + // If queue is in-order with discarded events and if + // new command list is different from the last used command list then + // signal new event from the last immediate command list. We are going + // to insert a barrier in the new command list waiting for that event. 
+ auto QueueGroup = CurQueue->getQueueGroup(UseCopyEngine); + uint32_t QueueGroupOrdinal, QueueIndex; + auto NextIndex = + QueueGroup.getQueueIndex(&QueueGroupOrdinal, &QueueIndex, + /*QueryOnly */ true); + auto NextImmCmdList = QueueGroup.ImmCmdLists[NextIndex]; + if (CurQueue->LastUsedCommandList != CurQueue->CommandListMap.end() && + CurQueue->LastUsedCommandList != NextImmCmdList) { + CurQueue->signalEventFromCmdListIfLastEventDiscarded( + CurQueue->LastUsedCommandList); + } + } + } else { + // Ensure LastCommandEvent's batch is submitted if it is different + // from the one this command is going to. If we reuse discarded events + // then signalEventFromCmdListIfLastEventDiscarded will be called at batch + // close if needed. + const auto &OpenCommandList = + CurQueue->eventOpenCommandList(CurQueue->LastCommandEvent); + if (OpenCommandList != CurQueue->CommandListMap.end() && + OpenCommandList->second.isCopy(CurQueue) != UseCopyEngine) { + + if (auto Res = CurQueue->executeOpenCommandList( + OpenCommandList->second.isCopy(CurQueue))) + return Res; + } + } + } + + bool IncludeLastCommandEvent = + CurQueue->isInOrderQueue() && CurQueue->LastCommandEvent != nullptr; + + // If the last event is discarded then we already have a barrier waiting for + // that event, so must not include the last command event into the wait + // list because it will cause waiting for event which was reset.
+ if (ReuseDiscardedEvents && CurQueue->isDiscardEvents() && + CurQueue->LastCommandEvent && CurQueue->LastCommandEvent->IsDiscarded) + IncludeLastCommandEvent = false; + try { - if (CurQueue->isInOrderQueue() && CurQueue->LastCommandEvent != nullptr) { + if (IncludeLastCommandEvent) { this->ZeEventList = new ze_event_handle_t[EventListLength + 1]; this->PiEventList = new pi_event[EventListLength + 1]; } else if (EventListLength > 0) { @@ -2031,19 +2232,7 @@ pi_result _pi_ze_event_list_t::createAndRetainPiZeEventList( // For in-order queues, every command should be executed only after the // previous command has finished. The event associated with the last // enqueued command is added into the waitlist to ensure in-order semantics. - if (CurQueue->isInOrderQueue() && CurQueue->LastCommandEvent != nullptr) { - - // Ensure LastCommandEvent's batch is submitted if it is differrent - // from the one this command is going to. - const auto &OpenCommandList = - CurQueue->eventOpenCommandList(CurQueue->LastCommandEvent); - if (OpenCommandList != CurQueue->CommandListMap.end() && - OpenCommandList->second.isCopy(CurQueue) != UseCopyEngine) { - - if (auto Res = CurQueue->executeOpenCommandList( - OpenCommandList->second.isCopy(CurQueue))) - return Res; - } + if (IncludeLastCommandEvent) { std::shared_lock Lock(CurQueue->LastCommandEvent->Mutex); this->ZeEventList[TmpListLength] = CurQueue->LastCommandEvent->ZeEvent; this->PiEventList[TmpListLength] = CurQueue->LastCommandEvent; @@ -3707,6 +3896,10 @@ static pi_result piQueueReleaseInternal(pi_queue Queue) { if (!Queue->RefCount.decrementAndTest()) return PI_SUCCESS; + for (auto Cache : Queue->EventCaches) + for (auto Event : Cache) + PI_CALL(piEventReleaseInternal(Event)); + if (Queue->OwnZeCommandQueue) { for (auto &ZeQueue : Queue->ComputeQueueGroup.ZeQueues) { if (ZeQueue) @@ -5596,7 +5789,8 @@ pi_result _pi_event::reset() { return PI_SUCCESS; } -pi_event _pi_context::getEventFromCache(bool HostVisible, bool 
WithProfiling) { +pi_event _pi_context::getEventFromContextCache(bool HostVisible, + bool WithProfiling) { std::scoped_lock Lock(EventCacheMutex); auto Cache = getEventCache(HostVisible, WithProfiling); if (Cache->empty()) @@ -5608,7 +5802,7 @@ pi_event _pi_context::getEventFromCache(bool HostVisible, bool WithProfiling) { return Event; } -void _pi_context::addEventToCache(pi_event Event) { +void _pi_context::addEventToContextCache(pi_event Event) { std::scoped_lock Lock(EventCacheMutex); auto Cache = getEventCache(Event->isHostVisible(), Event->isProfilingEnabled()); @@ -5627,7 +5821,7 @@ static pi_result EventCreate(pi_context Context, pi_queue Queue, !Queue || (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0; if (auto CachedEvent = - Context->getEventFromCache(HostVisible, ProfilingEnabled)) { + Context->getEventFromContextCache(HostVisible, ProfilingEnabled)) { *RetEvent = CachedEvent; return PI_SUCCESS; } @@ -6084,7 +6278,7 @@ static pi_result piEventReleaseInternal(pi_event Event) { if (DisableEventsCaching || !Event->OwnZeEvent) { delete Event; } else { - Event->Context->addEventToCache(Event); + Event->Context->addEventToContextCache(Event); } // We intentionally incremented the reference counter when an event is diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 7e35310ff93b1..733fc43cea223 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -761,10 +761,10 @@ struct _pi_context : _pi_object { std::unordered_map MemAllocs; // Get pi_event from cache. - pi_event getEventFromCache(bool HostVisible, bool WithProfiling); + pi_event getEventFromContextCache(bool HostVisible, bool WithProfiling); // Add pi_event to cache. - void addEventToCache(pi_event); + void addEventToContextCache(pi_event); private: // If context contains one device then return this device. 
@@ -863,7 +863,10 @@ struct _pi_queue : _pi_object { // Return the index of the next queue to use based on a // round robin strategy and the queue group ordinal. - uint32_t getQueueIndex(uint32_t *QueueGroupOrdinal, uint32_t *QueueIndex); + // If QueryOnly is true then return index values but don't update internal + // index data members of the queue. + uint32_t getQueueIndex(uint32_t *QueueGroupOrdinal, uint32_t *QueueIndex, + bool QueryOnly = false); // Get the ordinal for a command queue handle. int32_t getCmdQueueOrdinal(ze_command_queue_handle_t CmdQueue); @@ -1073,6 +1076,99 @@ struct _pi_queue : _pi_object { // Indicates that the queue is healthy and all operations on it are OK. bool Healthy{true}; + + // The following data structures and methods are used only for handling + // in-order queue with discard_events property. Some commands in such queue + // may have discarded event. Which means that event is not visible outside of + // the plugin. It is possible to reset and reuse discarded events in the same + // in-order queue because of the dependency between commands. We don't have to + // wait event completion to do this. We use the following 2-event model to + // reuse events inside each command list: + // + // Operation1 = zeCommandListAppendMemoryCopy (signal ze_event1) + // zeCommandListAppendBarrier(wait for ze_event1) + // zeCommandListAppendEventReset(ze_event1) + // # Create new pi_event using ze_event1 and append to the cache. + // + // Operation2 = zeCommandListAppendMemoryCopy (signal ze_event2) + // zeCommandListAppendBarrier(wait for ze_event2) + // zeCommandListAppendEventReset(ze_event2) + // # Create new pi_event using ze_event2 and append to the cache. + // + // # Get pi_event from the beginning of the cache because there are two events + // # there. So it is guaranteed that we do round-robin between two events - + // # event from the last command is appended to the cache.
+ // Operation3 = zeCommandListAppendMemoryCopy (signal ze_event1) + // # The same ze_event1 is used for Operation1 and Operation3. + // + // When we switch to a different command list we need to signal new event and + // wait for it in the new command list using barrier. + // [CmdList1] + // Operation1 = zeCommandListAppendMemoryCopy (signal event1) + // zeCommandListAppendBarrier(wait for event1) + // zeCommandListAppendEventReset(event1) + // zeCommandListAppendSignalEvent(NewEvent) + // + // [CmdList2] + // zeCommandListAppendBarrier(wait for NewEvent) + // + // This barrier guarantees that command list execution starts only after + // completion of previous command list which signals aforementioned event. It + // allows to reset and reuse same event handles inside all command lists in + // scope of the queue. It means that we need 2 reusable events of each type + // (host-visible and device-scope) per queue at maximum. + + // This data member keeps track of the last used command list and allows to + // handle switch of immediate command lists because immediate command lists + // are never closed unlike regular command lists. + pi_command_list_ptr_t LastUsedCommandList = CommandListMap.end(); + + // Vector of 2 lists of reusable events: host-visible and device-scope. + // They are separated to allow faster access to stored events depending on + // requested type of event. Each list contains events which can be reused + // inside all command lists in the queue as described in the 2-event model. + // Leftover events in the cache are released at the queue destruction. + std::vector> EventCaches{2}; + + // Get event from the queue's cache. + // Returns nullptr if the cache doesn't contain any reusable events or if the + // cache contains only one event which corresponds to the previous command and + // can't be used for the current command because we can't use the same event + // two times in a row and have to do round-robin between two events.
Otherwise + // it picks an event from the beginning of the cache and returns it. Event + // from the last command is always appended to the end of the list. + pi_event getEventFromQueueCache(bool HostVisible); + + // Put pi_event to the cache. Provided pi_event object is not used by + // any command but its ZeEvent is used by many pi_event objects. + // Commands to wait and reset ZeEvent must be submitted to the queue before + // calling this method. + pi_result addEventToQueueCache(pi_event Event); + + // Append command to provided command list to wait and reset the last event if + // it is discarded and create new pi_event wrapper using the same native event + // and put it to the cache. We call this method after each command submission + // to make native event available to use by next commands. + pi_result resetDiscardedEvent(pi_command_list_ptr_t); + + // Append command to the command list to signal new event if the last event in + // the command list is discarded. While we submit commands in scope of the + // same command list we can reset and reuse events but when we switch to a + // different command list we currently need to signal new event and wait for + // it in the new command list using barrier. + pi_result signalEventFromCmdListIfLastEventDiscarded(pi_command_list_ptr_t); + + // Insert a barrier waiting for the last command event into the beginning of + // command list. This barrier guarantees that command list execution starts + // only after completion of previous command list which signals aforementioned + // event. It allows to reset and reuse same event handles inside all command + // lists in the queue. + pi_result + insertStartBarrierIfDiscardEventsMode(pi_command_list_ptr_t &CmdList); + + // Helper method telling whether we need to reuse discarded event in this + // queue. + bool doReuseDiscardedEvents(); }; struct _pi_mem : _pi_object { @@ -1380,6 +1476,10 @@ struct _pi_event : _pi_object { // being visible to the host at all. 
bool Completed = {false}; + // Indicates that this event is discarded, i.e. it is not visible outside of + // plugin. + bool IsDiscarded = {false}; + // Besides each PI object keeping a total reference count in // _pi_object::RefCount we keep special track of the event *external* // references. This way we are able to tell when the event is not referenced