Skip to content

Commit 9c2acd9

Browse files
authored
[SYCL] Reduce number of getenv calls (#4321)
This change avoids repeated calls to getenv when launching parallel_for kernels to improve performance of short-running parallel_for kernels on Windows. Signed-off-by: rdeodhar rajiv.deodhar@intel.com
1 parent 47c786d commit 9c2acd9

File tree

6 files changed

+100
-28
lines changed

6 files changed

+100
-28
lines changed

sycl/include/CL/sycl/handler.hpp

Lines changed: 17 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -780,33 +780,17 @@ class __SYCL_EXPORT handler {
780780
// Range should be at least this to make rounding worthwhile.
781781
size_t MinRangeX = 1024;
782782

783-
// Parse optional parameters of this form:
784-
// MinRound:PreferredRound:MinRange
785-
char *RoundParams = getenv("SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS");
786-
if (RoundParams != nullptr) {
787-
std::string Params(RoundParams);
788-
size_t Pos = Params.find(':');
789-
if (Pos != std::string::npos) {
790-
MinFactorX = std::stoi(Params.substr(0, Pos));
791-
Params.erase(0, Pos + 1);
792-
Pos = Params.find(':');
793-
if (Pos != std::string::npos) {
794-
GoodFactorX = std::stoi(Params.substr(0, Pos));
795-
Params.erase(0, Pos + 1);
796-
MinRangeX = std::stoi(Params);
797-
}
798-
}
799-
}
783+
// Check if rounding parameters have been set through environment:
784+
// SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=MinRound:PreferredRound:MinRange
785+
this->GetRangeRoundingSettings(MinFactorX, GoodFactorX, MinRangeX);
800786

801787
// Disable the rounding-up optimizations under these conditions:
802788
// 1. The env var SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is set.
803-
// 2. The string SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is in
804-
// the kernel name.
805-
// 3. The kernel is provided via an interoperability method.
806-
// 4. The API "this_item" is used inside the kernel.
807-
// 5. The range is already a multiple of the rounding factor.
789+
// 2. The kernel is provided via an interoperability method.
790+
// 3. The API "this_item" is used inside the kernel.
791+
// 4. The range is already a multiple of the rounding factor.
808792
//
809-
// Cases 3 and 4 could be supported with extra effort.
793+
// Cases 2 and 3 could be supported with extra effort.
810794
// As an optimization for the common case it is an
811795
// implementation choice to not support those scenarios.
812796
// Note that "this_item" is a free function, i.e. not tied to any
@@ -816,13 +800,11 @@ class __SYCL_EXPORT handler {
816800
// call-graph to make this_item calls kernel-specific but this is
817801
// not considered worthwhile.
818802

819-
// Get the kernal name to check condition 3.
803+
// Get the kernel name to check condition 2.
820804
std::string KName = typeid(NameT *).name();
821805
using KI = detail::KernelInfo<KernelName>;
822806
bool DisableRounding =
823-
(getenv("SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING") != nullptr) ||
824-
(KName.find("SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING") !=
825-
std::string::npos) ||
807+
this->DisableRangeRounding() ||
826808
(KI::getName() == nullptr || KI::getName()[0] == '\0') ||
827809
(KI::callsThisItem());
828810

@@ -837,7 +819,7 @@ class __SYCL_EXPORT handler {
837819
// will yield a rounded-up value for the total range.
838820
size_t NewValX =
839821
((NumWorkItems[0] + GoodFactorX - 1) / GoodFactorX) * GoodFactorX;
840-
if (getenv("SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE") != nullptr)
822+
if (this->RangeRoundingTrace())
841823
std::cout << "parallel_for range adjusted from " << NumWorkItems[0]
842824
<< " to " << NewValX << std::endl;
843825

@@ -2451,6 +2433,13 @@ class __SYCL_EXPORT handler {
24512433

24522434
friend class ::MockHandler;
24532435

2436+
bool DisableRangeRounding();
2437+
2438+
bool RangeRoundingTrace();
2439+
2440+
void GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor,
2441+
size_t &MinRange);
2442+
24542443
template <typename WrapperT, typename TransformedArgType, int Dims,
24552444
typename KernelType>
24562445
auto getRangeRoundedKernelLambda(KernelType KernelFunc,

sycl/source/detail/config.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ CONFIG(SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP, 1, __SYCL_DISABLE_EXECUTION_GRAPH_C
1515
CONFIG(SYCL_DEVICE_ALLOWLIST, 1024, __SYCL_DEVICE_ALLOWLIST)
1616
CONFIG(SYCL_BE, 16, __SYCL_BE)
1717
CONFIG(SYCL_PI_TRACE, 16, __SYCL_PI_TRACE)
18+
CONFIG(SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE, 16, __SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE)
19+
CONFIG(SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING, 16, __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING)
20+
CONFIG(SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS, 64, __SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS)
1821
CONFIG(SYCL_DEVICELIB_NO_FALLBACK, 1, __SYCL_DEVICELIB_NO_FALLBACK)
1922
CONFIG(SYCL_DEVICE_FILTER, 1024, __SYCL_DEVICE_FILTER)
2023
CONFIG(SYCL_PROGRAM_LINK_OPTIONS, 64, __SYCL_PROGRAM_LINK_OPTIONS)

sycl/source/detail/config.hpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,64 @@ template <> class SYCLConfig<SYCL_PI_TRACE> {
176176
}
177177
};
178178

179+
template <> class SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE> {
180+
using BaseT = SYCLConfigBase<SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE>;
181+
182+
public:
183+
static bool get() {
184+
static const char *ValStr = BaseT::getRawValue();
185+
return ValStr != nullptr;
186+
}
187+
};
188+
189+
template <> class SYCLConfig<SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING> {
190+
using BaseT = SYCLConfigBase<SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING>;
191+
192+
public:
193+
static bool get() {
194+
static const char *ValStr = BaseT::getRawValue();
195+
return ValStr != nullptr;
196+
}
197+
};
198+
199+
template <> class SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS> {
200+
using BaseT = SYCLConfigBase<SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS>;
201+
202+
private:
203+
public:
204+
static void GetSettings(size_t &MinFactor, size_t &GoodFactor,
205+
size_t &MinRange) {
206+
static const char *RoundParams = BaseT::getRawValue();
207+
if (RoundParams == nullptr)
208+
return;
209+
210+
static bool ProcessedFactors = false;
211+
static size_t MF;
212+
static size_t GF;
213+
static size_t MR;
214+
if (!ProcessedFactors) {
215+
// Parse optional parameters of this form (all values required):
216+
// MinRound:PreferredRound:MinRange
217+
std::string Params(RoundParams);
218+
size_t Pos = Params.find(':');
219+
if (Pos != std::string::npos) {
220+
MF = std::stoi(Params.substr(0, Pos));
221+
Params.erase(0, Pos + 1);
222+
Pos = Params.find(':');
223+
if (Pos != std::string::npos) {
224+
GF = std::stoi(Params.substr(0, Pos));
225+
Params.erase(0, Pos + 1);
226+
MR = std::stoi(Params);
227+
}
228+
}
229+
ProcessedFactors = true;
230+
}
231+
MinFactor = MF;
232+
GoodFactor = GF;
233+
MinRange = MR;
234+
}
235+
};
236+
179237
// Array is used by SYCL_DEVICE_FILTER and SYCL_DEVICE_ALLOWLIST
180238
const std::array<std::pair<std::string, info::device_type>, 5> &
181239
getSyclDeviceTypeMap();

sycl/source/handler.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <CL/sycl/handler.hpp>
1616
#include <CL/sycl/info/info_desc.hpp>
1717
#include <CL/sycl/stream.hpp>
18+
#include <detail/config.hpp>
1819
#include <detail/global_handler.hpp>
1920
#include <detail/kernel_bundle_impl.hpp>
2021
#include <detail/kernel_impl.hpp>
@@ -496,6 +497,21 @@ void handler::barrier(const std::vector<event> &WaitList) {
496497
[](const event &Event) { return detail::getSyclObjImpl(Event); });
497498
}
498499

500+
using namespace sycl::detail;
501+
bool handler::DisableRangeRounding() {
502+
return SYCLConfig<SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING>::get();
503+
}
504+
505+
bool handler::RangeRoundingTrace() {
506+
return SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE>::get();
507+
}
508+
509+
void handler::GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor,
510+
size_t &MinRange) {
511+
SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS>::GetSettings(
512+
MinFactor, GoodFactor, MinRange);
513+
}
514+
499515
void handler::memcpy(void *Dest, const void *Src, size_t Count) {
500516
throwIfActionIsCreated();
501517
MSrcPtr = const_cast<void *>(Src);

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3899,9 +3899,12 @@ _ZN2cl4sycl7handler10mem_adviseEPKvmi
38993899
_ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmb
39003900
_ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmbb
39013901
_ZN2cl4sycl7handler13getKernelNameB5cxx11Ev
3902+
_ZN2cl4sycl7handler18RangeRoundingTraceEv
39023903
_ZN2cl4sycl7handler18extractArgsAndReqsEv
3904+
_ZN2cl4sycl7handler20DisableRangeRoundingEv
39033905
_ZN2cl4sycl7handler20associateWithHandlerEPNS0_6detail16AccessorBaseHostENS0_6access6targetE
39043906
_ZN2cl4sycl7handler22setHandlerKernelBundleERKSt10shared_ptrINS0_6detail18kernel_bundle_implEE
3907+
_ZN2cl4sycl7handler24GetRangeRoundingSettingsERmS2_S2_
39053908
_ZN2cl4sycl7handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tE
39063909
_ZN2cl4sycl7handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tEb
39073910
_ZN2cl4sycl7handler6memcpyEPvPKvm

sycl/test/abi/sycl_symbols_windows.dump

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,7 @@
596596
?Any@__host_std@cl@@YAHV?$vec@_J$0BA@@sycl@2@@Z
597597
?Clear@exception_list@sycl@cl@@AEAAXXZ
598598
?DirSep@OSUtil@detail@sycl@cl@@2QEBDEB
599+
?DisableRangeRounding@handler@sycl@cl@@AEAA_NXZ
599600
?Dot@__host_std@cl@@YA?AVhalf@half_impl@detail@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$00@62@0@Z
600601
?Dot@__host_std@cl@@YA?AVhalf@half_impl@detail@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$01@62@0@Z
601602
?Dot@__host_std@cl@@YA?AVhalf@half_impl@detail@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$02@62@0@Z
@@ -747,6 +748,7 @@
747748
?FUnordNotEqual@__host_std@cl@@YAHVhalf@half_impl@detail@sycl@2@0@Z
748749
?GetNativeMem@interop_handler@sycl@cl@@AEBA_KPEAVAccessorImplHost@detail@23@@Z
749750
?GetNativeQueue@interop_handler@sycl@cl@@AEBA_KXZ
751+
?GetRangeRoundingSettings@handler@sycl@cl@@AEAAXAEA_K00@Z
750752
?IsFinite@__host_std@cl@@YA?AV?$vec@F$00@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$00@42@@Z
751753
?IsFinite@__host_std@cl@@YA?AV?$vec@F$01@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$01@42@@Z
752754
?IsFinite@__host_std@cl@@YA?AV?$vec@F$02@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$02@42@@Z
@@ -877,6 +879,7 @@
877879
?PushBack@exception_list@sycl@cl@@AEAAX$$QEAVexception_ptr@std@@@Z
878880
?PushBack@exception_list@sycl@cl@@AEAAXAEBVexception_ptr@std@@@Z
879881
?REJECT_DEVICE_SCORE@device_selector@sycl@cl@@1HB
882+
?RangeRoundingTrace@handler@sycl@cl@@AEAA_NXZ
880883
?SignBitSet@__host_std@cl@@YA?AV?$vec@F$00@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$00@42@@Z
881884
?SignBitSet@__host_std@cl@@YA?AV?$vec@F$01@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$01@42@@Z
882885
?SignBitSet@__host_std@cl@@YA?AV?$vec@F$02@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$02@42@@Z

0 commit comments

Comments
 (0)