diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index b7c64ee4456dd..4cb4dbe514b3d 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -773,33 +773,17 @@ class __SYCL_EXPORT handler { // Range should be at least this to make rounding worthwhile. size_t MinRangeX = 1024; - // Parse optional parameters of this form: - // MinRound:PreferredRound:MinRange - char *RoundParams = getenv("SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS"); - if (RoundParams != nullptr) { - std::string Params(RoundParams); - size_t Pos = Params.find(':'); - if (Pos != std::string::npos) { - MinFactorX = std::stoi(Params.substr(0, Pos)); - Params.erase(0, Pos + 1); - Pos = Params.find(':'); - if (Pos != std::string::npos) { - GoodFactorX = std::stoi(Params.substr(0, Pos)); - Params.erase(0, Pos + 1); - MinRangeX = std::stoi(Params); - } - } - } + // Check if rounding parameters have been set through environment: + // SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=MinRound:PreferredRound:MinRange + this->GetRangeRoundingSettings(MinFactorX, GoodFactorX, MinRangeX); // Disable the rounding-up optimizations under these conditions: // 1. The env var SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is set. - // 2. The string SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is in - // the kernel name. - // 3. The kernel is provided via an interoperability method. - // 4. The API "this_item" is used inside the kernel. - // 5. The range is already a multiple of the rounding factor. + // 2. The kernel is provided via an interoperability method. + // 3. The API "this_item" is used inside the kernel. + // 4. The range is already a multiple of the rounding factor. // - // Cases 3 and 4 could be supported with extra effort. + // Cases 2 and 3 could be supported with extra effort. // As an optimization for the common case it is an // implementation choice to not support those scenarios. // Note that "this_item" is a free function, i.e. not tied to any @@ -809,13 +793,11 @@ class __SYCL_EXPORT handler { // call-graph to make this_item calls kernel-specific but this is // not considered worthwhile. - // Get the kernal name to check condition 3. + // Get the kernel name to check condition 2. std::string KName = typeid(NameT *).name(); using KI = detail::KernelInfo; bool DisableRounding = - (getenv("SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING") != nullptr) || - (KName.find("SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING") != - std::string::npos) || + this->DisableRangeRounding() || (KI::getName() == nullptr || KI::getName()[0] == '\0') || (KI::callsThisItem()); @@ -830,7 +812,7 @@ class __SYCL_EXPORT handler { // will yield a rounded-up value for the total range. size_t NewValX = ((NumWorkItems[0] + GoodFactorX - 1) / GoodFactorX) * GoodFactorX; - if (getenv("SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE") != nullptr) + if (this->RangeRoundingTrace()) std::cout << "parallel_for range adjusted from " << NumWorkItems[0] << " to " << NewValX << std::endl; @@ -2444,6 +2426,13 @@ class __SYCL_EXPORT handler { friend class ::MockHandler; + bool DisableRangeRounding(); + + bool RangeRoundingTrace(); + + void GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor, + size_t &MinRange); + template auto getRangeRoundedKernelLambda(KernelType KernelFunc, diff --git a/sycl/source/detail/config.def b/sycl/source/detail/config.def index 74f5cf2a693ab..86ed251967e3e 100644 --- a/sycl/source/detail/config.def +++ b/sycl/source/detail/config.def @@ -15,6 +15,9 @@ CONFIG(SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP, 1, __SYCL_DISABLE_EXECUTION_GRAPH_C CONFIG(SYCL_DEVICE_ALLOWLIST, 1024, __SYCL_DEVICE_ALLOWLIST) CONFIG(SYCL_BE, 16, __SYCL_BE) CONFIG(SYCL_PI_TRACE, 16, __SYCL_PI_TRACE) +CONFIG(SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE, 16, __SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE) +CONFIG(SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING, 16, __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING) +CONFIG(SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS, 64, __SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS) CONFIG(SYCL_DEVICELIB_NO_FALLBACK, 1, __SYCL_DEVICELIB_NO_FALLBACK) CONFIG(SYCL_DEVICE_FILTER, 1024, __SYCL_DEVICE_FILTER) CONFIG(SYCL_PROGRAM_LINK_OPTIONS, 64, __SYCL_PROGRAM_LINK_OPTIONS) diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index b19152c912b1f..45ed5614c2bf0 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -176,6 +176,64 @@ template <> class SYCLConfig { } }; +template <> class SYCLConfig { + using BaseT = SYCLConfigBase; + +public: + static bool get() { + static const char *ValStr = BaseT::getRawValue(); + return ValStr != nullptr; + } +}; + +template <> class SYCLConfig { + using BaseT = SYCLConfigBase; + +public: + static bool get() { + static const char *ValStr = BaseT::getRawValue(); + return ValStr != nullptr; + } +}; + +template <> class SYCLConfig { + using BaseT = SYCLConfigBase; + +private: +public: + static void GetSettings(size_t &MinFactor, size_t &GoodFactor, + size_t &MinRange) { + static const char *RoundParams = BaseT::getRawValue(); + if (RoundParams == nullptr) + return; + + static bool ProcessedFactors = false; + static size_t MF; + static size_t GF; + static size_t MR; + if (!ProcessedFactors) { + // Parse optional parameters of this form (all values required): + // MinRound:PreferredRound:MinRange + std::string Params(RoundParams); + size_t Pos = Params.find(':'); + if (Pos != std::string::npos) { + MF = std::stoi(Params.substr(0, Pos)); + Params.erase(0, Pos + 1); + Pos = Params.find(':'); + if (Pos != std::string::npos) { + GF = std::stoi(Params.substr(0, Pos)); + Params.erase(0, Pos + 1); + MR = std::stoi(Params); + } + } + ProcessedFactors = true; + } + MinFactor = MF; + GoodFactor = GF; + MinRange = MR; + } +}; + // Array is used by SYCL_DEVICE_FILTER and SYCL_DEVICE_ALLOWLIST static const std::array, 5> SyclDeviceTypeMap = {{{"host", info::device_type::host}, diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 1ae7238703e4f..bcdf2f4aebbc3 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -483,6 +484,21 @@ void handler::barrier(const std::vector &WaitList) { [](const event &Event) { return detail::getSyclObjImpl(Event); }); } +using namespace sycl::detail; +bool handler::DisableRangeRounding() { + return SYCLConfig::get(); +} + +bool handler::RangeRoundingTrace() { + return SYCLConfig::get(); +} + +void handler::GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor, + size_t &MinRange) { + SYCLConfig::GetSettings( + MinFactor, GoodFactor, MinRange); +} + void handler::memcpy(void *Dest, const void *Src, size_t Count) { throwIfActionIsCreated(); MSrcPtr = const_cast(Src); diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 56bda0b5e7a6b..d94751483d76d 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3899,9 +3899,12 @@ _ZN2cl4sycl7handler10mem_adviseEPKvmi _ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmb _ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmbb _ZN2cl4sycl7handler13getKernelNameB5cxx11Ev +_ZN2cl4sycl7handler18RangeRoundingTraceEv _ZN2cl4sycl7handler18extractArgsAndReqsEv +_ZN2cl4sycl7handler20DisableRangeRoundingEv _ZN2cl4sycl7handler20associateWithHandlerEPNS0_6detail16AccessorBaseHostENS0_6access6targetE _ZN2cl4sycl7handler22setHandlerKernelBundleERKSt10shared_ptrINS0_6detail18kernel_bundle_implEE +_ZN2cl4sycl7handler24GetRangeRoundingSettingsERmS2_S2_ _ZN2cl4sycl7handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tE _ZN2cl4sycl7handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tEb _ZN2cl4sycl7handler6memcpyEPvPKvm diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 1fadc5a7ba9e3..81161742f0910 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -596,6 +596,7 @@ ?Any@__host_std@cl@@YAHV?$vec@_J$0BA@@sycl@2@@Z ?Clear@exception_list@sycl@cl@@AEAAXXZ ?DirSep@OSUtil@detail@sycl@cl@@2QEBDEB +?DisableRangeRounding@handler@sycl@cl@@AEAA_NXZ ?Dot@__host_std@cl@@YA?AVhalf@half_impl@detail@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$00@62@0@Z ?Dot@__host_std@cl@@YA?AVhalf@half_impl@detail@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$01@62@0@Z ?Dot@__host_std@cl@@YA?AVhalf@half_impl@detail@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$02@62@0@Z @@ -747,6 +748,7 @@ ?FUnordNotEqual@__host_std@cl@@YAHVhalf@half_impl@detail@sycl@2@0@Z ?GetNativeMem@interop_handler@sycl@cl@@AEBA_KPEAVAccessorImplHost@detail@23@@Z ?GetNativeQueue@interop_handler@sycl@cl@@AEBA_KXZ +?GetRangeRoundingSettings@handler@sycl@cl@@AEAAXAEA_K00@Z ?IsFinite@__host_std@cl@@YA?AV?$vec@F$00@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$00@42@@Z ?IsFinite@__host_std@cl@@YA?AV?$vec@F$01@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$01@42@@Z ?IsFinite@__host_std@cl@@YA?AV?$vec@F$02@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$02@42@@Z @@ -877,6 +879,7 @@ ?PushBack@exception_list@sycl@cl@@AEAAX$$QEAVexception_ptr@std@@@Z ?PushBack@exception_list@sycl@cl@@AEAAXAEBVexception_ptr@std@@@Z ?REJECT_DEVICE_SCORE@device_selector@sycl@cl@@1HB +?RangeRoundingTrace@handler@sycl@cl@@AEAA_NXZ ?SignBitSet@__host_std@cl@@YA?AV?$vec@F$00@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$00@42@@Z ?SignBitSet@__host_std@cl@@YA?AV?$vec@F$01@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$01@42@@Z ?SignBitSet@__host_std@cl@@YA?AV?$vec@F$02@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$02@42@@Z