diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 211597c74a4e8..28ecaa9bc3472 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2680,6 +2680,11 @@ def fsycl_footer_path_EQ : Joined<["-"], "fsycl-footer-path=">, def fno_sycl_link_spirv : Flag<["-"], "fno-sycl-link-spirv">, Flags<[CoreOption]>, HelpText<"Disable adding of the default (spir64) triple " "when discovered in user specified objects and archives.">; +def fsycl_max_parallel_jobs_EQ : Joined<["-"], "fsycl-max-parallel-link-jobs=">, + Flags<[CoreOption]>, Group, + HelpText<"Experimental feature: Controls the maximum parallelism of actions performed " + "on SYCL device code post-link, i.e. the generation of SPIR-V device images " + "or AOT compilation of each device image.">; def fsyntax_only : Flag<["-"], "fsyntax-only">, Flags<[NoXarchOption,CoreOption,CC1Option,FC1Option]>, Group; def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1e46dfd078c0b..384dc14811414 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8803,6 +8803,11 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA, TCArgs.MakeArgString("--out-file-list=" + OutputFileName)); ForeachArgs.push_back( TCArgs.MakeArgString("--out-replace=" + OutputFileName)); + StringRef ParallelJobs = + TCArgs.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ); + if (!ParallelJobs.empty()) + ForeachArgs.push_back(TCArgs.MakeArgString("--jobs=" + ParallelJobs)); + ForeachArgs.push_back(TCArgs.MakeArgString("--")); ForeachArgs.push_back(TCArgs.MakeArgString(Cmd->getExecutable())); diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 00c4628824ecc..d29d8ef08aae8 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ 
-100,8 +100,8 @@ void SYCL::constructLLVMForeachCommand(Compilation &C, const JobAction &JA, std::unique_ptr InputCommand, const InputInfoList &InputFiles, const InputInfo &Output, const Tool *T, - StringRef Increment, - StringRef Ext = "out") { + StringRef Increment, StringRef Ext, + StringRef ParallelJobs) { // Construct llvm-foreach command. // The llvm-foreach command looks like this: // llvm-foreach --in-file-list=a.list --in-replace='{}' -- echo '{}' @@ -123,6 +123,9 @@ void SYCL::constructLLVMForeachCommand(Compilation &C, const JobAction &JA, if (!Increment.empty()) ForeachArgs.push_back( C.getArgs().MakeArgString("--out-increment=" + Increment)); + if (!ParallelJobs.empty()) + ForeachArgs.push_back(C.getArgs().MakeArgString("--jobs=" + ParallelJobs)); + ForeachArgs.push_back(C.getArgs().MakeArgString("--")); ForeachArgs.push_back( C.getArgs().MakeArgString(InputCommand->getExecutable())); @@ -395,10 +398,12 @@ void SYCL::fpga::BackendCompiler::constructOpenCLAOTCommand( const char *Exec = C.getArgs().MakeArgString(ExecPath); auto Cmd = std::make_unique(JA, *this, ResponseFileSupport::None(), Exec, CmdArgs, None); - if (!ForeachInputs.empty()) + if (!ForeachInputs.empty()) { + StringRef ParallelJobs = + Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ); constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output, - this, "", ForeachExt); - else + this, "", ForeachExt, ParallelJobs); + } else C.addCommand(std::move(Cmd)); } @@ -560,10 +565,12 @@ void SYCL::fpga::BackendCompiler::ConstructJob( auto Cmd = std::make_unique(JA, *this, ResponseFileSupport::None(), Exec, CmdArgs, None); addFPGATimingDiagnostic(Cmd, C); - if (!ForeachInputs.empty()) + if (!ForeachInputs.empty()) { + StringRef ParallelJobs = + Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ); constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output, - this, ReportOptArg, ForeachExt); - else + this, ReportOptArg, ForeachExt, ParallelJobs); + } 
else C.addCommand(std::move(Cmd)); } @@ -599,10 +606,12 @@ void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C, const char *Exec = C.getArgs().MakeArgString(ExecPath); auto Cmd = std::make_unique(JA, *this, ResponseFileSupport::None(), Exec, CmdArgs, None); - if (!ForeachInputs.empty()) + if (!ForeachInputs.empty()) { + StringRef ParallelJobs = + Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ); constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output, - this, ""); - else + this, "", "out", ParallelJobs); + } else C.addCommand(std::move(Cmd)); } @@ -632,10 +641,12 @@ void SYCL::x86_64::BackendCompiler::ConstructJob( const char *Exec = C.getArgs().MakeArgString(ExecPath); auto Cmd = std::make_unique(JA, *this, ResponseFileSupport::None(), Exec, CmdArgs, None); - if (!ForeachInputs.empty()) + if (!ForeachInputs.empty()) { + StringRef ParallelJobs = + Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ); constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output, - this, ""); - else + this, "", "out", ParallelJobs); + } else C.addCommand(std::move(Cmd)); } diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index e767ede212b9b..83723400c3d44 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -36,7 +36,8 @@ void constructLLVMForeachCommand(Compilation &C, const JobAction &JA, std::unique_ptr InputCommand, const InputInfoList &InputFiles, const InputInfo &Output, const Tool *T, - StringRef Increment, StringRef Ext); + StringRef Increment, StringRef Ext = "out", + StringRef ParallelJobs = ""); // Runs llvm-spirv to convert spirv to bc, llvm-link, which links multiple LLVM // bitcode. 
Converts generated bc back to spirv using llvm-spirv, wraps with diff --git a/clang/test/Driver/sycl-offload-with-split.c b/clang/test/Driver/sycl-offload-with-split.c index 6d22bdea20390..7db9d61e1e030 100644 --- a/clang/test/Driver/sycl-offload-with-split.c +++ b/clang/test/Driver/sycl-offload-with-split.c @@ -220,6 +220,22 @@ /// ########################################################################### +/// Check parallel compilation enforcement for split modules when running SPIR-V translation and AOT compilation +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fsycl-targets=spir64-unknown-unknown %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fsycl-targets=spir64_fpga-unknown-unknown -Xshardware %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS,CHK-PARALLEL-JOBS-AOT -DBE_COMPILER=aoc +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fintelfpga -Xshardware %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS,CHK-PARALLEL-JOBS-AOT -DBE_COMPILER=aoc +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fsycl-targets=spir64_gen-unknown-unknown %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS,CHK-PARALLEL-JOBS-AOT -DBE_COMPILER=ocloc +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fsycl-targets=spir64_x86_64-unknown-unknown %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS,CHK-PARALLEL-JOBS-AOT -DBE_COMPILER=opencl-aot +// CHK-PARALLEL-JOBS: llvm-foreach{{.*}} "--jobs=4" "--" "{{.*}}llvm-spirv{{.*}}" +// CHK-PARALLEL-JOBS-AOT: llvm-foreach{{.*}} "--jobs=4" "--" "{{.*}}[[BE_COMPILER]]{{.*}} + +/// ########################################################################### + /// offload with multiple targets, 
including AOT // RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -fsycl-targets=spir64-unknown-unknown,spir64_fpga-unknown-unknown,spir64_gen-unknown-unknown -ccc-print-phases %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG %s diff --git a/sycl/doc/UsersManual.md b/sycl/doc/UsersManual.md index 051faff500c2d..0e39e271471d5 100644 --- a/sycl/doc/UsersManual.md +++ b/sycl/doc/UsersManual.md @@ -176,6 +176,15 @@ and not recommended to use in production environment. * auto - the compiler will use a heuristic to select the best way of splitting device code. This is default mode. +**`-fsycl-max-parallel-link-jobs=<N>`** + + Experimental feature. When specified, it informs the compiler + that it can simultaneously spawn up to `N` processes to perform + actions required to link the DPC++ application. This option is + only useful in SYCL mode. It only takes effect if a link action + needs to be executed, i.e. it won't have any effect in the presence of + options like `-c` or `-E`. The default value of `N` is 1. + **`-f[no-]sycl-device-lib=[,,...]`** Enables/disables linking of the device libraries. Supported libraries: