Skip to content

Expose llvm-foreach --jobs functionality through a driver option #4543

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -2680,6 +2680,11 @@ def fsycl_footer_path_EQ : Joined<["-"], "fsycl-footer-path=">,
def fno_sycl_link_spirv : Flag<["-"], "fno-sycl-link-spirv">,
Flags<[CoreOption]>, HelpText<"Disable adding of the default (spir64) triple "
"when discovered in user specified objects and archives.">;
// Upper bound on concurrently running llvm-foreach jobs; the driver forwards
// the value verbatim as llvm-foreach's "--jobs=<N>" argument.
// NOTE(review): the def name omits the "link" that appears in the flag
// spelling ("-fsycl-max-parallel-link-jobs="); callers refer to it as
// options::OPT_fsycl_max_parallel_jobs_EQ.
def fsycl_max_parallel_jobs_EQ : Joined<["-"], "fsycl-max-parallel-link-jobs=">,
Flags<[CoreOption]>, Group<f_Group>,
HelpText<"Experimental feature: Controls the maximum parallelism of actions performed "
"on SYCL device code post-link, i.e. the generation of SPIR-V device images "
"or AOT compilation of each device image.">;
def fsyntax_only : Flag<["-"], "fsyntax-only">,
Flags<[NoXarchOption,CoreOption,CC1Option,FC1Option]>, Group<Action_Group>;
def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group<f_Group>;
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8803,6 +8803,11 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA,
TCArgs.MakeArgString("--out-file-list=" + OutputFileName));
ForeachArgs.push_back(
TCArgs.MakeArgString("--out-replace=" + OutputFileName));
StringRef ParallelJobs =
TCArgs.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
if (!ParallelJobs.empty())
ForeachArgs.push_back(TCArgs.MakeArgString("--jobs=" + ParallelJobs));

ForeachArgs.push_back(TCArgs.MakeArgString("--"));
ForeachArgs.push_back(TCArgs.MakeArgString(Cmd->getExecutable()));

Expand Down
39 changes: 25 additions & 14 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ void SYCL::constructLLVMForeachCommand(Compilation &C, const JobAction &JA,
std::unique_ptr<Command> InputCommand,
const InputInfoList &InputFiles,
const InputInfo &Output, const Tool *T,
StringRef Increment,
StringRef Ext = "out") {
StringRef Increment, StringRef Ext,
StringRef ParallelJobs) {
// Construct llvm-foreach command.
// The llvm-foreach command looks like this:
// llvm-foreach --in-file-list=a.list --in-replace='{}' -- echo '{}'
Expand All @@ -123,6 +123,9 @@ void SYCL::constructLLVMForeachCommand(Compilation &C, const JobAction &JA,
if (!Increment.empty())
ForeachArgs.push_back(
C.getArgs().MakeArgString("--out-increment=" + Increment));
if (!ParallelJobs.empty())
ForeachArgs.push_back(C.getArgs().MakeArgString("--jobs=" + ParallelJobs));

ForeachArgs.push_back(C.getArgs().MakeArgString("--"));
ForeachArgs.push_back(
C.getArgs().MakeArgString(InputCommand->getExecutable()));
Expand Down Expand Up @@ -395,10 +398,12 @@ void SYCL::fpga::BackendCompiler::constructOpenCLAOTCommand(
const char *Exec = C.getArgs().MakeArgString(ExecPath);
auto Cmd = std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Exec, CmdArgs, None);
if (!ForeachInputs.empty())
if (!ForeachInputs.empty()) {
StringRef ParallelJobs =
Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output,
this, "", ForeachExt);
else
this, "", ForeachExt, ParallelJobs);
} else
C.addCommand(std::move(Cmd));
}

Expand Down Expand Up @@ -560,10 +565,12 @@ void SYCL::fpga::BackendCompiler::ConstructJob(
auto Cmd = std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Exec, CmdArgs, None);
addFPGATimingDiagnostic(Cmd, C);
if (!ForeachInputs.empty())
if (!ForeachInputs.empty()) {
StringRef ParallelJobs =
Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output,
this, ReportOptArg, ForeachExt);
else
this, ReportOptArg, ForeachExt, ParallelJobs);
} else
C.addCommand(std::move(Cmd));
}

Expand Down Expand Up @@ -599,10 +606,12 @@ void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C,
const char *Exec = C.getArgs().MakeArgString(ExecPath);
auto Cmd = std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Exec, CmdArgs, None);
if (!ForeachInputs.empty())
if (!ForeachInputs.empty()) {
StringRef ParallelJobs =
Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output,
this, "");
else
this, "", "out", ParallelJobs);
} else
C.addCommand(std::move(Cmd));
}

Expand Down Expand Up @@ -632,10 +641,12 @@ void SYCL::x86_64::BackendCompiler::ConstructJob(
const char *Exec = C.getArgs().MakeArgString(ExecPath);
auto Cmd = std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Exec, CmdArgs, None);
if (!ForeachInputs.empty())
if (!ForeachInputs.empty()) {
StringRef ParallelJobs =
Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output,
this, "");
else
this, "", "out", ParallelJobs);
} else
C.addCommand(std::move(Cmd));
}

Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Driver/ToolChains/SYCL.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ void constructLLVMForeachCommand(Compilation &C, const JobAction &JA,
std::unique_ptr<Command> InputCommand,
const InputInfoList &InputFiles,
const InputInfo &Output, const Tool *T,
StringRef Increment, StringRef Ext);
StringRef Increment, StringRef Ext = "out",
StringRef ParallelJobs = "");

// Runs llvm-spirv to convert spirv to bc, llvm-link, which links multiple LLVM
// bitcode. Converts generated bc back to spirv using llvm-spirv, wraps with
Expand Down
16 changes: 16 additions & 0 deletions clang/test/Driver/sycl-offload-with-split.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,22 @@

/// ###########################################################################

/// Check that -fsycl-max-parallel-link-jobs=<N> is forwarded to llvm-foreach as --jobs=<N> for both SPIR-V translation and AOT compilation of split modules
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fsycl-targets=spir64-unknown-unknown %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fsycl-targets=spir64_fpga-unknown-unknown -Xshardware %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS,CHK-PARALLEL-JOBS-AOT -DBE_COMPILER=aoc
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fintelfpga -Xshardware %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS,CHK-PARALLEL-JOBS-AOT -DBE_COMPILER=aoc
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fsycl-targets=spir64_gen-unknown-unknown %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS,CHK-PARALLEL-JOBS-AOT -DBE_COMPILER=ocloc
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fsycl-targets=spir64_x86_64-unknown-unknown %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS,CHK-PARALLEL-JOBS-AOT -DBE_COMPILER=opencl-aot
// CHK-PARALLEL-JOBS: llvm-foreach{{.*}} "--jobs=4" "--" "{{.*}}llvm-spirv{{.*}}"
// CHK-PARALLEL-JOBS-AOT: llvm-foreach{{.*}} "--jobs=4" "--" "{{.*}}[[BE_COMPILER]]{{.*}}

/// ###########################################################################

/// offload with multiple targets, including AOT
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -fsycl-targets=spir64-unknown-unknown,spir64_fpga-unknown-unknown,spir64_gen-unknown-unknown -ccc-print-phases %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG %s
Expand Down
9 changes: 9 additions & 0 deletions sycl/doc/UsersManual.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,15 @@ and not recommended to use in production environment.
* auto - the compiler will use a heuristic to select the best way of
splitting device code. This is default mode.

**`-fsycl-max-parallel-link-jobs=<N>`**

Experimental feature. When specified, it informs the compiler
that it can simultaneously spawn up to `N` processes to perform
actions required to link the DPC++ application. This option is
only useful in SYCL mode. It only takes effect if a link action
needs to be executed, i.e. it won't have any effect in the presence of
options like `-c` or `-E`. The default value of `N` is 1.

**`-f[no-]sycl-device-lib=<lib1>[,<lib2>,...]`**

Enables/disables linking of the device libraries. Supported libraries:
Expand Down