Skip to content

[SYCL][CUDA] Reverse max work-group size order #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
34 changes: 34 additions & 0 deletions .github/workflows/clang-format.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: clang-format-check

on:
pull_request:
branches:
- sycl
types: [open, edit, reopen, synchronize]

jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
# checkout PR head
ref: '${{github.event.pull_request.head.sha}}'
- name: Fetch target branch
run: git fetch --no-tags --prune --depth=1 origin +refs/heads/${{github.base_ref}}:refs/remotes/origin/${{github.base_ref}}

- name: Get clang-format first
run: sudo apt-get install -yqq clang-format-9

- name: Run clang-format for the patch
run: |
git diff -U0 --no-color origin/${{github.base_ref}}..HEAD | ./clang/tools/clang-format/clang-format-diff.py -p1 -binary clang-format-9 > ./clang-format.patch

# Add patch with formatting fixes to CI job artifacts
- uses: actions/upload-artifact@v1
with:
name: clang-format-patch
path: ./clang-format.patch

- name: Check if clang-format patch is empty
run: bash -c "if [ -s ./clang-format.patch ]; then cat ./clang-format.patch; exit 1; fi"
58 changes: 39 additions & 19 deletions buildbot/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,49 @@ def do_configure(args):
sycl_dir = os.path.join(args.src_dir, "sycl")
spirv_dir = os.path.join(args.src_dir, "llvm-spirv")
ocl_header_dir = os.path.join(args.obj_dir, "OpenCL-Headers")
icd_loader_lib = ''
icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build")
llvm_targets_to_build = 'X86'
llvm_enable_projects = 'clang;llvm-spirv;sycl;opencl-aot'
libclc_targets_to_build = ''
sycl_build_pi_cuda = 'OFF'
llvm_enable_assertions = 'ON'

if platform.system() == 'Linux':
icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build", "libOpenCL.so")
icd_loader_lib = os.path.join(icd_loader_lib, "libOpenCL.so")
else:
icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build", "OpenCL.lib")
icd_loader_lib = os.path.join(icd_loader_lib, "OpenCL.lib")

if args.cuda:
llvm_targets_to_build += ';NVPTX'
llvm_enable_projects += ';libclc'
libclc_targets_to_build = 'nvptx64--;nvptx64--nvidiacl'
sycl_build_pi_cuda = 'ON'

if args.assertions:
llvm_enable_assertions = 'ON'

install_dir = os.path.join(args.obj_dir, "install")

cmake_cmd = ["cmake",
"-G", "Ninja",
"-DCMAKE_BUILD_TYPE={}".format(args.build_type),
"-DLLVM_EXTERNAL_PROJECTS=sycl;llvm-spirv;opencl-aot",
"-DLLVM_EXTERNAL_SYCL_SOURCE_DIR={}".format(sycl_dir),
"-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR={}".format(spirv_dir),
"-DLLVM_ENABLE_PROJECTS=clang;sycl;llvm-spirv;opencl-aot",
"-DOpenCL_INCLUDE_DIR={}".format(ocl_header_dir),
"-DOpenCL_LIBRARY={}".format(icd_loader_lib),
"-DLLVM_BUILD_TOOLS=ON",
"-DSYCL_ENABLE_WERROR=ON",
"-DLLVM_ENABLE_ASSERTIONS=ON",
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
"-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests.
llvm_dir]
cmake_cmd = [
"cmake",
"-G", "Ninja",
"-DCMAKE_BUILD_TYPE={}".format(args.build_type),
"-DLLVM_ENABLE_ASSERTIONS={}".format(llvm_enable_assertions),
"-DLLVM_TARGETS_TO_BUILD={}".format(llvm_targets_to_build),
"-DLLVM_EXTERNAL_PROJECTS=sycl;llvm-spirv;opencl-aot",
"-DLLVM_EXTERNAL_SYCL_SOURCE_DIR={}".format(sycl_dir),
"-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR={}".format(spirv_dir),
"-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects),
"-DLIBCLC_TARGETS_TO_BUILD={}".format(libclc_targets_to_build),
"-DOpenCL_INCLUDE_DIR={}".format(ocl_header_dir),
"-DOpenCL_LIBRARY={}".format(icd_loader_lib),
"-DSYCL_BUILD_PI_CUDA={}".format(sycl_build_pi_cuda),
"-DLLVM_BUILD_TOOLS=ON",
"-DSYCL_ENABLE_WERROR=ON",
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
"-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests.
llvm_dir
]

print(cmake_cmd)

Expand Down Expand Up @@ -63,6 +82,8 @@ def main():
parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", required=True, help="build directory")
parser.add_argument("-t", "--build-type",
metavar="BUILD_TYPE", required=True, help="build type, debug or release")
parser.add_argument("--cuda", action='store_true', help="switch from OpenCL to CUDA")
parser.add_argument("--assertions", action='store_true', help="build with assertions")

args = parser.parse_args()

Expand All @@ -74,4 +95,3 @@ def main():
ret = main()
exit_code = 0 if ret else 1
sys.exit(exit_code)

3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ def warn_drv_unknown_cuda_version: Warning<
"Unknown CUDA version %0. Assuming the latest supported version %1">,
InGroup<CudaUnknownVersion>;
def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">;
def err_drv_no_sycl_libspirv : Error<
"cannot find `libspirv-nvptx64--nvidiacl.bc`. Provide path to libspirv library via "
"-fsycl-libspirv-path, or pass -fno-sycl-libspirv to build without linking with libspirv.">;
def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">;
def err_drv_invalid_thread_model_for_target : Error<
"invalid thread model '%0' in '%1' for this target">;
Expand Down
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/DiagnosticIDs.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ namespace clang {
// Size of each of the diagnostic categories.
enum {
DIAG_SIZE_COMMON = 300,
DIAG_SIZE_DRIVER = 250, // 200 -> 250 for SYCL related diagnostics
DIAG_SIZE_DRIVER = 210,
DIAG_SIZE_FRONTEND = 150,
DIAG_SIZE_SERIALIZATION = 120,
DIAG_SIZE_LEX = 400,
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1872,6 +1872,9 @@ def fsycl_help_EQ : Joined<["-"], "fsycl-help=">,
def fsycl_help : Flag<["-"], "fsycl-help">, Alias<fsycl_help_EQ>,
Flags<[DriverOption, CoreOption]>, AliasArgs<["all"]>, HelpText<"Emit help information "
"from all of the offline compilation tools">;
def fsycl_libspirv_path_EQ : Joined<["-"], "fsycl-libspirv-path=">,
Flags<[CC1Option, CoreOption]>, HelpText<"Path to libspirv library">;
def fno_sycl_libspirv : Flag<["-"], "fno-sycl-libspirv">, HelpText<"Disable check for libspirv">;
def fsyntax_only : Flag<["-"], "fsyntax-only">,
Flags<[DriverOption,CoreOption,CC1Option]>, Group<Action_Group>;
def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group<f_Group>;
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
.Default(32);
}

TLSSupported = false;
// FIXME: Needed for compiling SYCL to PTX.
TLSSupported = Triple.getEnvironment() == llvm::Triple::SYCLDevice;
VLASupported = false;
AddrSpaceMap = &NVPTXAddrSpaceMap;
UseAddrSpaceMapMangling = true;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Basic/Targets/NVPTX.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
Opts.support("cl_khr_global_int32_extended_atomics");
Opts.support("cl_khr_local_int32_base_atomics");
Opts.support("cl_khr_local_int32_extended_atomics");
// PTX actually supports 64 bits operations even if the Nvidia OpenCL
// runtime does not report support for it.
// This is required for libclc to compile 64 bits atomic functions.
// FIXME: maybe we should have a way to control this ?
Opts.support("cl_khr_int64_base_atomics");
Opts.support("cl_khr_int64_extended_atomics");
}

/// \returns If a target requires an address within a target specific address
Expand Down
3 changes: 0 additions & 3 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -842,9 +842,6 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
PerFunctionPasses.add(
createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));

if (LangOpts.SYCLIsDevice)
PerFunctionPasses.add(createSYCLLowerWGScopePass());

CreatePasses(PerModulePasses, PerFunctionPasses);

legacy::PassManager CodeGenPasses;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,12 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
return *FI;

unsigned CC = ClangCallConvToLLVMCallConv(info.getCC());
// This is required so SYCL kernels are successfully processed by tools from CUDA. Kernels
// with a `spir_kernel` calling convention are ignored otherwise.
if (CC == llvm::CallingConv::SPIR_KERNEL && CGM.getTriple().isNVPTX() &&
getContext().getLangOpts().SYCLIsDevice) {
CC = llvm::CallingConv::C;
}

// Construct the function info. We co-allocate the ArgInfos.
FI = CGFunctionInfo::create(CC, instanceMethod, chainCall, info,
Expand Down
13 changes: 13 additions & 0 deletions clang/lib/CodeGen/CodeGenAction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "CodeGenModule.h"
#include "CoverageMappingGen.h"
#include "MacroPPCallbacks.h"
#include "SYCLLowerIR/LowerWGScope.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
Expand All @@ -33,6 +34,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LLVMRemarkStreamer.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
Expand Down Expand Up @@ -326,6 +328,17 @@ namespace clang {
CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone)
Ctx.setDiagnosticsHotnessRequested(true);

// The parallel_for_work_group legalization pass can emit calls to
// builtins function. Definitions of those builtins can be provided in
// LinkModule. We force the pass to legalize the code before the link
// happens.
if (LangOpts.SYCLIsDevice) {
PrettyStackTraceString CrashInfo("Pre-linking SYCL passes");
legacy::PassManager PreLinkingSyclPasses;
PreLinkingSyclPasses.add(createSYCLLowerWGScopePass());
PreLinkingSyclPasses.run(*getModule());
}

// Link each LinkModule into our module.
if (LinkInModules())
return;
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,8 @@ void CodeGenModule::createSYCLRuntime() {
switch (getTriple().getArch()) {
case llvm::Triple::spir:
case llvm::Triple::spir64:
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
SYCLRuntime.reset(new CGSYCLRuntime(*this));
break;
default:
Expand Down
Loading