Skip to content

Commit 1071a2c

Browse files
Merge branch 'llvm:main' into gh-101657
2 parents a111502 + b95ad8e commit 1071a2c

File tree

534 files changed

+1949
-1449
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

534 files changed

+1949
-1449
lines changed

clang/lib/Driver/ToolChains/Flang.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ void Flang::addTargetOptions(const ArgList &Args,
484484
Triple.getArch() != llvm::Triple::x86_64)
485485
D.Diag(diag::err_drv_unsupported_opt_for_target)
486486
<< Name << Triple.getArchName();
487-
} else if (Name == "libmvec") {
487+
} else if (Name == "libmvec" || Name == "AMDLIBM") {
488488
if (Triple.getArch() != llvm::Triple::x86 &&
489489
Triple.getArch() != llvm::Triple::x86_64)
490490
D.Diag(diag::err_drv_unsupported_opt_for_target)

flang/include/flang/Frontend/CodeGenOptions.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
4242
CODEGENOPT(Underscoring, 1, 1)
4343
ENUM_CODEGENOPT(RelocationModel, llvm::Reloc::Model, 3, llvm::Reloc::PIC_) ///< Name of the relocation model to use.
4444
ENUM_CODEGENOPT(DebugInfo, llvm::codegenoptions::DebugInfoKind, 4, llvm::codegenoptions::NoDebugInfo) ///< Level of debug info to generate
45-
ENUM_CODEGENOPT(VecLib, llvm::driver::VectorLibrary, 3, llvm::driver::VectorLibrary::NoLibrary) ///< Vector functions library to use
45+
ENUM_CODEGENOPT(VecLib, llvm::driver::VectorLibrary, 4, llvm::driver::VectorLibrary::NoLibrary) ///< Vector functions library to use
4646
ENUM_CODEGENOPT(FramePointer, llvm::FramePointerKind, 2, llvm::FramePointerKind::None) ///< Enable the usage of frame pointers
4747

4848
ENUM_CODEGENOPT(DoConcurrentMapping, DoConcurrentMappingKind, 2, DoConcurrentMappingKind::DCMK_None) ///< Map `do concurrent` to OpenMP

flang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ static bool parseVectorLibArg(Fortran::frontend::CodeGenOptions &opts,
201201
.Case("SLEEF", VectorLibrary::SLEEF)
202202
.Case("Darwin_libsystem_m", VectorLibrary::Darwin_libsystem_m)
203203
.Case("ArmPL", VectorLibrary::ArmPL)
204+
.Case("AMDLIBM", VectorLibrary::AMDLIBM)
204205
.Case("NoLibrary", VectorLibrary::NoLibrary)
205206
.Default(std::nullopt);
206207
if (!val.has_value()) {

flang/lib/Lower/OpenMP/ClauseProcessor.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,6 +1080,40 @@ bool ClauseProcessor::processIsDevicePtr(
10801080
});
10811081
}
10821082

1083+
bool ClauseProcessor::processLinear(mlir::omp::LinearClauseOps &result) const {
1084+
lower::StatementContext stmtCtx;
1085+
return findRepeatableClause<
1086+
omp::clause::Linear>([&](const omp::clause::Linear &clause,
1087+
const parser::CharBlock &) {
1088+
auto &objects = std::get<omp::ObjectList>(clause.t);
1089+
for (const omp::Object &object : objects) {
1090+
semantics::Symbol *sym = object.sym();
1091+
const mlir::Value variable = converter.getSymbolAddress(*sym);
1092+
result.linearVars.push_back(variable);
1093+
}
1094+
if (objects.size()) {
1095+
if (auto &mod =
1096+
std::get<std::optional<omp::clause::Linear::StepComplexModifier>>(
1097+
clause.t)) {
1098+
mlir::Value operand =
1099+
fir::getBase(converter.genExprValue(toEvExpr(*mod), stmtCtx));
1100+
result.linearStepVars.append(objects.size(), operand);
1101+
} else if (std::get<std::optional<omp::clause::Linear::LinearModifier>>(
1102+
clause.t)) {
1103+
mlir::Location currentLocation = converter.getCurrentLocation();
1104+
TODO(currentLocation, "Linear modifiers not yet implemented");
1105+
} else {
1106+
// If nothing is present, add the default step of 1.
1107+
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
1108+
mlir::Location currentLocation = converter.getCurrentLocation();
1109+
mlir::Value operand = firOpBuilder.createIntegerConstant(
1110+
currentLocation, firOpBuilder.getI32Type(), 1);
1111+
result.linearStepVars.append(objects.size(), operand);
1112+
}
1113+
}
1114+
});
1115+
}
1116+
10831117
bool ClauseProcessor::processLink(
10841118
llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const {
10851119
return findRepeatableClause<omp::clause::Link>(

flang/lib/Lower/OpenMP/ClauseProcessor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ class ClauseProcessor {
128128
bool processIsDevicePtr(
129129
mlir::omp::IsDevicePtrClauseOps &result,
130130
llvm::SmallVectorImpl<const semantics::Symbol *> &isDeviceSyms) const;
131+
bool processLinear(mlir::omp::LinearClauseOps &result) const;
131132
bool
132133
processLink(llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const;
133134

flang/lib/Lower/OpenMP/DataSharingProcessor.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,14 +213,15 @@ void DataSharingProcessor::collectSymbolsForPrivatization() {
213213
// so, we won't need to explicitly handle block objects (or forget to do
214214
// so).
215215
for (auto *sym : explicitlyPrivatizedSymbols)
216-
allPrivatizedSymbols.insert(sym);
216+
if (!sym->test(Fortran::semantics::Symbol::Flag::OmpLinear))
217+
allPrivatizedSymbols.insert(sym);
217218
}
218219

219220
bool DataSharingProcessor::needBarrier() {
220221
// Emit implicit barrier to synchronize threads and avoid data races on
221222
// initialization of firstprivate variables and post-update of lastprivate
222223
// variables.
223-
// Emit implicit barrier for linear clause. Maybe on somewhere else.
224+
// The implicit barrier for the linear clause is emitted in the OpenMPIRBuilder.
224225
for (const semantics::Symbol *sym : allPrivatizedSymbols) {
225226
if (sym->test(semantics::Symbol::Flag::OmpLastPrivate) &&
226227
(sym->test(semantics::Symbol::Flag::OmpFirstPrivate) ||

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1963,13 +1963,13 @@ static void genWsloopClauses(
19631963
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
19641964
ClauseProcessor cp(converter, semaCtx, clauses);
19651965
cp.processNowait(clauseOps);
1966+
cp.processLinear(clauseOps);
19661967
cp.processOrder(clauseOps);
19671968
cp.processOrdered(clauseOps);
19681969
cp.processReduction(loc, clauseOps, reductionSyms);
19691970
cp.processSchedule(stmtCtx, clauseOps);
19701971

1971-
cp.processTODO<clause::Allocate, clause::Linear>(
1972-
loc, llvm::omp::Directive::OMPD_do);
1972+
cp.processTODO<clause::Allocate>(loc, llvm::omp::Directive::OMPD_do);
19731973
}
19741974

19751975
//===----------------------------------------------------------------------===//

flang/test/Driver/fveclib-codegen.f90

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
! test that -fveclib= is passed to the backend
22
! RUN: %if aarch64-registered-target %{ %flang -S -Ofast -target aarch64-unknown-linux-gnu -fveclib=SLEEF -o - %s | FileCheck %s --check-prefix=SLEEF %}
33
! RUN: %if x86-registered-target %{ %flang -S -Ofast -target x86_64-unknown-linux-gnu -fveclib=libmvec -o - %s | FileCheck %s %}
4+
! RUN: %if x86-registered-target %{ %flang -S -O3 -ffast-math -target x86_64-unknown-linux-gnu -fveclib=AMDLIBM -o - %s | FileCheck %s --check-prefix=AMDLIBM %}
45
! RUN: %flang -S -Ofast -fveclib=NoLibrary -o - %s | FileCheck %s --check-prefix=NOLIB
56

67
subroutine sb(a, b)
@@ -10,6 +11,7 @@ subroutine sb(a, b)
1011
! check that we used a vectorized call to powf()
1112
! CHECK: _ZGVbN4vv_powf
1213
! SLEEF: _ZGVnN4vv_powf
14+
! AMDLIBM: amd_vrs4_powf
1315
! NOLIB: powf
1416
a(i) = a(i) ** b(i)
1517
end do

flang/test/Driver/fveclib.f90

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
! RUN: %flang -### -c -fveclib=Darwin_libsystem_m %s 2>&1 | FileCheck -check-prefix CHECK-DARWIN_LIBSYSTEM_M %s
66
! RUN: %flang -### -c --target=aarch64-none-none -fveclib=SLEEF %s 2>&1 | FileCheck -check-prefix CHECK-SLEEF %s
77
! RUN: %flang -### -c --target=aarch64-none-none -fveclib=ArmPL %s 2>&1 | FileCheck -check-prefix CHECK-ARMPL %s
8+
! RUN: %flang -### -c --target=x86_64-unknown-linux-gnu -fveclib=AMDLIBM %s 2>&1 | FileCheck -check-prefix CHECK-AMDLIBM %s
89
! RUN: %flang -### -c --target=aarch64-apple-darwin -fveclib=none %s 2>&1 | FileCheck -check-prefix CHECK-NOLIB-DARWIN %s
910
! RUN: not %flang -c -fveclib=something %s 2>&1 | FileCheck -check-prefix CHECK-INVALID %s
1011

@@ -15,6 +16,7 @@
1516
! CHECK-DARWIN_LIBSYSTEM_M: "-fveclib=Darwin_libsystem_m"
1617
! CHECK-SLEEF: "-fveclib=SLEEF"
1718
! CHECK-ARMPL: "-fveclib=ArmPL"
19+
! CHECK-AMDLIBM: "-fveclib=AMDLIBM"
1820
! CHECK-NOLIB-DARWIN: "-fveclib=none"
1921

2022
! CHECK-INVALID: error: invalid value 'something' in '-fveclib=something'
@@ -23,6 +25,7 @@
2325
! RUN: not %flang --target=x86-none-none -c -fveclib=ArmPL %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s
2426
! RUN: not %flang --target=aarch64-none-none -c -fveclib=libmvec %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s
2527
! RUN: not %flang --target=aarch64-none-none -c -fveclib=SVML %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s
28+
! RUN: not %flang --target=aarch64-none-none -c -fveclib=AMDLIBM %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s
2629
! CHECK-ERROR: unsupported option {{.*}} for target
2730

2831
! RUN: %flang -fveclib=Accelerate %s -target arm64-apple-ios8.0.0 -### 2>&1 | FileCheck --check-prefix=CHECK-LINK %s
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
! This test checks lowering of OpenMP DO Directive (Worksharing)
2+
! with linear clause
3+
4+
! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - 2>&1 | FileCheck %s
5+
6+
!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
7+
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
8+
!CHECK: %[[const:.*]] = arith.constant 1 : i32
9+
subroutine simple_linear
10+
implicit none
11+
integer :: x, y, i
12+
!CHECK: omp.wsloop linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
13+
!$omp do linear(x)
14+
!CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
15+
!CHECK: %[[const:.*]] = arith.constant 2 : i32
16+
!CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
17+
do i = 1, 10
18+
y = x + 2
19+
end do
20+
!$omp end do
21+
end subroutine
22+
23+
24+
!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
25+
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
26+
subroutine linear_step
27+
implicit none
28+
integer :: x, y, i
29+
!CHECK: %[[const:.*]] = arith.constant 4 : i32
30+
!CHECK: omp.wsloop linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
31+
!$omp do linear(x:4)
32+
!CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
33+
!CHECK: %[[const:.*]] = arith.constant 2 : i32
34+
!CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
35+
do i = 1, 10
36+
y = x + 2
37+
end do
38+
!$omp end do
39+
end subroutine
40+
41+
!CHECK: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
42+
!CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
43+
!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
44+
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
45+
subroutine linear_expr
46+
implicit none
47+
integer :: x, y, i, a
48+
!CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
49+
!CHECK: %[[const:.*]] = arith.constant 4 : i32
50+
!CHECK: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
51+
!CHECK: omp.wsloop linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>) {{.*}}
52+
!$omp do linear(x:a+4)
53+
do i = 1, 10
54+
y = x + 2
55+
end do
56+
!$omp end do
57+
end subroutine

libclc/CMakeLists.txt

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,14 @@ include( AddLibclc )
1818

1919
include( GNUInstallDirs )
2020
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
21-
amdgcn-amdhsa/lib/SOURCES;
22-
amdgcn/lib/SOURCES;
23-
clspv/lib/SOURCES;
24-
generic/lib/SOURCES;
25-
ptx-nvidiacl/lib/SOURCES;
26-
r600/lib/SOURCES;
27-
spirv/lib/SOURCES;
21+
# OpenCL libraries
22+
opencl/lib/amdgcn-amdhsa/SOURCES;
23+
opencl/lib/amdgcn/SOURCES;
24+
opencl/lib/clspv/SOURCES;
25+
opencl/lib/generic/SOURCES;
26+
opencl/lib/ptx-nvidiacl/SOURCES;
27+
opencl/lib/r600/SOURCES;
28+
opencl/lib/spirv/SOURCES;
2829
# CLC internal libraries
2930
clc/lib/generic/SOURCES;
3031
clc/lib/amdgcn/SOURCES;
@@ -227,7 +228,7 @@ if( ENABLE_RUNTIME_SUBNORMAL )
227228
foreach( file IN ITEMS subnormal_use_default subnormal_disable )
228229
link_bc(
229230
TARGET ${file}
230-
INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/${file}.ll
231+
INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/${file}.ll
231232
)
232233
install(
233234
FILES $<TARGET_PROPERTY:${file},TARGET_FILE>
@@ -237,7 +238,7 @@ if( ENABLE_RUNTIME_SUBNORMAL )
237238
endif()
238239

239240
find_package( Python3 REQUIRED COMPONENTS Interpreter )
240-
file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/gen_convert.py script_loc )
241+
file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/utils/gen_convert.py script_loc )
241242
add_custom_command(
242243
OUTPUT convert.cl
243244
COMMAND ${Python3_EXECUTABLE} ${script_loc} > convert.cl
@@ -283,20 +284,20 @@ set_source_files_properties(
283284
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/amdgpu/math/clc_native_log10.cl
284285
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/r600/math/clc_native_rsqrt.cl
285286
# OpenCL builtins
286-
${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/math/native_cos.cl
287-
${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/math/native_divide.cl
288-
${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/math/native_exp.cl
289-
${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/math/native_exp10.cl
290-
${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/math/native_exp2.cl
291-
${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/math/native_log.cl
292-
${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/math/native_log10.cl
293-
${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/math/native_log2.cl
294-
${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/math/native_powr.cl
295-
${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/math/native_recip.cl
296-
${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/math/native_rsqrt.cl
297-
${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/math/native_sin.cl
298-
${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/math/native_sqrt.cl
299-
${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/math/native_tan.cl
287+
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_cos.cl
288+
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_divide.cl
289+
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_exp.cl
290+
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_exp10.cl
291+
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_exp2.cl
292+
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_log.cl
293+
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_log10.cl
294+
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_log2.cl
295+
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_powr.cl
296+
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_recip.cl
297+
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_rsqrt.cl
298+
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_sin.cl
299+
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_sqrt.cl
300+
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_tan.cl
300301
PROPERTIES COMPILE_OPTIONS -fapprox-func
301302
)
302303

@@ -351,7 +352,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
351352

352353
libclc_configure_lib_source(
353354
clc_lib_files
354-
CLC_INTERNAL
355355
LIB_ROOT_DIR clc
356356
DIRS ${clc_dirs}
357357
)
@@ -365,13 +365,14 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
365365
else()
366366
list( APPEND opencl_gen_files convert.cl )
367367
if ( NOT ENABLE_RUNTIME_SUBNORMAL )
368-
list( APPEND opencl_lib_files generic/lib/subnormal_use_default.ll )
368+
list( APPEND opencl_lib_files opencl/lib/generic/subnormal_use_default.ll )
369369
endif()
370370
endif()
371371
endif()
372372

373373
libclc_configure_lib_source(
374374
opencl_lib_files
375+
LIB_ROOT_DIR opencl
375376
DIRS ${opencl_dirs}
376377
)
377378

@@ -439,7 +440,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
439440
)
440441

441442
list( APPEND build_flags
442-
-I${CMAKE_CURRENT_SOURCE_DIR}/generic/include
443+
-I${CMAKE_CURRENT_SOURCE_DIR}/opencl/include
443444
)
444445

445446
add_libclc_builtin_set(

libclc/clspv/lib/SOURCES

Lines changed: 0 additions & 73 deletions
This file was deleted.

0 commit comments

Comments
 (0)