Skip to content

Commit 6bfca42

Browse files
committed
backend changes
1 parent 6a42a1d commit 6bfca42

File tree

4 files changed

+64
-11
lines changed

4 files changed

+64
-11
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -munsafe-fp-atomics %s -o -|FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s
2+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-ignore-denormal-mode %s -o -|FileCheck -check-prefix=IGNORE-DENORMAL-MODE %s
3+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-fine-grained-memory %s -o -|FileCheck -check-prefix=FINE-GRAINED-MEMORY %s
4+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-remote-memory %s -o -|FileCheck -check-prefix=REMOTE-MEMORY %s
5+
program test
6+
implicit none
7+
integer :: A, threads
8+
threads = 128
9+
A = 0
10+
!$omp target parallel num_threads(threads)
11+
!$omp atomic
12+
A = A + 1
13+
!$omp end target parallel
14+
end program test
15+
16+
!UNSAFE-FP-ATOMICS: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.ignore.denormal.mode !{{.*}}, !amdgpu.no.fine.grained.memory !{{.*}}, !amdgpu.no.remote.memory !{{.*}}
17+
!IGNORE-DENORMAL-MODE: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.ignore.denormal.mode !{{.*}}, !amdgpu.no.fine.grained.memory !{{.*}}, !amdgpu.no.remote.memory !{{.*}}
18+
!FINE-GRAINED-MEMORY: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.no.remote.memory !{{.*}}
19+
!REMOTE-MEMORY: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.no.fine.grained.memory !{{.*}}

llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3278,7 +3278,8 @@ class OpenMPIRBuilder {
32783278
emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
32793279
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
32803280
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
3281-
bool IsXBinopExpr);
3281+
bool IsXBinopExpr, bool IsAmdgpuIgnoreDenormalMode,
3282+
bool IsAmdgpuNoFineGrainedMemory, bool IsAmdgpuNoRemoteMemory);
32823283

32833284
/// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
32843285
///
@@ -3349,7 +3350,9 @@ class OpenMPIRBuilder {
33493350
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(
33503351
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
33513352
Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3352-
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr);
3353+
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr,
3354+
bool IsAmdgpuIgnoreDenormalMode = false,
3355+
bool IsAmdgpuNoFineGrainedMemory = false, bool IsAmdgpuNoRemoteMemory = false);
33533356

33543357
/// Emit atomic update for constructs: --- Only Scalar data types
33553358
/// V = X; X = X BinOp Expr ,
@@ -3384,7 +3387,9 @@ class OpenMPIRBuilder {
33843387
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
33853388
AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
33863389
AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
3387-
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr);
3390+
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr,
3391+
bool IsAmdgpuIgnoreDenormalMode = false,
3392+
bool IsAmdgpuNoFineGrainedMemory = false, bool IsAmdgpuNoRemoteMemory = false);
33883393

33893394
/// Emit atomic compare for constructs: --- Only scalar data types
33903395
/// cond-expr-stmt:

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8761,7 +8761,8 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
87618761
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
87628762
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
87638763
Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
8764-
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) {
8764+
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsAmdgpuIgnoreDenormalMode,
8765+
bool IsNoFineGrainedMemory, bool IsNoRemoteMemory) {
87658766
assert(!isConflictIP(Loc.IP, AllocaIP) && "IPs must not be ambiguous");
87668767
if (!updateToLocation(Loc))
87678768
return Loc.IP;
@@ -8781,7 +8782,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
87818782

87828783
Expected<std::pair<Value *, Value *>> AtomicResult =
87838784
emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
8784-
X.IsVolatile, IsXBinopExpr);
8785+
X.IsVolatile, IsXBinopExpr, IsAmdgpuIgnoreDenormalMode, IsNoFineGrainedMemory, IsNoRemoteMemory);
87858786
if (!AtomicResult)
87868787
return AtomicResult.takeError();
87878788
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
@@ -8828,7 +8829,8 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
88288829
Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
88298830
InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
88308831
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
8831-
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
8832+
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr, bool IsAmdgpuIgnoreDenormalMode,
8833+
bool IsAmdgpuNoFineGrainedMemory, bool IsAmdgpuNoRemoteMemory) {
88328834
// TODO: handle the case where XElemTy is not byte-sized or not a power of 2
88338835
// or a complex datatype.
88348836
bool emitRMWOp = false;
@@ -8851,7 +8853,17 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
88518853

88528854
std::pair<Value *, Value *> Res;
88538855
if (emitRMWOp) {
8854-
Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
8856+
AtomicRMWInst *atomicRMWInst = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
8857+
if(IsAmdgpuIgnoreDenormalMode)
8858+
atomicRMWInst->setMetadata("amdgpu.ignore.denormal.mode",
8859+
llvm::MDNode::get(Builder.getContext(), {}));
8860+
if(IsAmdgpuNoFineGrainedMemory)
8861+
atomicRMWInst->setMetadata("amdgpu.no.fine.grained.memory",
8862+
llvm::MDNode::get(Builder.getContext(), {}));
8863+
if(IsAmdgpuNoRemoteMemory)
8864+
atomicRMWInst->setMetadata("amdgpu.no.remote.memory",
8865+
llvm::MDNode::get(Builder.getContext(), {}));
8866+
Res.first = atomicRMWInst;
88558867
// not needed except in case of postfix captures. Generate anyway for
88568868
// consistency with the else part. Will be removed with any DCE pass.
88578869
// AtomicRMWInst::Xchg does not have a corresponding instruction.
@@ -8983,7 +8995,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
89838995
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
89848996
AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
89858997
AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
8986-
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr) {
8998+
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsAmdgpuIgnoreDenormalMode,
8999+
bool IsAmdgpuNoFineGrainedMemory, bool IsAmdgpuNoRemoteMemory) {
89879000
if (!updateToLocation(Loc))
89889001
return Loc.IP;
89899002

@@ -9004,7 +9017,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
90049017
AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
90059018
Expected<std::pair<Value *, Value *>> AtomicResult =
90069019
emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
9007-
X.IsVolatile, IsXBinopExpr);
9020+
X.IsVolatile, IsXBinopExpr, IsAmdgpuIgnoreDenormalMode, IsAmdgpuNoFineGrainedMemory, IsAmdgpuNoRemoteMemory);
90089021
if (!AtomicResult)
90099022
return AtomicResult.takeError();
90109023
Value *CapturedVal =

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3166,13 +3166,18 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
31663166
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
31673167
};
31683168

3169+
mlir::omp::AtomicControlAttr atomicControlAttr = opInst.getAtomicControlAttr();
3170+
bool isAmdgpuIgnoreDenormalMode = atomicControlAttr.getAmdgpuIgnoreDenormalMode();
3171+
bool isAmdgpuNoFineGrainedMemory = !atomicControlAttr.getAmdgpuFineGrainedMemory();
3172+
bool isAmdgpuNoRemoteMemory = !atomicControlAttr.getAmdgpuRemoteMemory();
31693173
// Handle ambiguous alloca, if any.
31703174
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
31713175
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
31723176
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
31733177
ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
31743178
atomicOrdering, binop, updateFn,
3175-
isXBinopExpr);
3179+
isXBinopExpr, isAmdgpuIgnoreDenormalMode, isAmdgpuNoFineGrainedMemory, isAmdgpuNoRemoteMemory);
3180+
31763181

31773182
if (failed(handleError(afterIP, *opInst)))
31783183
return failure();
@@ -3194,6 +3199,7 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
31943199
llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
31953200

31963201
omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
3202+
31973203
omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();
31983204

31993205
assert((atomicUpdateOp || atomicWriteOp) &&
@@ -3261,13 +3267,23 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
32613267
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
32623268
};
32633269

3270+
bool isAmdgpuIgnoreDenormalMode = false;
3271+
bool isAmdgpuNoFineGrainedMemory = true;
3272+
bool isAmdgpuNoRemoteMemory = true;
3273+
if(atomicUpdateOp) {
3274+
mlir::omp::AtomicControlAttr atomicControlAttr = atomicUpdateOp.getAtomicControlAttr();
3275+
isAmdgpuIgnoreDenormalMode = atomicControlAttr.getAmdgpuIgnoreDenormalMode();
3276+
isAmdgpuNoFineGrainedMemory = !atomicControlAttr.getAmdgpuFineGrainedMemory();
3277+
isAmdgpuNoRemoteMemory = !atomicControlAttr.getAmdgpuRemoteMemory();
3278+
}
32643279
// Handle ambiguous alloca, if any.
32653280
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
32663281
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
32673282
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
32683283
ompBuilder->createAtomicCapture(
32693284
ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
3270-
binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr);
3285+
binop, updateFn, atomicUpdateOp, isPostfixUpdate,
3286+
isXBinopExpr, isAmdgpuIgnoreDenormalMode, isAmdgpuNoFineGrainedMemory, isAmdgpuNoRemoteMemory);
32713287

32723288
if (failed(handleError(afterIP, *atomicCaptureOp)))
32733289
return failure();

0 commit comments

Comments
 (0)