Skip to content

Commit da81ad9

Browse files
committed
backend changes
1 parent 1b93740 commit da81ad9

File tree

4 files changed

+82
-13
lines changed

4 files changed

+82
-13
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -munsafe-fp-atomics %s -o -|FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s
2+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-ignore-denormal-mode %s -o -|FileCheck -check-prefix=IGNORE-DENORMAL-MODE %s
3+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-fine-grained-memory %s -o -|FileCheck -check-prefix=FINE-GRAINED-MEMORY %s
4+
! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-remote-memory %s -o -|FileCheck -check-prefix=REMOTE-MEMORY %s
5+
program test
6+
implicit none
7+
integer :: A, threads
8+
threads = 128
9+
A = 0
10+
!$omp target parallel num_threads(threads)
11+
!$omp atomic
12+
A = A + 1
13+
!$omp end target parallel
14+
end program test
15+
16+
!UNSAFE-FP-ATOMICS: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.ignore.denormal.mode !{{.*}}, !amdgpu.no.fine.grained.memory !{{.*}}, !amdgpu.no.remote.memory !{{.*}}
17+
!IGNORE-DENORMAL-MODE: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.ignore.denormal.mode !{{.*}}, !amdgpu.no.fine.grained.memory !{{.*}}, !amdgpu.no.remote.memory !{{.*}}
18+
!FINE-GRAINED-MEMORY: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.no.remote.memory !{{.*}}
19+
!REMOTE-MEMORY: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.no.fine.grained.memory !{{.*}}

llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3278,7 +3278,9 @@ class OpenMPIRBuilder {
32783278
emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
32793279
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
32803280
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
3281-
bool IsXBinopExpr);
3281+
bool IsXBinopExpr, bool IsAmdgpuIgnoreDenormalMode,
3282+
bool IsAmdgpuNoFineGrainedMemory,
3283+
bool IsAmdgpuNoRemoteMemory);
32823284

32833285
/// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2.
32843286
///
@@ -3349,7 +3351,10 @@ class OpenMPIRBuilder {
33493351
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(
33503352
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
33513353
Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3352-
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr);
3354+
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr,
3355+
bool IsAmdgpuIgnoreDenormalMode = false,
3356+
bool IsAmdgpuNoFineGrainedMemory = false,
3357+
bool IsAmdgpuNoRemoteMemory = false);
33533358

33543359
/// Emit atomic update for constructs: --- Only Scalar data types
33553360
/// V = X; X = X BinOp Expr ,
@@ -3384,7 +3389,10 @@ class OpenMPIRBuilder {
33843389
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
33853390
AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
33863391
AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
3387-
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr);
3392+
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr,
3393+
bool IsAmdgpuIgnoreDenormalMode = false,
3394+
bool IsAmdgpuNoFineGrainedMemory = false,
3395+
bool IsAmdgpuNoRemoteMemory = false);
33883396

33893397
/// Emit atomic compare for constructs: --- Only scalar data types
33903398
/// cond-expr-stmt:

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8761,7 +8761,9 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
87618761
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
87628762
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
87638763
Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
8764-
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) {
8764+
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr,
8765+
bool IsAmdgpuIgnoreDenormalMode, bool IsNoFineGrainedMemory,
8766+
bool IsNoRemoteMemory) {
87658767
assert(!isConflictIP(Loc.IP, AllocaIP) && "IPs must not be ambiguous");
87668768
if (!updateToLocation(Loc))
87678769
return Loc.IP;
@@ -8781,7 +8783,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
87818783

87828784
Expected<std::pair<Value *, Value *>> AtomicResult =
87838785
emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
8784-
X.IsVolatile, IsXBinopExpr);
8786+
X.IsVolatile, IsXBinopExpr, IsAmdgpuIgnoreDenormalMode,
8787+
IsNoFineGrainedMemory, IsNoRemoteMemory);
87858788
if (!AtomicResult)
87868789
return AtomicResult.takeError();
87878790
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
@@ -8828,7 +8831,9 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
88288831
Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
88298832
InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
88308833
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
8831-
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
8834+
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr,
8835+
bool IsAmdgpuIgnoreDenormalMode, bool IsAmdgpuNoFineGrainedMemory,
8836+
bool IsAmdgpuNoRemoteMemory) {
88328837
// TODO: handle the case where XElemTy is not byte-sized or not a power of 2
88338838
// or a complex datatype.
88348839
bool emitRMWOp = false;
@@ -8851,7 +8856,18 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
88518856

88528857
std::pair<Value *, Value *> Res;
88538858
if (emitRMWOp) {
8854-
Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
8859+
AtomicRMWInst *atomicRMWInst =
8860+
Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
8861+
if (IsAmdgpuIgnoreDenormalMode)
8862+
atomicRMWInst->setMetadata("amdgpu.ignore.denormal.mode",
8863+
llvm::MDNode::get(Builder.getContext(), {}));
8864+
if (IsAmdgpuNoFineGrainedMemory)
8865+
atomicRMWInst->setMetadata("amdgpu.no.fine.grained.memory",
8866+
llvm::MDNode::get(Builder.getContext(), {}));
8867+
if (IsAmdgpuNoRemoteMemory)
8868+
atomicRMWInst->setMetadata("amdgpu.no.remote.memory",
8869+
llvm::MDNode::get(Builder.getContext(), {}));
8870+
Res.first = atomicRMWInst;
88558871
// not needed except in case of postfix captures. Generate anyway for
88568872
// consistency with the else part. Will be removed with any DCE pass.
88578873
// AtomicRMWInst::Xchg does not have a corresponding instruction.
@@ -8983,7 +8999,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
89838999
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
89849000
AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
89859001
AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
8986-
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr) {
9002+
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr,
9003+
bool IsAmdgpuIgnoreDenormalMode, bool IsAmdgpuNoFineGrainedMemory,
9004+
bool IsAmdgpuNoRemoteMemory) {
89879005
if (!updateToLocation(Loc))
89889006
return Loc.IP;
89899007

@@ -9004,7 +9022,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
90049022
AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
90059023
Expected<std::pair<Value *, Value *>> AtomicResult =
90069024
emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
9007-
X.IsVolatile, IsXBinopExpr);
9025+
X.IsVolatile, IsXBinopExpr, IsAmdgpuIgnoreDenormalMode,
9026+
IsAmdgpuNoFineGrainedMemory, IsAmdgpuNoRemoteMemory);
90089027
if (!AtomicResult)
90099028
return AtomicResult.takeError();
90109029
Value *CapturedVal =

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3166,13 +3166,21 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
31663166
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
31673167
};
31683168

3169+
mlir::omp::AtomicControlAttr atomicControlAttr =
3170+
opInst.getAtomicControlAttr();
3171+
bool isAmdgpuIgnoreDenormalMode =
3172+
atomicControlAttr.getAmdgpuIgnoreDenormalMode();
3173+
bool isAmdgpuNoFineGrainedMemory =
3174+
!atomicControlAttr.getAmdgpuFineGrainedMemory();
3175+
bool isAmdgpuNoRemoteMemory = !atomicControlAttr.getAmdgpuRemoteMemory();
31693176
// Handle ambiguous alloca, if any.
31703177
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
31713178
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
31723179
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
3173-
ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
3174-
atomicOrdering, binop, updateFn,
3175-
isXBinopExpr);
3180+
ompBuilder->createAtomicUpdate(
3181+
ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop,
3182+
updateFn, isXBinopExpr, isAmdgpuIgnoreDenormalMode,
3183+
isAmdgpuNoFineGrainedMemory, isAmdgpuNoRemoteMemory);
31763184

31773185
if (failed(handleError(afterIP, *opInst)))
31783186
return failure();
@@ -3194,6 +3202,7 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
31943202
llvm::AtomicRMWInst::BinOp binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
31953203

31963204
omp::AtomicUpdateOp atomicUpdateOp = atomicCaptureOp.getAtomicUpdateOp();
3205+
31973206
omp::AtomicWriteOp atomicWriteOp = atomicCaptureOp.getAtomicWriteOp();
31983207

31993208
assert((atomicUpdateOp || atomicWriteOp) &&
@@ -3261,13 +3270,27 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
32613270
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
32623271
};
32633272

3273+
bool isAmdgpuIgnoreDenormalMode = false;
3274+
bool isAmdgpuNoFineGrainedMemory = true;
3275+
bool isAmdgpuNoRemoteMemory = true;
3276+
if (atomicUpdateOp) {
3277+
mlir::omp::AtomicControlAttr atomicControlAttr =
3278+
atomicUpdateOp.getAtomicControlAttr();
3279+
isAmdgpuIgnoreDenormalMode =
3280+
atomicControlAttr.getAmdgpuIgnoreDenormalMode();
3281+
isAmdgpuNoFineGrainedMemory =
3282+
!atomicControlAttr.getAmdgpuFineGrainedMemory();
3283+
isAmdgpuNoRemoteMemory = !atomicControlAttr.getAmdgpuRemoteMemory();
3284+
}
32643285
// Handle ambiguous alloca, if any.
32653286
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
32663287
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
32673288
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
32683289
ompBuilder->createAtomicCapture(
32693290
ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
3270-
binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr);
3291+
binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr,
3292+
isAmdgpuIgnoreDenormalMode, isAmdgpuNoFineGrainedMemory,
3293+
isAmdgpuNoRemoteMemory);
32713294

32723295
if (failed(handleError(afterIP, *atomicCaptureOp)))
32733296
return failure();

0 commit comments

Comments
 (0)