diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 6fa35486669d6..5527e4a8818a5 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -8047,6 +8047,43 @@ it will contain a list of ids, including the ids of the callsites in the
 full inline sequence, in order from the leaf-most call's id to the outermost
 inlined call.
 
+
+'``noalias.addrspace``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``noalias.addrspace`` metadata is used to identify memory
+operations that cannot access objects allocated in a range of address
+spaces. It is attached to memory instructions, including
+:ref:`atomicrmw <i_atomicrmw>`, :ref:`cmpxchg <i_cmpxchg>`, and
+:ref:`call <i_call>` instructions.
+
+This follows the same form as :ref:`range metadata <range-metadata>`,
+except the field entries must be of type ``i32``. The values are
+interpreted as the same numeric address spaces used on IR values.
+
+Example:
+
+.. code-block:: llvm
+
+  ; %ptr cannot point to an object allocated in addrspace(5)
+  %rmw.valid = atomicrmw and ptr %ptr, i64 %value seq_cst, !noalias.addrspace !0
+
+  ; Undefined behavior. The underlying object is allocated in one of the listed
+  ; address spaces.
+  %alloca = alloca i64, addrspace(5)
+  %alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr
+  %rmw.ub = atomicrmw and ptr %alloca.cast, i64 %value seq_cst, !noalias.addrspace !0
+
+  !0 = !{i32 5, i32 6} ; Exclude addrspace(5) only
+
+
+This is intended for use on targets with a notion of generic address
+spaces, which at runtime resolve to different physical memory
+spaces. The interpretation of the address space values is target
+specific. The behavior is undefined if the memory address, at runtime,
+resolves to an object allocated in one of the indicated address spaces.
+
+
 Module Flags Metadata
 =====================
 
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index f44d636a20337..8ac5900a7e532 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -62,6 +62,8 @@ Changes to the LLVM IR
 
 * Added `usub_cond` and `usub_sat` operations to `atomicrmw`.
 
+* Introduced `noalias.addrspace` metadata.
+
 * Remove the following intrinsics which can be replaced with a `bitcast`:
 
   * `llvm.nvvm.bitcast.f2i`
diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def
index 5f4cc230a0f5f..df572e8791e13 100644
--- a/llvm/include/llvm/IR/FixedMetadataKinds.def
+++ b/llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -52,3 +52,4 @@ LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37)
 LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38)
 LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39)
 LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40)
+LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41)
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index e469c2ae52eb7..3753509f9aa71 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -34,9 +34,11 @@
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Verifier.h"
+#include "llvm/Support/AMDGPUAddrSpace.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Regex.h"
@@ -4270,13 +4272,22 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
   AtomicRMWInst *RMW =
       Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
 
-  if (PtrTy->getAddressSpace() != 3) {
+  unsigned AddrSpace = PtrTy->getAddressSpace();
+  if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
     MDNode *EmptyMD = MDNode::get(F->getContext(), {});
     RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
     if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
       RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
   }
 
+  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
+    MDBuilder MDB(F->getContext());
+    MDNode *RangeNotPrivate =
+        MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
+                        APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
+    RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
+  }
+
   if (IsVolatile)
     RMW->setVolatile(true);
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 1cd5eb36c4ab6..b89c9ce46e7d6 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -492,6 +492,14 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
   /// Whether a metadata node is allowed to be, or contain, a DILocation.
   enum class AreDebugLocsAllowed { No, Yes };
 
+  /// Metadata that should be treated as a range, with slightly different
+  /// requirements.
+  enum class RangeLikeMetadataKind {
+    Range,           // MD_range
+    AbsoluteSymbol,  // MD_absolute_symbol
+    NoaliasAddrspace // MD_noalias_addrspace
+  };
+
   // Verification methods...
  void visitGlobalValue(const GlobalValue &GV);
  void visitGlobalVariable(const GlobalVariable &GV);
@@ -515,9 +523,10 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
   void visitModuleFlagCGProfileEntry(const MDOperand &MDO);
   void visitFunction(const Function &F);
   void visitBasicBlock(BasicBlock &BB);
-  void verifyRangeMetadata(const Value &V, const MDNode *Range, Type *Ty,
-                           bool IsAbsoluteSymbol);
+  void verifyRangeLikeMetadata(const Value &V, const MDNode *Range, Type *Ty,
+                               RangeLikeMetadataKind Kind);
   void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty);
+  void visitNoaliasAddrspaceMetadata(Instruction &I, MDNode *Range, Type *Ty);
   void visitDereferenceableMetadata(Instruction &I, MDNode *MD);
   void visitProfMetadata(Instruction &I, MDNode *MD);
   void visitCallStackMetadata(MDNode *MD);
@@ -760,8 +769,9 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) {
   // FIXME: Why is getMetadata on GlobalValue protected?
   if (const MDNode *AbsoluteSymbol =
           GO->getMetadata(LLVMContext::MD_absolute_symbol)) {
-    verifyRangeMetadata(*GO, AbsoluteSymbol, DL.getIntPtrType(GO->getType()),
-                        true);
+    verifyRangeLikeMetadata(*GO, AbsoluteSymbol,
+                            DL.getIntPtrType(GO->getType()),
+                            RangeLikeMetadataKind::AbsoluteSymbol);
   }
 }
 
@@ -4136,8 +4146,8 @@ static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
 
 /// Verify !range and !absolute_symbol metadata. These have the same
 /// restrictions, except !absolute_symbol allows the full set.
-void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
-                                   Type *Ty, bool IsAbsoluteSymbol) {
+void Verifier::verifyRangeLikeMetadata(const Value &I, const MDNode *Range,
+                                       Type *Ty, RangeLikeMetadataKind Kind) {
   unsigned NumOperands = Range->getNumOperands();
   Check(NumOperands % 2 == 0, "Unfinished range!", Range);
   unsigned NumRanges = NumOperands / 2;
@@ -4154,8 +4164,14 @@ void Verifier::verifyRangeLikeMetadata(const Value &I, const MDNode *Range,
     Check(High->getType() == Low->getType(), "Range pair types must match!",
           &I);
-    Check(High->getType() == Ty->getScalarType(),
-          "Range types must match instruction type!", &I);
+
+    if (Kind == RangeLikeMetadataKind::NoaliasAddrspace) {
+      Check(High->getType()->isIntegerTy(32),
+            "noalias.addrspace type must be i32!", &I);
+    } else {
+      Check(High->getType() == Ty->getScalarType(),
+            "Range types must match instruction type!", &I);
+    }
 
     APInt HighV = High->getValue();
     APInt LowV = Low->getValue();
 
@@ -4166,7 +4182,9 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
           "The upper and lower limits cannot be the same value", &I);
 
     ConstantRange CurRange(LowV, HighV);
-    Check(!CurRange.isEmptySet() && (IsAbsoluteSymbol || !CurRange.isFullSet()),
+    Check(!CurRange.isEmptySet() &&
+              (Kind == RangeLikeMetadataKind::AbsoluteSymbol ||
+               !CurRange.isFullSet()),
           "Range must not be empty!", Range);
     if (i != 0) {
       Check(CurRange.intersectWith(LastRange).isEmptySet(),
@@ -4194,7 +4212,15 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
 void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
   assert(Range && Range == I.getMetadata(LLVMContext::MD_range) &&
          "precondition violation");
-  verifyRangeMetadata(I, Range, Ty, false);
+  verifyRangeLikeMetadata(I, Range, Ty, RangeLikeMetadataKind::Range);
+}
+
+void Verifier::visitNoaliasAddrspaceMetadata(Instruction &I, MDNode *Range,
+                                             Type *Ty) {
+  assert(Range && Range == I.getMetadata(LLVMContext::MD_noalias_addrspace) &&
+         "precondition violation");
+  verifyRangeLikeMetadata(I, Range, Ty,
+                          RangeLikeMetadataKind::NoaliasAddrspace);
 }
 
 void Verifier::checkAtomicMemAccessSize(Type *Ty, const Instruction *I) {
@@ -5187,6 +5213,13 @@ void Verifier::visitInstruction(Instruction &I) {
     visitRangeMetadata(I, Range, I.getType());
   }
 
+  if (MDNode *Range = I.getMetadata(LLVMContext::MD_noalias_addrspace)) {
+    Check(isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicRMWInst>(I) ||
+              isa<AtomicCmpXchgInst>(I) || isa<CallBase>(I),
+          "noalias.addrspace are only for memory operations!", &I);
+    visitNoaliasAddrspaceMetadata(I, Range, I.getType());
+  }
+
   if (I.hasMetadata(LLVMContext::MD_invariant_group)) {
     Check(isa<LoadInst>(I) || isa<StoreInst>(I),
           "invariant.group metadata is only for loads and stores", &I);
diff --git a/llvm/test/Assembler/noalias-addrspace-md.ll b/llvm/test/Assembler/noalias-addrspace-md.ll
new file mode 100644
index 0000000000000..62fabad86f683
--- /dev/null
+++ b/llvm/test/Assembler/noalias-addrspace-md.ll
@@ -0,0 +1,110 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+define i64 @atomicrmw_noalias_addrspace__0_1(ptr %ptr, i64 %val) {
+; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_1(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
+; CHECK-NEXT:    [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META0:![0-9]+]]
+; CHECK-NEXT:    ret i64 [[RET]]
+;
+  %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !0
+  ret i64 %ret
+}
+
+define i64 @atomicrmw_noalias_addrspace__0_2(ptr %ptr, i64 %val) {
+; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_2(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
+; CHECK-NEXT:    [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META1:![0-9]+]]
+; CHECK-NEXT:    ret i64 [[RET]]
+;
+  %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !1
+  ret i64 %ret
+}
+
+define i64 @atomicrmw_noalias_addrspace__1_3(ptr %ptr, i64 %val) {
+; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__1_3(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
+; CHECK-NEXT:    [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META2:![0-9]+]]
+; CHECK-NEXT:    ret i64 [[RET]]
+;
+  %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !2
+  ret i64 %ret
+}
+
+define i64 @atomicrmw_noalias_addrspace__multiple_ranges(ptr %ptr, i64 %val) {
+; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__multiple_ranges(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
+; CHECK-NEXT:    [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META3:![0-9]+]]
+; CHECK-NEXT:    ret i64 [[RET]]
+;
+  %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !3
+  ret i64 %ret
+}
+
+define i64 @load_noalias_addrspace__5_6(ptr %ptr) {
+; CHECK-LABEL: define i64 @load_noalias_addrspace__5_6(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[RET:%.*]] = load i64, ptr [[PTR]], align 4, !noalias.addrspace [[META4:![0-9]+]]
+; CHECK-NEXT:    ret i64 [[RET]]
+;
+  %ret = load i64, ptr %ptr, align 4, !noalias.addrspace !4
+  ret i64 %ret
+}
+
+define void @store_noalias_addrspace__5_6(ptr %ptr, i64 %val) {
+; CHECK-LABEL: define void @store_noalias_addrspace__5_6(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
+; CHECK-NEXT:    store i64 [[VAL]], ptr [[PTR]], align 4, !noalias.addrspace [[META4]]
+; CHECK-NEXT:    ret void
+;
+  store i64 %val, ptr %ptr, align 4, !noalias.addrspace !4
+  ret void
+}
+
+define { i64, i1 } @cmpxchg_noalias_addrspace__5_6(ptr %ptr, i64 %val0, i64 %val1) {
+; CHECK-LABEL: define { i64, i1 } @cmpxchg_noalias_addrspace__5_6(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL0:%.*]], i64 [[VAL1:%.*]]) {
+; CHECK-NEXT:    [[RET:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL0]], i64 [[VAL1]] monotonic monotonic, align 8, !noalias.addrspace [[META4]]
+; CHECK-NEXT:    ret { i64, i1 } [[RET]]
+;
+  %ret = cmpxchg ptr %ptr, i64 %val0, i64 %val1 monotonic monotonic, align 8, !noalias.addrspace !4
+  ret { i64, i1 } %ret
+}
+
+declare void @foo()
+
+define void @call_noalias_addrspace__5_6(ptr %ptr) {
+; CHECK-LABEL: define void @call_noalias_addrspace__5_6(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    call void @foo(), !noalias.addrspace [[META4]]
+; CHECK-NEXT:    ret void
+;
+  call void @foo(), !noalias.addrspace !4
+  ret void
+}
+
+define void @call_memcpy_intrinsic_addrspace__5_6(ptr %dst, ptr %src, i64 %size) {
+; CHECK-LABEL: define void @call_memcpy_intrinsic_addrspace__5_6(
+; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false), !noalias.addrspace [[META4]]
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false), !noalias.addrspace !4
+  ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
+
+attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+
+!0 = !{i32 0, i32 1}
+!1 = !{i32 0, i32 2}
+!2 = !{i32 1, i32 3}
+!3 = !{i32 4, i32 6, i32 10, i32 55}
+!4 = !{i32 5, i32 6}
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[META1]] = !{i32 0, i32 2}
+; CHECK: [[META2]] = !{i32 1, i32 3}
+; CHECK: [[META3]] = !{i32 4, i32 6, i32 10, i32 55}
+; CHECK: [[META4]] = !{i32 5, i32 6}
;.
diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll
index d642372799f56..87ca1e3a617ed 100644
--- a/llvm/test/Bitcode/amdgcn-atomic.ll
+++ b/llvm/test/Bitcode/amdgcn-atomic.ll
@@ -2,10 +2,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) {
-  ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false)
 
-  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
   %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false)
 
   ; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}}
@@ -26,10 +26,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr
 }
 
 define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) {
-  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false)
 
-  ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
   %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false)
 
   ; CHECK: atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}}
@@ -51,49 +51,49 @@ define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr
 
 ; Test some invalid ordering handling
 define void @ordering(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) {
-  ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 -1, i32 0, i1 true)
 
-  ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
   %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 true)
 
-  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
   %result2 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 1, i32 0, i1 false)
 
-  ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !1
   %result3 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 2, i32 0, i1 true)
 
-  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
   %result4 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 3, i32 0, i1 false)
 
-  ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result5 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 4, i1 true)
 
-  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result6 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 5, i1 false)
 
-  ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 6, i1 true)
 
-  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result8 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 7, i1 false)
 
-  ; CHECK:= atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK:= atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result9 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 8, i1 true)
 
-  ; CHECK:= atomicrmw volatile udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK:= atomicrmw volatile udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
   %result10 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 3, i32 0, i1 true)
   ret void
 }
 
 define void @immarg_violations(ptr %ptr0, i32 %val32, i1 %val1) {
-  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 %val32, i32 0, i1 false)
 
-; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !0
+; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") monotonic, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 2, i32 %val32, i1 false)
 
-  ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") monotonic, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result2 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 2, i32 0, i1 %val1)
   ret void
 }
@@ -304,7 +304,7 @@ declare <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr, <2 x i16>)
 
 define <2 x i16> @upgrade_amdgcn_flat_atomic_fadd_v2bf16_p0(ptr %ptr, <2 x i16> %data) {
   ; CHECK: [[BC0:%.+]] = bitcast <2 x i16> %data to <2 x bfloat>
-  ; CHECK-NEXT: [[ATOMIC:%.+]] = atomicrmw fadd ptr %ptr, <2 x bfloat> [[BC0]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+  ; CHECK-NEXT: [[ATOMIC:%.+]] = atomicrmw fadd ptr %ptr, <2 x bfloat> [[BC0]] syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
   ; CHECK-NEXT: [[BC1:%.+]] = bitcast <2 x bfloat> [[ATOMIC]] to <2 x i16>
   ; CHECK-NEXT: ret <2 x i16> [[BC1]]
   %result = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data)
@@ -325,7 +325,7 @@ define <2 x i16> @upgrade_amdgcn_global_atomic_fadd_v2bf16_p1(ptr addrspace(1) %
 declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr nocapture, <2 x half>) #0
 
 define <2 x half> @upgrade_amdgcn_flat_atomic_fadd_v2f16_p0_v2f16(ptr %ptr, <2 x half> %data) {
-  ; CHECK: %{{.+}} = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+  ; CHECK: %{{.+}} = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !{{[0-9]+}}, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
   %result = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data)
   ret <2 x half> %result
 }
@@ -341,7 +341,7 @@ define <2 x half> @upgrade_amdgcn_global_atomic_fadd_v2f16_p1_v2f16(ptr addrspac
 declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr nocapture, float) #0
 
 define float @upgrade_amdgcn_flat_atomic_fadd_f32_p0_f32(ptr %ptr, float %data) {
-  ; CHECK: %{{.+}} = atomicrmw fadd ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
+  ; CHECK: %{{.+}} = atomicrmw fadd ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !{{[0-9]+}}, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
   %result = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data)
   ret float %result
 }
@@ -355,3 +355,6 @@ define float @upgrade_amdgcn_global_atomic_fadd_f32_p1_f32(ptr addrspace(1) %ptr
 }
 
 attributes #0 = { argmemonly nounwind willreturn }
+
+; CHECK: !0 = !{i32 5, i32 6}
+; CHECK: !1 = !{}
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
index b9a96937e57c7..247a02f0bcc14 100644
--- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -173,6 +173,19 @@ define i32 @test_load_cast_combine_noundef(ptr %ptr) {
   ret i32 %c
 }
 
+define i32 @test_load_cast_combine_noalias_addrspace(ptr %ptr) {
+; Show that (cast (load (...))) -> (load (cast (...))) currently drops !noalias.addrspace.
+; CHECK-LABEL: @test_load_cast_combine_noalias_addrspace(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:    ret i32 [[L1]]
+;
+entry:
+  %l = load float, ptr %ptr, align 4, !noalias.addrspace !11
+  %c = bitcast float %l to i32
+  ret i32 %c
+}
+
 !0 = !{!1, !1, i64 0}
 !1 = !{!"scalar type", !2}
 !2 = !{!"root"}
@@ -184,3 +197,4 @@ define i32 @test_load_cast_combine_noundef(ptr %ptr) {
 !8 = !{i32 1}
 !9 = !{i64 8}
 !10 = distinct !{}
+!11 = !{i32 5, i32 6}
diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll
index cbf2924b28198..18aa5c9e044a9 100644
--- a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll
+++ b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll
@@ -316,10 +316,80 @@ out:
   ret void
 }
 
+define void @hoist_noalias_addrspace_both(i1 %c, ptr %p, i64 %val) {
+; CHECK-LABEL: @hoist_noalias_addrspace_both(
+; CHECK-NEXT:  if:
+; CHECK-NEXT:    [[T:%.*]] = atomicrmw add ptr [[P:%.*]], i64 [[VAL:%.*]] seq_cst, align 8
+; CHECK-NEXT:    ret void
+;
+if:
+  br i1 %c, label %then, label %else
+
+then:
+  %t = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !4
+  br label %out
+
+else:
+  %e = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !4
+  br label %out
+
+out:
+  ret void
+}
+
+define void @hoist_noalias_addrspace_one(i1 %c, ptr %p, i64 %val) {
+; CHECK-LABEL: @hoist_noalias_addrspace_one(
+; CHECK-NEXT:  if:
+; CHECK-NEXT:    [[T:%.*]] = atomicrmw add ptr [[P:%.*]], i64 [[VAL:%.*]] seq_cst, align 8
+; CHECK-NEXT:    ret void
+;
+if:
+  br i1 %c, label %then, label %else
+
+then:
+  %t = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !4
+  br label %out
+
+else:
+  %e = atomicrmw add ptr %p, i64 %val seq_cst
+  br label %out
+
+out:
+  ret void
+}
+
+define void @hoist_noalias_addrspace_switch(i64 %i, ptr %p, i64 %val) {
+; CHECK-LABEL: @hoist_noalias_addrspace_switch(
+; CHECK-NEXT:  out:
+; CHECK-NEXT:    [[T:%.*]] = atomicrmw add ptr [[P:%.*]], i64 [[VAL:%.*]] seq_cst, align 8
+; CHECK-NEXT:    ret void
+;
+  switch i64 %i, label %bb0 [
+    i64 1, label %bb1
+    i64 2, label %bb2
+  ]
+bb0:
+  %t = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !4
+  br label %out
+bb1:
+  %e = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !5
+  br label %out
+bb2:
+  %f = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !6
+  br label %out
+out:
+  ret void
+}
+
+
 !0 = !{ i8 0, i8 1 }
 !1 = !{ i8 3, i8 5 }
 !2 = !{}
 !3 = !{ i8 7, i8 9 }
+!4 = !{i32 5, i32 6}
+!5 = !{i32 5, i32 7}
+!6 = !{i32 4, i32 8}
+
 ;.
 ; CHECK: [[RNG0]] = !{i8 0, i8 1, i8 3, i8 5}
 ; CHECK: [[RNG1]] = !{i8 0, i8 1, i8 3, i8 5, i8 7, i8 9}
diff --git a/llvm/test/Verifier/noalias-addrspace.ll b/llvm/test/Verifier/noalias-addrspace.ll
new file mode 100644
index 0000000000000..67a7293d2561c
--- /dev/null
+++ b/llvm/test/Verifier/noalias-addrspace.ll
@@ -0,0 +1,60 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: It should have at least one range!
+; CHECK-NEXT: !0 = !{}
+define i64 @noalias_addrspace__empty(ptr %ptr, i64 %val) {
+  %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !0
+  ret i64 %ret
+}
+
+; CHECK: Unfinished range!
+; CHECK-NEXT: !1 = !{i32 0}
+define i64 @noalias_addrspace__single_field(ptr %ptr, i64 %val) {
+  %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !1
+  ret i64 %ret
+}
+
+; CHECK: Range must not be empty!
+; CHECK-NEXT: !2 = !{i32 0, i32 0}
+define i64 @noalias_addrspace__0_0(ptr %ptr, i64 %val) {
+  %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !2
+  ret i64 %ret
+}
+
+; CHECK: noalias.addrspace type must be i32!
+; CHECK-NEXT: %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !3
+define i64 @noalias_addrspace__i64(ptr %ptr, i64 %val) {
+  %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !3
+  ret i64 %ret
+}
+
+; CHECK: The lower limit must be an integer!
+define i64 @noalias_addrspace__fp(ptr %ptr, i64 %val) {
+  %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !4
+  ret i64 %ret
+}
+
+; CHECK: The lower limit must be an integer!
+define i64 @noalias_addrspace__ptr(ptr %ptr, i64 %val) {
+  %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !5
+  ret i64 %ret
+}
+
+; CHECK: The lower limit must be an integer!
+define i64 @noalias_addrspace__nonconstant(ptr %ptr, i64 %val) {
+  %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !6
+  ret i64 %ret
+}
+
+@gv0 = global i32 0
+@gv1 = global i32 1
+
+!0 = !{}
+!1 = !{i32 0}
+!2 = !{i32 0, i32 0}
+!3 = !{i64 1, i64 5}
+!4 = !{float 0.0, float 2.0}
+!5 = !{ptr null, ptr addrspace(1) null}
+!6 = !{i32 ptrtoint (ptr @gv0 to i32), i32 ptrtoint (ptr @gv1 to i32) }
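
---

Usage note (not part of the patch): a frontend or pass that knows a memory operation cannot touch a given address space can attach the new metadata the same way the AutoUpgrade.cpp hunk above does, by building a half-open `[Lo, Hi)` pair of `i32` constants with `MDBuilder::createRange` and setting it under `LLVMContext::MD_noalias_addrspace`. The sketch below uses only APIs that appear in the patch; the helper name and the example address-space number are hypothetical.

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"

using namespace llvm;

// Hypothetical helper: record that RMW never operates on an object
// allocated in address space AS. This mirrors the upgrade path above,
// which excludes AMDGPUAS::PRIVATE_ADDRESS for flat atomics.
static void excludeAddrSpace(AtomicRMWInst &RMW, unsigned AS) {
  MDBuilder MDB(RMW.getContext());
  // A single [AS, AS + 1) pair; entries must be i32 per the new
  // verifier rule introduced in this patch.
  MDNode *Range = MDB.createRange(APInt(32, AS), APInt(32, AS + 1));
  RMW.setMetadata(LLVMContext::MD_noalias_addrspace, Range);
}
```

Multiple disjoint ranges can be excluded by listing more than one pair in the node, as the `!3 = !{i32 4, i32 6, i32 10, i32 55}` entry in the assembler test exercises.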