diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h index 426ac757b4b0d..6b715ab62331e 100644 --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -40,6 +40,7 @@ #define LLVM_ANALYSIS_DEPENDENCEANALYSIS_H #include "llvm/ADT/SmallBitVector.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" @@ -49,8 +50,6 @@ namespace llvm { template <typename T> class ArrayRef; class Loop; class LoopInfo; - class ScalarEvolution; - class SCEV; class SCEVConstant; class raw_ostream; @@ -74,8 +73,9 @@ namespace llvm { Dependence &operator=(Dependence &&) = default; public: - Dependence(Instruction *Source, Instruction *Destination) - : Src(Source), Dst(Destination) {} + Dependence(Instruction *Source, Instruction *Destination, + const SCEVUnionPredicate &A) + : Src(Source), Dst(Destination), Assumptions(A) {} virtual ~Dependence() = default; /// Dependence::DVEntry - Each level in the distance/direction vector @@ -203,6 +203,10 @@ namespace llvm { /// field. void setNextSuccessor(const Dependence *succ) { NextSuccessor = succ; } + /// getRuntimeAssumptions - Returns the runtime assumptions under which this + /// Dependence relation is valid. + SCEVUnionPredicate getRuntimeAssumptions() const { return Assumptions; } + /// dump - For debugging purposes, dumps a dependence to OS. /// void dump(raw_ostream &OS) const; @@ -211,6 +215,7 @@ namespace llvm { Instruction *Src, *Dst; private: + SCEVUnionPredicate Assumptions; const Dependence *NextPredecessor = nullptr, *NextSuccessor = nullptr; friend class DependenceInfo; }; @@ -225,8 +230,9 @@ namespace llvm { /// input dependences are unordered. 
class FullDependence final : public Dependence { public: - FullDependence(Instruction *Src, Instruction *Dst, bool LoopIndependent, - unsigned Levels); + FullDependence(Instruction *Source, Instruction *Destination, + const SCEVUnionPredicate &Assumes, + bool PossiblyLoopIndependent, unsigned Levels); /// isLoopIndependent - Returns true if this is a loop-independent /// dependence. @@ -302,9 +308,13 @@ namespace llvm { /// depends - Tests for a dependence between the Src and Dst instructions. /// Returns NULL if no dependence; otherwise, returns a Dependence (or a - /// FullDependence) with as much information as can be gleaned. - std::unique_ptr<Dependence> depends(Instruction *Src, - Instruction *Dst); + /// FullDependence) with as much information as can be gleaned. By default, + /// the dependence test collects a set of runtime assumptions that cannot be + /// solved at compilation time. By default UnderRuntimeAssumptions is false + /// for a safe approximation of the dependence relation that does not + /// require runtime checks. + std::unique_ptr<Dependence> depends(Instruction *Src, Instruction *Dst, + bool UnderRuntimeAssumptions = false); /// getSplitIteration - Give a dependence that's splittable at some /// particular level, return the iteration that should be used to split @@ -350,11 +360,16 @@ namespace llvm { Function *getFunction() const { return F; } + /// getRuntimeAssumptions - Returns all the runtime assumptions under which + /// the dependence test is valid. + SCEVUnionPredicate getRuntimeAssumptions() const; + private: AAResults *AA; ScalarEvolution *SE; LoopInfo *LI; Function *F; + SmallVector<const SCEVPredicate *, 4> Assumptions; /// Subscript - This private struct represents a pair of subscripts from /// a pair of potentially multi-dimensional array references. 
We use a diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index f729b07076d29..339bdfeb4956a 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1044,6 +1044,13 @@ class ScalarEvolution { bool isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero = false, bool OrNegative = false); + /// Check that \p S is a multiple of \p M. When \p S is an AddRecExpr, \p S is + /// a multiple of \p M if \p S starts with a multiple of \p M and at every + /// iteration step \p S only adds multiples of \p M. \p Assumptions records + /// the runtime predicates under which \p S is a multiple of \p M. + bool isKnownMultipleOf(const SCEV *S, uint64_t M, + SmallVectorImpl<const SCEVPredicate *> &Assumptions); + /// Splits SCEV expression \p S into two SCEVs. One of them is obtained from /// \p S by substitution of all AddRec sub-expression related to loop \p L /// with initial value of that SCEV. The second is obtained from \p S by diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index dc0ed22dbcc0b..dba3ac28b37a3 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -187,7 +187,8 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA, if (DstI->mayReadOrWriteMemory()) { OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n"; OS << " da analyze - "; - if (auto D = DA->depends(&*SrcI, &*DstI)) { + if (auto D = DA->depends(&*SrcI, &*DstI, + /*UnderRuntimeAssumptions=*/true)) { // Normalize negative direction vectors if required by clients. 
if (NormalizeResults && D->normalize(&SE)) OS << "normalized - "; @@ -199,13 +200,17 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA, OS << "!\n"; } } - } - else + } else OS << "none!\n"; } } } } + SCEVUnionPredicate Assumptions = DA->getRuntimeAssumptions(); + if (!Assumptions.isAlwaysTrue()) { + OS << "Runtime Assumptions:\n"; + Assumptions.print(OS, 0); + } } void DependenceAnalysisWrapperPass::print(raw_ostream &OS, @@ -264,9 +269,10 @@ bool Dependence::isScalar(unsigned level) const { // FullDependence methods FullDependence::FullDependence(Instruction *Source, Instruction *Destination, + const SCEVUnionPredicate &Assumes, bool PossiblyLoopIndependent, unsigned CommonLevels) - : Dependence(Source, Destination), Levels(CommonLevels), + : Dependence(Source, Destination, Assumes), Levels(CommonLevels), LoopIndependent(PossiblyLoopIndependent) { Consistent = true; if (CommonLevels) @@ -706,6 +712,12 @@ void Dependence::dump(raw_ostream &OS) const { OS << " splitable"; } OS << "!\n"; + + SCEVUnionPredicate Assumptions = getRuntimeAssumptions(); + if (!Assumptions.isAlwaysTrue()) { + OS << " Runtime Assumptions:\n"; + Assumptions.print(OS, 2); + } } // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their @@ -3569,6 +3581,10 @@ bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA, Inv.invalidate(F, PA); } +SCEVUnionPredicate DependenceInfo::getRuntimeAssumptions() const { + return SCEVUnionPredicate(Assumptions, *SE); +} + // depends - // Returns NULL if there is no dependence. // Otherwise, return a Dependence with as many details as possible. @@ -3581,7 +3597,9 @@ bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA, // Care is required to keep the routine below, getSplitIteration(), // up to date with respect to this routine. 
std::unique_ptr<Dependence> -DependenceInfo::depends(Instruction *Src, Instruction *Dst) { +DependenceInfo::depends(Instruction *Src, Instruction *Dst, + bool UnderRuntimeAssumptions) { + SmallVector<const SCEVPredicate *, 4> Assume; bool PossiblyLoopIndependent = true; if (Src == Dst) PossiblyLoopIndependent = false; @@ -3593,22 +3611,20 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) { if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { // can only analyze simple loads and stores, i.e., no calls, invokes, etc. LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); - return std::make_unique<Dependence>(Src, Dst); + return std::make_unique<Dependence>(Src, Dst, + SCEVUnionPredicate(Assume, *SE)); } - assert(isLoadOrStore(Src) && "instruction is not load or store"); - assert(isLoadOrStore(Dst) && "instruction is not load or store"); - Value *SrcPtr = getLoadStorePointerOperand(Src); - Value *DstPtr = getLoadStorePointerOperand(Dst); + const MemoryLocation &DstLoc = MemoryLocation::get(Dst); + const MemoryLocation &SrcLoc = MemoryLocation::get(Src); - switch (underlyingObjectsAlias(AA, F->getDataLayout(), - MemoryLocation::get(Dst), - MemoryLocation::get(Src))) { + switch (underlyingObjectsAlias(AA, F->getDataLayout(), DstLoc, SrcLoc)) { case AliasResult::MayAlias: case AliasResult::PartialAlias: // cannot analyse objects if we don't understand their aliasing. LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); - return std::make_unique<Dependence>(Src, Dst); + return std::make_unique<Dependence>(Src, Dst, + SCEVUnionPredicate(Assume, *SE)); case AliasResult::NoAlias: // If the objects noalias, they are distinct, accesses are independent. LLVM_DEBUG(dbgs() << "no alias\n"); @@ -3617,21 +3633,24 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) { break; // The underlying objects alias; test accesses for dependence. 
} - // establish loop nesting levels - establishNestingLevels(Src, Dst); - LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); - LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); - - FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); - ++TotalArrayPairs; + if (DstLoc.Size != SrcLoc.Size || !DstLoc.Size.isPrecise() || + !SrcLoc.Size.isPrecise()) { + // The dependence test gets confused if the size of the memory accesses + // differ. + LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n"); + return std::make_unique<Dependence>(Src, Dst, + SCEVUnionPredicate(Assume, *SE)); + } - unsigned Pairs = 1; - SmallVector<Subscript, 2> Pair(Pairs); + Value *SrcPtr = getLoadStorePointerOperand(Src); + Value *DstPtr = getLoadStorePointerOperand(Dst); const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); const SCEV *DstSCEV = SE->getSCEV(DstPtr); LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); - if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) { + const SCEV *SrcBase = SE->getPointerBase(SrcSCEV); + const SCEV *DstBase = SE->getPointerBase(DstSCEV); + if (SrcBase != DstBase) { // If two pointers have different bases, trying to analyze indexes won't // work; we can't compare them to each other. This can happen, for example, // if one is produced by an LCSSA PHI node. // // We check this upfront so we don't crash in cases where getMinusSCEV() // returns a SCEVCouldNotCompute. 
LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n"); - return std::make_unique<Dependence>(Src, Dst); + return std::make_unique<Dependence>(Src, Dst, + SCEVUnionPredicate(Assume, *SE)); + } + + uint64_t EltSize = SrcLoc.Size.toRaw(); + const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase); + const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase); + + if (Src != Dst) { + // Check that memory access offsets are multiples of element sizes. + if (!SE->isKnownMultipleOf(SrcEv, EltSize, Assume) || + !SE->isKnownMultipleOf(DstEv, EltSize, Assume)) { + LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n"); + return std::make_unique<Dependence>(Src, Dst, + SCEVUnionPredicate(Assume, *SE)); + } + } + + if (!Assume.empty()) { + if (!UnderRuntimeAssumptions) + return std::make_unique<Dependence>(Src, Dst, + SCEVUnionPredicate(Assume, *SE)); + // Add non-redundant assumptions. + unsigned N = Assumptions.size(); + for (const SCEVPredicate *P : Assume) { + bool Implied = false; + for (unsigned I = 0; I != N && !Implied; I++) + if (Assumptions[I]->implies(P, *SE)) + Implied = true; + if (!Implied) + Assumptions.push_back(P); + } } + + establishNestingLevels(Src, Dst); + LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); + LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); + + FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE), + PossiblyLoopIndependent, CommonLevels); + ++TotalArrayPairs; + + unsigned Pairs = 1; + SmallVector<Subscript, 2> Pair(Pairs); Pair[0].Src = SrcSCEV; Pair[0].Dst = DstSCEV; @@ -4034,7 +4095,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, // establish loop nesting levels establishNestingLevels(Src, Dst); - FullDependence Result(Src, Dst, false, CommonLevels); + FullDependence Result(Src, Dst, Dep.Assumptions, false, CommonLevels); unsigned Pairs = 1; SmallVector<Subscript, 2> Pair(Pairs); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index c62ea1526981d..d94474a902519 
100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -10971,6 +10971,56 @@ bool ScalarEvolution::isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero, return all_of(Mul->operands(), NonRecursive) && (OrZero || isKnownNonZero(S)); } +bool ScalarEvolution::isKnownMultipleOf( + const SCEV *S, uint64_t M, + SmallVectorImpl<const SCEVPredicate *> &Assumptions) { + if (M == 0) + return false; + if (M == 1) + return true; + + // Recursively check AddRec operands. An AddRecExpr S is a multiple of M if S + // starts with a multiple of M and at every iteration step S only adds + // multiples of M. + if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(S)) + return isKnownMultipleOf(AddRec->getStart(), M, Assumptions) && + isKnownMultipleOf(AddRec->getStepRecurrence(*this), M, Assumptions); + + // For a constant, check that "S % M == 0". + if (auto *Cst = dyn_cast<SCEVConstant>(S)) { + APInt C = Cst->getAPInt(); + return C.urem(M) == 0; + } + + // TODO: Also check other SCEV expressions, i.e., SCEVAddRecExpr, etc. + + // Basic tests have failed. + // Check "S % M == 0" at compile time and record runtime Assumptions. + auto *STy = dyn_cast<IntegerType>(S->getType()); + const SCEV *SmodM = + getURemExpr(S, getConstant(ConstantInt::get(STy, M, false))); + const SCEV *Zero = getZero(STy); + + // Check whether "S % M == 0" is known at compile time. + if (isKnownPredicate(ICmpInst::ICMP_EQ, SmodM, Zero)) + return true; + + // Check whether "S % M != 0" is known at compile time. + if (isKnownPredicate(ICmpInst::ICMP_NE, SmodM, Zero)) + return false; + + const SCEVPredicate *P = getComparePredicate(ICmpInst::ICMP_EQ, SmodM, Zero); + + // Detect redundant predicates. + for (auto *A : Assumptions) + if (A->implies(P, *this)) + return true; + + // Only record non-redundant predicates. + Assumptions.push_back(P); + return true; +} + std::pair<const SCEV *, const SCEV *> ScalarEvolution::SplitIntoInitAndPostInc(const Loop *L, const SCEV *S) { // Compute SCEV on entry of loop L. 
diff --git a/llvm/test/Analysis/DependenceAnalysis/DifferentAccessSize.ll b/llvm/test/Analysis/DependenceAnalysis/DifferentAccessSize.ll new file mode 100644 index 0000000000000..2dded8f3b13a1 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/DifferentAccessSize.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \ +; RUN: | FileCheck %s + +; The dependence test does not handle array accesses of different sizes: i32 and i64. +; Bug 16183 - https://github.com/llvm/llvm-project/issues/16183 + +define i64 @bug16183_alias(ptr nocapture %A) { +; CHECK-LABEL: 'bug16183_alias' +; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 4 --> Dst: store i32 2, ptr %arrayidx, align 4 +; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 4 --> Dst: %0 = load i64, ptr %A, align 8 +; CHECK-NEXT: da analyze - confused! +; CHECK-NEXT: Src: %0 = load i64, ptr %A, align 8 --> Dst: %0 = load i64, ptr %A, align 8 +; CHECK-NEXT: da analyze - none! +; +entry: + %arrayidx = getelementptr inbounds i32, ptr %A, i64 1 + store i32 2, ptr %arrayidx, align 4 + %0 = load i64, ptr %A, align 8 + ret i64 %0 +} diff --git a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll new file mode 100644 index 0000000000000..1f8fac3087bff --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll @@ -0,0 +1,201 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \ +; RUN: | FileCheck %s + +; The dependence test does not handle array accesses when the difference between two array accesses +; is not a multiple of the array element size.
+ +; In this test, the element size is i32 = 4 bytes and the difference between the +; load and the store is 2 bytes. + +define i32 @alias_with_different_offsets(ptr nocapture %A) { +; CHECK-LABEL: 'alias_with_different_offsets' +; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 1 --> Dst: store i32 2, ptr %arrayidx, align 1 +; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 1 --> Dst: %0 = load i32, ptr %A, align 1 +; CHECK-NEXT: da analyze - confused! +; CHECK-NEXT: Src: %0 = load i32, ptr %A, align 1 --> Dst: %0 = load i32, ptr %A, align 1 +; CHECK-NEXT: da analyze - none! +; +entry: + %arrayidx = getelementptr inbounds i8, ptr %A, i64 2 + store i32 2, ptr %arrayidx, align 1 + %0 = load i32, ptr %A, align 1 + ret i32 %0 +} + +define i32 @alias_with_parametric_offset(ptr nocapture %A, i64 %n) { +; CHECK-LABEL: 'alias_with_parametric_offset' +; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 1 --> Dst: store i32 2, ptr %arrayidx, align 1 +; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 1 --> Dst: %0 = load i32, ptr %A, align 1 +; CHECK-NEXT: da analyze - flow [|<]! +; CHECK-NEXT: Runtime Assumptions: +; CHECK-NEXT: Equal predicate: (zext i2 (trunc i64 %n to i2) to i64) == 0 +; CHECK-NEXT: Src: %0 = load i32, ptr %A, align 1 --> Dst: %0 = load i32, ptr %A, align 1 +; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: Runtime Assumptions: +; CHECK-NEXT: Equal predicate: (zext i2 (trunc i64 %n to i2) to i64) == 0 +; +entry: + %arrayidx = getelementptr inbounds i8, ptr %A, i64 %n + store i32 2, ptr %arrayidx, align 1 + %0 = load i32, ptr %A, align 1 + ret i32 %0 +} + +define i32 @alias_with_parametric_expr(ptr nocapture %A, i64 %n, i64 %m) { +; CHECK-LABEL: 'alias_with_parametric_expr' +; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 1 --> Dst: store i32 2, ptr %arrayidx, align 1 +; CHECK-NEXT: da analyze - none! 
+; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 1 --> Dst: %0 = load i32, ptr %arrayidx1, align 1 +; CHECK-NEXT: da analyze - flow [|<]! +; CHECK-NEXT: Runtime Assumptions: +; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 %m to i2) + (-2 * (trunc i64 %n to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-2 + (trunc i64 %m to i2)) to i64) == 0 +; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx1, align 1 --> Dst: %0 = load i32, ptr %arrayidx1, align 1 +; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: Runtime Assumptions: +; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 %m to i2) + (-2 * (trunc i64 %n to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-2 + (trunc i64 %m to i2)) to i64) == 0 +; +entry: + %mul = mul nsw i64 %n, 10 + %add = add nsw i64 %mul, %m + %arrayidx = getelementptr inbounds i8, ptr %A, i64 %add + store i32 2, ptr %arrayidx, align 1 + + %add1 = add nsw i64 %m, 42 + %arrayidx1 = getelementptr inbounds i8, ptr %A, i64 %add1 + %0 = load i32, ptr %arrayidx1, align 1 + ret i32 %0 +} + +define i32 @gep_i8_vs_i32(ptr nocapture %A, i64 %n, i64 %m) { +; CHECK-LABEL: 'gep_i8_vs_i32' +; CHECK-NEXT: Src: store i32 42, ptr %arrayidx0, align 1 --> Dst: store i32 42, ptr %arrayidx0, align 1 +; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: Src: store i32 42, ptr %arrayidx0, align 1 --> Dst: store i32 42, ptr %arrayidx1, align 4 +; CHECK-NEXT: da analyze - output [|<]! +; CHECK-NEXT: Runtime Assumptions: +; CHECK-NEXT: Equal predicate: (zext i2 (trunc i64 %n to i2) to i64) == 0 +; CHECK-NEXT: Src: store i32 42, ptr %arrayidx1, align 4 --> Dst: store i32 42, ptr %arrayidx1, align 4 +; CHECK-NEXT: da analyze - none! 
+; CHECK-NEXT: Runtime Assumptions: +; CHECK-NEXT: Equal predicate: (zext i2 (trunc i64 %n to i2) to i64) == 0 +; +entry: + %arrayidx0 = getelementptr inbounds i8, ptr %A, i64 %n + store i32 42, ptr %arrayidx0, align 1 + + %arrayidx1 = getelementptr inbounds i32, ptr %A, i64 %m + store i32 42, ptr %arrayidx1, align 4 + ret i32 0 +} + +define void @linearized_accesses(i64 %n, i64 %m, i64 %o, ptr %A) { +; CHECK-LABEL: 'linearized_accesses' +; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx0, align 4 +; CHECK-NEXT: da analyze - output [* * *]! +; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx1, align 4 +; CHECK-NEXT: da analyze - output [* * *|<]! +; CHECK-NEXT: Src: store i32 1, ptr %idx1, align 4 --> Dst: store i32 1, ptr %idx1, align 4 +; CHECK-NEXT: da analyze - none! +; +entry: + br label %for.i + +for.i: + %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ] + br label %for.j + +for.j: + %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ] + br label %for.k + +for.k: + %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ] + %subscript0 = mul i64 %i, %m + %subscript1 = add i64 %j, %subscript0 + %subscript2 = mul i64 %subscript1, %o + %subscript3 = add i64 %subscript2, %k + %idx0 = getelementptr inbounds i64, ptr %A, i64 %subscript3 ; (i64*)(A) + i*m*o + j*o + k + store i32 1, ptr %idx0 + %idx1 = getelementptr inbounds i32, ptr %A, i64 %subscript3 ; (i32*)(A) + i*m*o + j*o + k + store i32 1, ptr %idx1 + br label %for.k.inc + +for.k.inc: + %k.inc = add nsw i64 %k, 1 + %k.exitcond = icmp eq i64 %k.inc, %o + br i1 %k.exitcond, label %for.j.inc, label %for.k + +for.j.inc: + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, %m + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, %n + br i1 %i.exitcond, label %end, label %for.i + +end: + ret void +} + +define void @multidim_accesses(ptr %A) { +; CHECK-LABEL: 
'multidim_accesses' +; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx0, align 4 +; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx1, align 4 +; FIXME: the dependence distance is not constant. Distance vector should be [* * *|<]! +; CHECK-NEXT: da analyze - consistent output [0 0 0|<]! +; CHECK-NEXT: Src: store i32 1, ptr %idx1, align 4 --> Dst: store i32 1, ptr %idx1, align 4 +; CHECK-NEXT: da analyze - none! +; +; for (i = 0; i < 256; i++) +; for (j = 0; j < 256; j++) +; for (k = 0; k < 256; k++) { +; int *idx0 = (int *)((long long *)(A) + 256*256*i + 256*j + k); +; *idx0 = 1; +; int *idx1 = (int *)((int *)(A) + 256*256*i + 256*j + k); +; *idx1 = 1; +; } +entry: + br label %for.i + +for.i: + %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ] + br label %for.j + +for.j: + %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ] + br label %for.k + +for.k: + %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ] + %idx0 = getelementptr inbounds [256 x [256 x [256 x i64]]], ptr %A, i64 %i, i64 %j, i64 %k + store i32 1, ptr %idx0 + %idx1 = getelementptr inbounds [256 x [256 x [256 x i32]]], ptr %A, i64 %i, i64 %j, i64 %k + store i32 1, ptr %idx1 + br label %for.k.inc + +for.k.inc: + %k.inc = add nsw i64 %k, 1 + %k.exitcond = icmp eq i64 %k.inc, 256 + br i1 %k.exitcond, label %for.j.inc, label %for.k + +for.j.inc: + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, 256 + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, 256 + br i1 %i.exitcond, label %end, label %for.i + +end: + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll b/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll index fa58a81d2355b..c1f8c85f2bf0e 100644 --- a/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll +++ b/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll @@ -42,8 +42,14 
@@ define void @test(ptr %A, ptr %B, i1 %arg, i32 %n, i32 %m) #0 align 2 { ; CHECK-NEXT: da analyze - consistent input [0 S S]! ; CHECK-NEXT: Src: %v27 = load <32 x i32>, ptr %v25, align 256 --> Dst: %v32 = load <32 x i32>, ptr %v30, align 128 ; CHECK-NEXT: da analyze - input [* S S|<]! +; CHECK-NEXT: Runtime Assumptions: +; CHECK-NEXT: Equal predicate: (zext i7 (4 * (trunc i32 %v1 to i7) * (1 + (trunc i32 %n to i7))) to i32) == 0 +; CHECK-NEXT: Equal predicate: (8 * (zext i4 (trunc i32 %v1 to i4) to i32)) == 0 ; CHECK-NEXT: Src: %v32 = load <32 x i32>, ptr %v30, align 128 --> Dst: %v32 = load <32 x i32>, ptr %v30, align 128 ; CHECK-NEXT: da analyze - consistent input [0 S S]! +; CHECK-NEXT: Runtime Assumptions: +; CHECK-NEXT: Equal predicate: (zext i7 (4 * (trunc i32 %v1 to i7) * (1 + (trunc i32 %n to i7))) to i32) == 0 +; CHECK-NEXT: Equal predicate: (8 * (zext i4 (trunc i32 %v1 to i4) to i32)) == 0 ; entry: %v1 = load i32, ptr %B, align 4