Skip to content

[SimplifyIndVar] ICMP predicate conversion to EQ/NE #144945

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 148 additions & 15 deletions llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ namespace {
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand);
void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand);
bool forceEqualityForICmp(ICmpInst *ICmp, Instruction *IVOperand);
void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand,
bool IsSigned);
void replaceRemWithNumerator(BinaryOperator *Rem);
Expand Down Expand Up @@ -244,6 +245,128 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
return true;
}

/// Try to change the predicate of \p ICmp to EQ/NE to facilitate better work
/// of OSR. This can be done only if all possible IV values but one lead to the
/// same comparison result, while the single 'chosen one' value gives the
/// opposite result.
///
/// Three strategies are tried in order:
///  1. The bound is a constant and the exact region described by the
///     comparison is itself expressible as an equality comparison.
///  2. The bound is a constant, the IV is a non-wrapping affine recurrence
///     with constant start/step, and the comparison result on the first
///     iteration differs from the one on the second iteration (so the start
///     value is the 'chosen one').
///  3. The comparison keeps its value for exactly the constant maximum
///     backedge-taken count of the loop, so the 'chosen one' is the IV value
///     on the very last iteration.
///
/// \param ICmp      comparison using \p IVOperand as one of its operands.
/// \param IVOperand the induction variable operand of \p ICmp.
/// \returns true if \p ICmp was rewritten in place (new predicate and new
///          constant bound), false if it was left untouched.
bool SimplifyIndvar::forceEqualityForICmp(ICmpInst *ICmp,
                                          Instruction *IVOperand) {
  // Already an equality comparison: nothing to do.
  if (ICmp->isEquality())
    return false;

  // The new bound is materialized below as a ConstantInt of the IV type, so
  // only integer induction variables can be handled.
  Type *Ty = IVOperand->getType();
  if (!Ty->isIntegerTy())
    return false;

  const bool IVIsLHS = IVOperand == ICmp->getOperand(0);
  const unsigned BoundOperandIdx = IVIsLHS ? 1 : 0;
  const SCEV *BoundSCEV = SE->getSCEV(ICmp->getOperand(BoundOperandIdx));
  const SCEVConstant *BoundC = dyn_cast<SCEVConstant>(BoundSCEV);

  // All reasoning below is done on the form "IV <pred> Bound". If the IV is
  // the right-hand operand of the instruction, the predicate has to be swapped
  // to describe the same comparison in that form.
  const CmpInst::Predicate IVPredicate =
      IVIsLHS ? ICmp->getPredicate()
              : CmpInst::getSwappedPredicate(ICmp->getPredicate());
  CmpInst::Predicate NewPredicate = CmpInst::BAD_ICMP_PREDICATE;
  APInt NewBoundA;

  if (BoundC) {
    // Try to find the 'chosen one' value based on the predicate and the bound
    // alone.
    const APInt &BoundA = BoundC->getAPInt();
    ConstantRange ExactCR =
        ConstantRange::makeExactICmpRegion(IVPredicate, BoundA);
    if (!ExactCR.getEquivalentICmp(NewPredicate, NewBoundA))
      NewPredicate = CmpInst::BAD_ICMP_PREDICATE;
  }

  if (!ICmpInst::isEquality(NewPredicate)) {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVOperand));
    if (!AR)
      return false;

    const SCEVConstant *IVStart = dyn_cast<SCEVConstant>(AR->getStart());
    const SCEVConstant *IVStep =
        dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
    // A constant start and a constant non-zero step are required.
    if (!IVStart || !IVStep || IVStep->getValue()->isZero())
      return false;

    if (BoundC) {
      // Check to see whether the 'chosen one' value is the IV start value.
      // The no-wrap flag matching the signedness of the predicate makes the
      // IV monotonic in the order the predicate compares in, so once the
      // result flips after the first iteration it cannot flip back.
      bool HasNoWrap = ICmpInst::isSigned(IVPredicate)
                           ? AR->hasNoSignedWrap()
                           : AR->hasNoUnsignedWrap();
      if (HasNoWrap) {
        const DataLayout &DL = ICmp->getParent()->getDataLayout();
        Constant *SecondIterIV =
            ConstantInt::get(Ty, IVStart->getAPInt() + IVStep->getAPInt());
        Constant *FirstIterResult = ConstantFoldCompareInstOperands(
            IVPredicate, IVStart->getValue(), BoundC->getValue(), DL);
        Constant *SecondIterResult = ConstantFoldCompareInstOperands(
            IVPredicate, SecondIterIV, BoundC->getValue(), DL);
        // Guard against a failed fold before dereferencing the results.
        if (FirstIterResult && SecondIterResult &&
            FirstIterResult != SecondIterResult) {
          NewBoundA = IVStart->getAPInt();
          NewPredicate = FirstIterResult->isAllOnesValue() ? CmpInst::ICMP_EQ
                                                           : CmpInst::ICMP_NE;
        }
      }
    }

    if (!ICmpInst::isEquality(NewPredicate)) {
      // Check to see whether the 'chosen one' value is the very last IV value.
      // To put it differently, check to see if ICmp directly or indirectly
      // defines the maximum loop trip count (or simply has aligned behavior by
      // accident). This is different from loop exit condition rewriting as
      // here not only ICmp instructions directly feeding the exiting branch
      // are considered.

      // The last IV value is computed from the backedge-taken count of L, so
      // the recurrence must be a recurrence of L itself.
      if (AR->getLoop() != L)
        return false;

      // The maximum backedge-taken count has to be a known constant.
      const SCEVConstant *MaxBackCount =
          dyn_cast<SCEVConstant>(SE->getConstantMaxBackedgeTakenCount(L));
      if (!MaxBackCount)
        return false;

      // Compute the number of consecutive iterations in which the produced
      // predicate value stays the same. First assume the comparison starts
      // out true (exit on false); if that yields nothing useful, retry with
      // the opposite polarity.
      bool ExitIfTrue = false;
      auto EL = SE->computeExitLimitFromCond(L, ICmp, ExitIfTrue, false);
      const SCEVConstant *SameIterCount =
          dyn_cast<SCEVConstant>(EL.ExactNotTaken);
      if (!SameIterCount || SameIterCount->getValue()->isZero()) {
        ExitIfTrue = !ExitIfTrue;
        EL = SE->computeExitLimitFromCond(L, ICmp, ExitIfTrue, false);
        SameIterCount = dyn_cast<SCEVConstant>(EL.ExactNotTaken);
      }

      // SCEV expressions are uniqued, so pointer comparison is sufficient.
      if (SameIterCount != MaxBackCount) {
        // ICmp isn't aligned with the maximum trip count.
        return false;
      }

      // Bring the iteration count to the IV bit width. If the count does not
      // fit, the IV value of the last iteration has already been produced by
      // an earlier iteration and an equality test cannot single it out.
      const unsigned IVBitWidth = IVStep->getAPInt().getBitWidth();
      APInt SameIterCountA = SameIterCount->getAPInt();
      if (SameIterCountA.getActiveBits() > IVBitWidth)
        return false;
      SameIterCountA = SameIterCountA.zextOrTrunc(IVBitWidth);

      NewBoundA = IVStart->getAPInt() + (IVStep->getAPInt() * SameIterCountA);
      NewPredicate = ExitIfTrue ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
    }
  }

  // The target hook takes a 64-bit immediate; a bound that does not fit cannot
  // be queried (getSExtValue would assert), so give up on it.
  if (!NewBoundA.isSignedIntN(64) ||
      !TTI->isLegalICmpImmediate(NewBoundA.getSExtValue()))
    return false;

  LLVM_DEBUG(dbgs() << "INDVARS: Force EQ/NE predicate for max trip count: "
                    << *ICmp << '\n');

  assert(Ty->getPrimitiveSizeInBits() == NewBoundA.getBitWidth() &&
         "bit widths should be aligned");
  // EQ/NE are symmetric, so the bound can stay on whichever side it was.
  ICmp->setOperand(BoundOperandIdx, ConstantInt::get(Ty, NewBoundA));
  ICmp->setPredicate(NewPredicate);

  return true;
}

/// SimplifyIVUsers helper for eliminating useless
/// comparisons against an induction variable.
void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
Expand All @@ -267,33 +390,43 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
// If the condition is always true or always false in the given context,
// replace it with a constant value.
SmallVector<Instruction *, 4> Users;
bool IsDead = false;
for (auto *U : ICmp->users())
Users.push_back(cast<Instruction>(U));
const Instruction *CtxI = findCommonDominator(Users, *DT);
if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) {
SE->forgetValue(ICmp);
ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev));
DeadInsts.emplace_back(ICmp);
IsDead = true;
LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
} else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
// fallthrough to end of function
} else if (ICmpInst::isSigned(OriginalPred) &&
SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
IsDead = true;
} else {
// If we were unable to make anything above, all we can is to canonicalize
// the comparison hoping that it will open the doors for other
// optimizations. If we find out that we compare two non-negative values,
// we turn the instruction's predicate to its unsigned version. Note that
// we cannot rely on Pred here unless we check if we have swapped it.
assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
<< '\n');
ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
ICmp->setSameSign();
} else
return;
// optimizations.
if (ICmpInst::isSigned(OriginalPred) && SE->isKnownNonNegative(S) &&
SE->isKnownNonNegative(X)) {
// If we find out that we compare two non-negative values,
// we turn the instruction's predicate to its unsigned version. Note that
// we cannot rely on Pred here unless we check if we have swapped it.
assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
<< '\n');
ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
ICmp->setSameSign();
Changed = true;
}
if (forceEqualityForICmp(ICmp, IVOperand)) {
Changed = true;
}
}

++NumElimCmp;
Changed = true;
if (IsDead) {
NumElimCmp++;
Changed = true;
}
}

bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/IndVarSimplify/AArch64/loop-guards.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define i32 @guards_applied_to_add_rec(ptr %dst) {
; CHECK-NEXT: [[OUTER_IV_0:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[OUTER_IV_0_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; CHECK-NEXT: [[OUTER_IV_1:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[OUTER_IV_0]], %[[OUTER_LATCH]] ]
; CHECK-NEXT: [[SHR28:%.*]] = lshr i32 [[OUTER_IV_1]], 1
; CHECK-NEXT: [[PRE:%.*]] = icmp samesign ult i32 [[OUTER_IV_1]], 2
; CHECK-NEXT: [[PRE:%.*]] = icmp samesign eq i32 [[OUTER_IV_1]], 1
; CHECK-NEXT: br i1 [[PRE]], label %[[OUTER_LATCH]], label %[[INNER_PREHEADER:.*]]
; CHECK: [[INNER_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SHR28]] to i64
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ define void @test2(ptr %a, ptr %b, i8 %limit, i1 %arg) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[LIMIT:%.*]] to i32
; CHECK-NEXT: br i1 %arg, label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
; CHECK: for.cond1.preheader.us.preheader:
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[CONV]], i32 1)
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]
Expand All @@ -110,7 +110,7 @@ define void @test2(ptr %a, ptr %b, i8 %limit, i1 %arg) {
; CHECK-NEXT: br label [[FOR_INC13_US]]
; CHECK: for.inc13.us:
; CHECK-NEXT: [[INDVARS_IV_NEXT4]] = add nuw nsw i64 [[INDVARS_IV3]], 1
; CHECK-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT4]], 4
; CHECK-NEXT: [[EXITCOND6:%.*]] = icmp samesign ne i64 [[INDVARS_IV_NEXT4]], 4
; CHECK-NEXT: br i1 [[EXITCOND6]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_END_LOOPEXIT1:%.*]]
; CHECK: for.body4.us:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY4_LR_PH_US]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US:%.*]] ]
Expand Down Expand Up @@ -237,8 +237,7 @@ define i32 @test4(i32 %a) {
; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[OR]] to i8
; CHECK-NEXT: [[CALL:%.*]] = call i32 @fn1(i8 signext [[CONV3]])
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i32 [[INDVARS_IV]], -1
; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i32 [[INDVARS_IV_NEXT]] to i8
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[TMP0]], -14
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[INDVARS_IV_NEXT]], 242
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
Expand Down Expand Up @@ -517,8 +516,8 @@ define i32 @test10(i32 %v) {
; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], [[SEXT]]
; CHECK-NEXT: call void @consume.i1(i1 [[TMP1]])
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp samesign ne i64 [[INDVARS_IV_NEXT]], 11
; CHECK-NEXT: call void @consume.i64(i64 [[TMP0]])
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 11
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LEAVE:%.*]]
; CHECK: leave:
; CHECK-NEXT: ret i32 22
Expand Down
37 changes: 33 additions & 4 deletions llvm/test/Transforms/IndVarSimplify/X86/pr24356.ll
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=indvars -indvars-predicate-loops=0 < %s | FileCheck %s

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
Expand All @@ -7,15 +8,43 @@ target triple = "x86_64-apple-macosx10.10.0"

; Function Attrs: nounwind ssp uwtable
define void @fn1() {
; CHECK-LABEL: @fn1(
; CHECK-LABEL: define void @fn1() {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: br label %[[BB4_PREHEADER:.*]]
; CHECK: [[BB4_PREHEADER]]:
; CHECK-NEXT: [[B_03:%.*]] = phi i8 [ 0, %[[BB]] ], [ [[TMP17:%.*]], %[[BB16:.*]] ]
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i8 [[B_03]], 0
; CHECK-NEXT: br i1 [[TMP9]], label %[[BB4_PREHEADER_BB18_LOOPEXIT_SPLIT_CRIT_EDGE:.*]], label %[[BB4_PREHEADER_BB4_PREHEADER_SPLIT_CRIT_EDGE:.*]]
; CHECK: [[BB4_PREHEADER_BB4_PREHEADER_SPLIT_CRIT_EDGE]]:
; CHECK-NEXT: br label %[[BB4_PREHEADER_SPLIT:.*]]
; CHECK: [[BB4_PREHEADER_BB18_LOOPEXIT_SPLIT_CRIT_EDGE]]:
; CHECK-NEXT: store i32 0, ptr @a, align 4
; CHECK-NEXT: br label %[[BB18_LOOPEXIT_SPLIT:.*]]
; CHECK: [[BB4_PREHEADER_SPLIT]]:
; CHECK-NEXT: br label %[[BB7:.*]]
; CHECK: [[BB4:.*]]:
; CHECK-NEXT: br i1 false, label %[[BB7]], label %[[BB16]]
; CHECK: [[BB7]]:
; CHECK-NEXT: br i1 false, label %[[BB18_LOOPEXIT:.*]], label %[[BB4]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[TMP17]] = add nuw nsw i8 [[B_03]], -1
; CHECK-NEXT: br i1 false, label %[[BB18_LOOPEXIT1:.*]], label %[[BB4_PREHEADER]]
; CHECK: [[BB18_LOOPEXIT]]:
; CHECK-NEXT: br label %[[BB18_LOOPEXIT_SPLIT]]
; CHECK: [[BB18_LOOPEXIT_SPLIT]]:
; CHECK-NEXT: br label %[[BB18:.*]]
; CHECK: [[BB18_LOOPEXIT1]]:
; CHECK-NEXT: [[TMP14_LCSSA5_LCSSA:%.*]] = phi i32 [ 1, %[[BB16]] ]
; CHECK-NEXT: store i32 [[TMP14_LCSSA5_LCSSA]], ptr @a, align 4
; CHECK-NEXT: br label %[[BB18]]
; CHECK: [[BB18]]:
; CHECK-NEXT: ret void
;
bb:
br label %bb4.preheader

bb4.preheader: ; preds = %bb, %bb16
; CHECK-LABEL: bb4.preheader:
%b.03 = phi i8 [ 0, %bb ], [ %tmp17, %bb16 ]
; CHECK: %tmp9 = icmp ugt i8 %b.03, 1
; CHECK-NOT: %tmp9 = icmp ugt i8 0, 1

%tmp9 = icmp ugt i8 %b.03, 1
br i1 %tmp9, label %bb4.preheader.bb18.loopexit.split_crit_edge, label %bb4.preheader.bb4.preheader.split_crit_edge
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/IndVarSimplify/ada-loops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ define void @kinds__urangezero(ptr nocapture %a) nounwind {
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [21 x i32], ptr [[A]], i32 0, i32 [[TMP4]]
; CHECK-NEXT: store i32 0, ptr [[TMP5]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 31
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp samesign eq i32 [[INDVARS_IV_NEXT]], 31
; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[BB]]
; CHECK: return:
; CHECK-NEXT: ret void
Expand Down
52 changes: 12 additions & 40 deletions llvm/test/Transforms/PGOProfile/Inputs/thinlto_cs.proftext
Original file line number Diff line number Diff line change
@@ -1,72 +1,44 @@
# CSIR level Instrumentation Flag
:csir
cond.llvm.11253644763537639171
# Func Hash:
1152921517491748863
# Num Counters:
1
# Counter Values:
200000

foo
# Func Hash:
1720106746050921044
# Num Counters:
2
# Counter Values:
100000
1

bar
# Func Hash:
1299757151682747028
# Num Counters:
2
# Counter Values:
0
0

bar
# Func Hash:
29667547796
# Num Counters:
2
# Counter Values:
100000
100000

main
# Func Hash:
1152921517491748863
1895182923573755903
# Num Counters:
1
# Counter Values:
1

main
cspgo_bar.c;clobber
# Func Hash:
1895182923573755903
# Num Counters:
1
# Counter Values:
1
200000

cspgo.c:foo
cspgo_bar.c;cond
# Func Hash:
1720106746050921044
1895182923573755903
# Num Counters:
4
# Counter Values:
100000
100000
0
1
# Counter Values:
200000

cspgo_bar.c:cond
cspgo.c;foo
# Func Hash:
12884901887
2216626667076672412
# Num Counters:
1
2
# Counter Values:
200000
100000
1

Loading