
Commit b5c5289

[LV] Use getFixedValue instead of getKnownMinValue when appropriate
There are many places in VPlan and LoopVectorize where we use getKnownMinValue to discover the number of elements in a vector. Where we expect the vector to have a fixed length, I have used the stronger getFixedValue call. I believe this is clearer and adds extra protection in the form of an assert in getFixedValue that the vector is not scalable.

While looking at VPFirstOrderRecurrencePHIRecipe::computeCost I also took the liberty of simplifying the code.

In theory I believe this patch should be NFC, but I'm reluctant to add that to the title in case we're just missing tests. I built and ran the LLVM test suite when targeting neoverse-v1 and it seemed ok.
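For readers less familiar with the ElementCount API, here is a minimal standalone sketch (not part of this commit; the helper name lanesForScalarization is made up for illustration) of the distinction the message relies on: getKnownMinValue() returns the known minimum element count for both fixed and scalable vectors, while getFixedValue() additionally asserts that the count is not scalable.

#include "llvm/Support/TypeSize.h"

using llvm::ElementCount;

// Hypothetical helper: scalarization-style code wants one lane per element,
// which is only meaningful for a fixed-length VF.
static unsigned lanesForScalarization(ElementCount VF) {
  // For <4 x i32> both getKnownMinValue() and getFixedValue() return 4.
  // For <vscale x 4 x i32> getKnownMinValue() would still return 4, which is
  // not the runtime element count; getFixedValue() asserts !isScalable().
  return VF.getFixedValue();
}

int main() {
  return lanesForScalarization(ElementCount::getFixed(4)) == 4 ? 0 : 1;
}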
1 parent: 80ea5f4 · commit: b5c5289

File tree

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

3 files changed: +30 -29 lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 19 additions & 15 deletions
@@ -3116,12 +3116,13 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
     // that we will create. This cost is likely to be zero. The phi node
     // cost, if any, should be scaled by the block probability because it
     // models a copy at the end of each predicated block.
-    ScalarizationCost += VF.getKnownMinValue() *
-                         TTI.getCFInstrCost(Instruction::PHI, CostKind);
+    ScalarizationCost +=
+        VF.getFixedValue() * TTI.getCFInstrCost(Instruction::PHI, CostKind);
 
     // The cost of the non-predicated instruction.
-    ScalarizationCost += VF.getKnownMinValue() *
-        TTI.getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind);
+    ScalarizationCost +=
+        VF.getFixedValue() *
+        TTI.getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind);
 
     // The cost of insertelement and extractelement instructions needed for
     // scalarization.
@@ -4290,7 +4291,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
       return NumLegalParts <= VF.getKnownMinValue();
     }
     // Two or more elements that share a register - are vectorized.
-    return NumLegalParts < VF.getKnownMinValue();
+    return NumLegalParts < VF.getFixedValue();
   };
 
   // If no def nor is a store, e.g., branches, continue - no value to check.
@@ -4575,8 +4576,8 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
       assert(!isa<SCEVCouldNotCompute>(TC) &&
              "Trip count SCEV must be computable");
       RemainingIterations = SE.getURemExpr(
-          TC, SE.getConstant(TCType, MainLoopVF.getKnownMinValue() * IC));
-      MaxTripCount = MainLoopVF.getKnownMinValue() * IC - 1;
+          TC, SE.getConstant(TCType, MainLoopVF.getFixedValue() * IC));
+      MaxTripCount = MainLoopVF.getFixedValue() * IC - 1;
       if (SE.isKnownPredicate(CmpInst::ICMP_ULT, RemainingIterations,
                               SE.getConstant(TCType, MaxTripCount))) {
         MaxTripCount =
@@ -4587,7 +4588,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
     }
     if (SE.isKnownPredicate(
             CmpInst::ICMP_UGT,
-            SE.getConstant(TCType, NextVF.Width.getKnownMinValue()),
+            SE.getConstant(TCType, NextVF.Width.getFixedValue()),
             RemainingIterations))
       continue;
   }
@@ -5258,14 +5259,14 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
 
   // Get the cost of the scalar memory instruction and address computation.
   InstructionCost Cost =
-      VF.getKnownMinValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
+      VF.getFixedValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
 
   // Don't pass *I here, since it is scalar but will actually be part of a
   // vectorized loop where the user of it is a vectorized instruction.
   const Align Alignment = getLoadStoreAlignment(I);
-  Cost += VF.getKnownMinValue() * TTI.getMemoryOpCost(I->getOpcode(),
-                                                      ValTy->getScalarType(),
-                                                      Alignment, AS, CostKind);
+  Cost += VF.getFixedValue() * TTI.getMemoryOpCost(I->getOpcode(),
+                                                   ValTy->getScalarType(),
+                                                   Alignment, AS, CostKind);
 
   // Get the overhead of the extractelement and insertelement instructions
   // we might create due to scalarization.
@@ -5281,7 +5282,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
     auto *VecI1Ty =
         VectorType::get(IntegerType::getInt1Ty(ValTy->getContext()), VF);
     Cost += TTI.getScalarizationOverhead(
-        VecI1Ty, APInt::getAllOnes(VF.getKnownMinValue()),
+        VecI1Ty, APInt::getAllOnes(VF.getFixedValue()),
         /*Insert=*/false, /*Extract=*/true, CostKind);
     Cost += TTI.getCFInstrCost(Instruction::Br, CostKind);
 
@@ -5327,7 +5328,6 @@ InstructionCost
 LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
                                                 ElementCount VF) {
   assert(Legal->isUniformMemOp(*I, VF));
-
   Type *ValTy = getLoadStoreType(I);
   auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
   const Align Alignment = getLoadStoreAlignment(I);
@@ -5342,6 +5342,10 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
   StoreInst *SI = cast<StoreInst>(I);
 
   bool IsLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
+  // TODO: We have tests that request the cost of extracting element
+  // VF.getKnownMinValue() - 1 from a scalable vector. This is actually
+  // meaningless, given what we actually want is the last lane and is likely
+  // to be more expensive.
   return TTI.getAddressComputationCost(ValTy) +
          TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
                              CostKind) +
@@ -5624,7 +5628,7 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
 
   for (Type *VectorTy : getContainedTypes(RetTy)) {
     Cost += TTI.getScalarizationOverhead(
-        cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getKnownMinValue()),
+        cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
         /*Insert=*/true,
         /*Extract=*/false, CostKind);
   }

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 3 additions & 4 deletions
@@ -331,7 +331,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
 
   bool IsSingleScalar = vputils::isSingleScalar(Def);
 
-  VPLane LastLane(IsSingleScalar ? 0 : VF.getKnownMinValue() - 1);
+  VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
   // Check if there is a scalar value for the selected lane.
   if (!hasScalarValue(Def, LastLane)) {
     // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
@@ -368,7 +368,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
     assert(!VF.isScalable() && "VF is assumed to be non scalable.");
     Value *Undef = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
     set(Def, Undef);
-    for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
+    for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
       packScalarIntoVectorizedValue(Def, Lane);
     VectorValue = get(Def);
   }
@@ -789,8 +789,7 @@ void VPRegionBlock::execute(VPTransformState *State) {
   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
       Entry);
   State->Lane = VPLane(0);
-  for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
-       ++Lane) {
+  for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
     State->Lane = VPLane(Lane, VPLane::Kind::First);
     // Visit the VPBlocks connected to \p this, starting from it.
     for (VPBlockBase *Block : RPOT) {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 8 additions & 10 deletions
@@ -872,7 +872,7 @@ void VPInstruction::execute(VPTransformState &State) {
                                     isVectorToScalar() || isSingleScalar());
   bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
   if (GeneratesPerAllLanes) {
-    for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
+    for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
          Lane != NumLanes; ++Lane) {
       Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
       assert(GeneratedValue && "generatePerLane must produce a value");
@@ -2792,8 +2792,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
   }
 
   // Generate scalar instances for all VF lanes.
-  assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
-  const unsigned EndLane = State.VF.getKnownMinValue();
+  const unsigned EndLane = State.VF.getFixedValue();
   for (unsigned Lane = 0; Lane < EndLane; ++Lane)
     scalarizeInstruction(UI, this, VPLane(Lane), State);
 }
@@ -2846,7 +2845,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
                UI->getOpcode(), ResultTy, CostKind,
                {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
                Op2Info, Operands, UI, &Ctx.TLI) *
-           (isSingleScalar() ? 1 : VF.getKnownMinValue());
+           (isSingleScalar() ? 1 : VF.getFixedValue());
   }
 }
 
@@ -3407,7 +3406,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
     Value *ResBlockInMask = State.get(BlockInMask);
     Value *ShuffledMask = State.Builder.CreateShuffleVector(
         ResBlockInMask,
-        createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
+        createReplicatedMask(InterleaveFactor, State.VF.getFixedValue()),
         "interleaved.mask");
     return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
                                                    ShuffledMask, MaskForGaps)
@@ -3419,8 +3418,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
   if (isa<LoadInst>(Instr)) {
     Value *MaskForGaps = nullptr;
    if (NeedsMaskForGaps) {
-      MaskForGaps = createBitMaskForGaps(State.Builder,
-                                         State.VF.getKnownMinValue(), *Group);
+      MaskForGaps =
+          createBitMaskForGaps(State.Builder, State.VF.getFixedValue(), *Group);
       assert(MaskForGaps && "Mask for Gaps is required but it is null");
     }
 
@@ -3508,13 +3507,12 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
        continue;
 
      auto StrideMask =
-          createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
+          createStrideMask(I, InterleaveFactor, State.VF.getFixedValue());
      Value *StridedVec =
          State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
 
      // If this member has different type, cast the result type.
      if (Member->getType() != ScalarTy) {
-        assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
        VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
        StridedVec =
            createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
@@ -3850,7 +3848,7 @@ VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
   if (VF.isScalar())
     return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
 
-  if (VF.isScalable() && VF.getKnownMinValue() == 1)
+  if (VF == ElementCount::getScalable(1))
     return InstructionCost::getInvalid();
 
   return 0;
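A side note on the computeCost simplification above: ElementCount equality compares both the minimum element count and the scalable flag, so the new condition should match the old one for every VF. A standalone sketch (not part of this commit) that exercises that equivalence:

#include "llvm/Support/TypeSize.h"

int main() {
  using llvm::ElementCount;
  // Check a few fixed and scalable VFs: the old condition
  // (VF.isScalable() && VF.getKnownMinValue() == 1) and the new one
  // (VF == ElementCount::getScalable(1)) should agree everywhere.
  for (bool Scalable : {false, true})
    for (unsigned Min : {1u, 2u, 4u, 8u}) {
      ElementCount VF = ElementCount::get(Min, Scalable);
      bool Old = VF.isScalable() && VF.getKnownMinValue() == 1;
      bool New = (VF == ElementCount::getScalable(1));
      if (Old != New)
        return 1; // a mismatch would mean the rewrite changed behaviour
    }
  return 0;
}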
