@@ -23589,17 +23589,16 @@ static SDValue combineV3I8LoadExt(LoadSDNode *LD, SelectionDAG &DAG) {
23589
23589
return DAG.getMergeValues({Extract, TokenFactor}, DL);
23590
23590
}
23591
23591
23592
- // Replace scalable loads with fixed loads when vscale_range(1, 1).
23592
+ // Replace packed scalable loads with fixed loads when vscale_range(1, 1).
23593
23593
// This enables further optimisations such as LDP folds.
23594
23594
static SDValue combineVScale1Load(LoadSDNode *LD, SelectionDAG &DAG,
23595
+ TargetLowering::DAGCombinerInfo &DCI,
23595
23596
const AArch64Subtarget *Subtarget) {
23596
23597
EVT MemVT = LD->getMemoryVT();
23597
- if (!Subtarget->isNeonAvailable() || !MemVT.isScalableVector() ||
23598
- Subtarget->getMaxSVEVectorSizeInBits() != AArch64::SVEBitsPerBlock)
23599
- return SDValue();
23600
-
23601
- // Skip unpacked types given their different layouts between Neon and SVE.
23602
- if (MemVT.getSizeInBits().getKnownMinValue() != AArch64::SVEBitsPerBlock)
23598
+ if (!DCI.isBeforeLegalize() || !Subtarget->hasNEON() ||
23599
+ !MemVT.isScalableVector() || LD->getExtensionType() != ISD::NON_EXTLOAD ||
23600
+ MemVT.getSizeInBits().getKnownMinValue() != 128 ||
23601
+ Subtarget->getMaxSVEVectorSizeInBits() != 128)
23603
23602
return SDValue();
23604
23603
23605
23604
SDLoc DL(LD);
@@ -23609,9 +23608,7 @@ static SDValue combineVScale1Load(LoadSDNode *LD, SelectionDAG &DAG,
23609
23608
NewVT, DL, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
23610
23609
LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
23611
23610
SDValue Insert = convertToScalableVector(DAG, MemVT, NewLoad);
23612
- SDValue TokenFactor = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
23613
- {SDValue(cast<SDNode>(NewLoad), 1)});
23614
- return DAG.getMergeValues({Insert, TokenFactor}, DL);
23611
+ return DAG.getMergeValues({Insert, SDValue(cast<SDNode>(NewLoad), 1)}, DL);
23615
23612
}
23616
23613
23617
23614
// Perform TBI simplification if supported by the target and try to break up
@@ -23651,7 +23648,7 @@ static SDValue performLOADCombine(SDNode *N,
23651
23648
if (SDValue Res = combineV3I8LoadExt(LD, DAG))
23652
23649
return Res;
23653
23650
23654
- if (SDValue Res = combineVScale1Load(LD, DAG, Subtarget))
23651
+ if (SDValue Res = combineVScale1Load(LD, DAG, DCI, Subtarget))
23655
23652
return Res;
23656
23653
23657
23654
if (!LD->isNonTemporal())
@@ -23912,18 +23909,17 @@ static SDValue combineI8TruncStore(StoreSDNode *ST, SelectionDAG &DAG,
23912
23909
return Chain;
23913
23910
}
23914
23911
23915
- // Replace scalable stores with fixed stores when vscale_range(1, 1).
23912
+ // Replace packed scalable stores with fixed stores when vscale_range(1, 1).
23916
23913
static SDValue combineVScale1Store(StoreSDNode *ST, SelectionDAG &DAG,
23914
+ TargetLowering::DAGCombinerInfo &DCI,
23917
23915
const AArch64Subtarget *Subtarget) {
23918
23916
SDValue Value = ST->getValue();
23919
23917
EVT ValueVT = Value.getValueType();
23920
23918
if (ST->isVolatile() || !Subtarget->isLittleEndian() ||
23921
- !Subtarget->isNeonAvailable() || !ValueVT.isScalableVector() ||
23922
- Subtarget->getMaxSVEVectorSizeInBits() != AArch64::SVEBitsPerBlock)
23923
- return SDValue();
23924
-
23925
- // Skip unpacked types given their different layouts between Neon and SVE.
23926
- if (ValueVT.getSizeInBits().getKnownMinValue() != AArch64::SVEBitsPerBlock)
23919
+ !DCI.isBeforeLegalize() || !Subtarget->hasNEON() ||
23920
+ !ValueVT.isScalableVector() || ST->isTruncatingStore() ||
23921
+ ValueVT.getSizeInBits().getKnownMinValue() != 128 ||
23922
+ Subtarget->getMaxSVEVectorSizeInBits() != 128)
23927
23923
return SDValue();
23928
23924
23929
23925
SDLoc DL(ST);
@@ -23970,7 +23966,7 @@ static SDValue performSTORECombine(SDNode *N,
23970
23966
if (SDValue Res = combineI8TruncStore(ST, DAG, Subtarget))
23971
23967
return Res;
23972
23968
23973
- if (SDValue Res = combineVScale1Store(ST, DAG, Subtarget))
23969
+ if (SDValue Res = combineVScale1Store(ST, DAG, DCI, Subtarget))
23974
23970
return Res;
23975
23971
23976
23972
// If this is an FP_ROUND followed by a store, fold this into a truncating
0 commit comments