Skip to content

Commit cb3f415

Browse files
committed
[PowerPC] Fix up memory ordering after combining BV to a load
The combiner for BUILD_VECTOR that merges consecutive loads into a wide load had two issues:
- It didn't check that the input loads all have the same input chain.
- It didn't update nodes that are chained to the original loads to be chained to the new load.

This caused issues with bootstrap when 3c4d2a0 was committed. This patch fixes both issues so that 3c4d2a0 is unblocked.

Differential revision: https://reviews.llvm.org/D140046
1 parent 29fa062 commit cb3f415

File tree

3 files changed

+96
-29
lines changed

3 files changed

+96
-29
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11497,7 +11497,7 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
1149711497
return false;
1149811498
if (LD->getChain() != Base->getChain())
1149911499
return false;
11500-
EVT VT = LD->getValueType(0);
11500+
EVT VT = LD->getMemoryVT();
1150111501
if (VT.getSizeInBits() / 8 != Bytes)
1150211502
return false;
1150311503

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14245,17 +14245,23 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
1424514245
unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
1424614246
SDValue FirstInput = N->getOperand(0);
1424714247
bool IsRoundOfExtLoad = false;
14248+
LoadSDNode *FirstLoad = nullptr;
1424814249

1424914250
if (FirstInput.getOpcode() == ISD::FP_ROUND &&
1425014251
FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14251-
LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
14252-
IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
14252+
FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14253+
IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
1425314254
}
1425414255
// Not a build vector of (possibly fp_rounded) loads.
1425514256
if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
1425614257
N->getNumOperands() == 1)
1425714258
return SDValue();
1425814259

14260+
if (!IsRoundOfExtLoad)
14261+
FirstLoad = cast<LoadSDNode>(FirstInput);
14262+
14263+
SmallVector<LoadSDNode *, 4> InputLoads;
14264+
InputLoads.push_back(FirstLoad);
1425914265
for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
1426014266
// If any inputs are fp_round(extload), they all must be.
1426114267
if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
@@ -14268,53 +14274,55 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
1426814274

1426914275
SDValue PreviousInput =
1427014276
IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14271-
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
14272-
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
14277+
LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14278+
LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
1427314279

1427414280
// If any inputs are fp_round(extload), they all must be.
1427514281
if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
1427614282
return SDValue();
1427714283

14278-
if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
14284+
// We only care about regular loads. The PPC-specific load intrinsics
14285+
// will not lead to a merge opportunity.
14286+
if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
1427914287
InputsAreConsecutiveLoads = false;
14280-
if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
14288+
if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
1428114289
InputsAreReverseConsecutive = false;
1428214290

1428314291
// Exit early if the loads are neither consecutive nor reverse consecutive.
1428414292
if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
1428514293
return SDValue();
14294+
InputLoads.push_back(LD2);
1428614295
}
1428714296

1428814297
assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
1428914298
"The loads cannot be both consecutive and reverse consecutive.");
1429014299

14291-
SDValue FirstLoadOp =
14292-
IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
14293-
SDValue LastLoadOp =
14294-
IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
14295-
N->getOperand(N->getNumOperands()-1);
14296-
14297-
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
14298-
LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
14300+
SDValue WideLoad;
14301+
SDValue ReturnSDVal;
1429914302
if (InputsAreConsecutiveLoads) {
14300-
assert(LD1 && "Input needs to be a LoadSDNode.");
14301-
return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
14302-
LD1->getBasePtr(), LD1->getPointerInfo(),
14303-
LD1->getAlign());
14304-
}
14305-
if (InputsAreReverseConsecutive) {
14306-
assert(LDL && "Input needs to be a LoadSDNode.");
14307-
SDValue Load =
14308-
DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), LDL->getBasePtr(),
14309-
LDL->getPointerInfo(), LDL->getAlign());
14303+
assert(FirstLoad && "Input needs to be a LoadSDNode.");
14304+
WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14305+
FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14306+
FirstLoad->getAlign());
14307+
ReturnSDVal = WideLoad;
14308+
} else if (InputsAreReverseConsecutive) {
14309+
LoadSDNode *LastLoad = InputLoads.back();
14310+
assert(LastLoad && "Input needs to be a LoadSDNode.");
14311+
WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14312+
LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14313+
LastLoad->getAlign());
1431014314
SmallVector<int, 16> Ops;
1431114315
for (int i = N->getNumOperands() - 1; i >= 0; i--)
1431214316
Ops.push_back(i);
1431314317

14314-
return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
14315-
DAG.getUNDEF(N->getValueType(0)), Ops);
14316-
}
14317-
return SDValue();
14318+
ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14319+
DAG.getUNDEF(N->getValueType(0)), Ops);
14320+
} else
14321+
return SDValue();
14322+
14323+
for (auto *LD : InputLoads)
14324+
DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14325+
return ReturnSDVal;
1431814326
}
1431914327

1432014328
// This function adds the required vector_shuffle needed to get
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- -mcpu=pwr8 < %s | \
3+
; RUN: FileCheck %s
4+
5+
%0 = type <{ %1, ptr, i32, [4 x i8] }>
6+
%1 = type { %2 }
7+
%2 = type { %3 }
8+
%3 = type { ptr, ptr, ptr }
9+
10+
$testfunc = comdat any
11+
12+
declare void @_ZdlPv() local_unnamed_addr #0
13+
14+
define void @testfunc(i64 %arg) local_unnamed_addr #0 comdat {
15+
; CHECK-LABEL: testfunc:
16+
; CHECK: # %bb.0: # %bb
17+
; CHECK-NEXT: mflr 0
18+
; CHECK-NEXT: stdu 1, -80(1)
19+
; CHECK-NEXT: std 0, 96(1)
20+
; CHECK-NEXT: .cfi_def_cfa_offset 80
21+
; CHECK-NEXT: .cfi_offset lr, 16
22+
; CHECK-NEXT: .cfi_offset v30, -32
23+
; CHECK-NEXT: .cfi_offset v31, -16
24+
; CHECK-NEXT: li 4, 48
25+
; CHECK-NEXT: addi 3, 3, 24
26+
; CHECK-NEXT: stvx 30, 1, 4 # 16-byte Folded Spill
27+
; CHECK-NEXT: li 4, 64
28+
; CHECK-NEXT: stvx 31, 1, 4 # 16-byte Folded Spill
29+
; CHECK-NEXT: lxvd2x 63, 0, 3
30+
; CHECK-NEXT: xxswapd 62, 63
31+
; CHECK-NEXT: bc 12, 20, .LBB0_2
32+
; CHECK-NEXT: # %bb.1: # %bb37
33+
; CHECK-NEXT: bl _ZdlPv
34+
; CHECK-NEXT: nop
35+
; CHECK-NEXT: .LBB0_2: # %bb38
36+
; CHECK-NEXT: stxsiwx 62, 0, 3
37+
; CHECK-NEXT: stxsdx 63, 0, 3
38+
; CHECK-NEXT: li 3, 64
39+
; CHECK-NEXT: lvx 31, 1, 3 # 16-byte Folded Reload
40+
; CHECK-NEXT: li 3, 48
41+
; CHECK-NEXT: lvx 30, 1, 3 # 16-byte Folded Reload
42+
; CHECK-NEXT: addi 1, 1, 80
43+
; CHECK-NEXT: ld 0, 16(1)
44+
; CHECK-NEXT: mtlr 0
45+
; CHECK-NEXT: blr
46+
bb:
47+
%i = inttoptr i64 %arg to ptr
48+
%i6 = getelementptr inbounds %0, ptr %i, i64 0, i32 1
49+
%i7 = load <12 x i8>, ptr %i6, align 8
50+
br i1 poison, label %bb38, label %bb37
51+
52+
bb37: ; preds = %bb
53+
tail call void @_ZdlPv() #1
54+
br label %bb38
55+
56+
bb38: ; preds = %bb37, %bb
57+
store <12 x i8> %i7, ptr poison, align 8
58+
ret void
59+
}

0 commit comments

Comments
 (0)