Skip to content

Commit 7fa503e

Browse files
committed
[SROA] rewritePartition()/findCommonType(): if uses have conflicting types, try getTypePartition() before falling back to largest integral use type (PR47592)
And another step towards transforms not introducing inttoptr and/or ptrtoint casts that weren't there already. In this case, when load/store uses have conflicting types, instead of falling back to the iN, we can try to use the allocated sub-type. As discussed, this isn't the best idea overall (we shouldn't rely on the allocated type), but it works fine as a temporary measure. I've measured, and @ `-O3` as of vanilla llvm test-suite + RawSpeed, this results in +0.05% more bitcasts, -5.51% less inttoptr and -1.05% less ptrtoint (at the end of middle-end opt pipeline). See https://bugs.llvm.org/show_bug.cgi?id=47592 Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D88788
1 parent edd71db commit 7fa503e

File tree

4 files changed

+26
-21
lines changed

4 files changed

+26
-21
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,9 +1128,9 @@ LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
11281128

11291129
/// Walk the range of a partitioning looking for a common type to cover this
11301130
/// sequence of slices.
1131-
static Type *findCommonType(AllocaSlices::const_iterator B,
1132-
AllocaSlices::const_iterator E,
1133-
uint64_t EndOffset) {
1131+
static std::pair<Type *, IntegerType *>
1132+
findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
1133+
uint64_t EndOffset) {
11341134
Type *Ty = nullptr;
11351135
bool TyIsCommon = true;
11361136
IntegerType *ITy = nullptr;
@@ -1174,7 +1174,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
11741174
Ty = UserTy;
11751175
}
11761176

1177-
return TyIsCommon ? Ty : ITy;
1177+
return {TyIsCommon ? Ty : nullptr, ITy};
11781178
}
11791179

11801180
/// PHI instructions that use an alloca and are subsequently loaded can be
@@ -4264,13 +4264,21 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
42644264
// or an i8 array of an appropriate size.
42654265
Type *SliceTy = nullptr;
42664266
const DataLayout &DL = AI.getModule()->getDataLayout();
4267-
if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
4268-
if (DL.getTypeAllocSize(CommonUseTy).getFixedSize() >= P.size())
4269-
SliceTy = CommonUseTy;
4267+
std::pair<Type *, IntegerType *> CommonUseTy =
4268+
findCommonType(P.begin(), P.end(), P.endOffset());
4269+
// Do all uses operate on the same type?
4270+
if (CommonUseTy.first)
4271+
if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size())
4272+
SliceTy = CommonUseTy.first;
4273+
// If not, can we find an appropriate subtype in the original allocated type?
42704274
if (!SliceTy)
42714275
if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
42724276
P.beginOffset(), P.size()))
42734277
SliceTy = TypePartitionTy;
4278+
// If still not, can we use the largest bitwidth integer type used?
4279+
if (!SliceTy && CommonUseTy.second)
4280+
if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size())
4281+
SliceTy = CommonUseTy.second;
42744282
if ((!SliceTy || (SliceTy->isArrayTy() &&
42754283
SliceTy->getArrayElementType()->isIntegerTy())) &&
42764284
DL.isLegalInteger(P.size() * 8))

llvm/test/DebugInfo/ARM/sroa-complex.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,13 @@ entry:
1515
%c.realp = getelementptr inbounds { double, double }, { double, double }* %c, i32 0, i32 0, !dbg !17
1616
%c.imagp = getelementptr inbounds { double, double }, { double, double }* %c, i32 0, i32 1, !dbg !17
1717
store double 0.000000e+00, double* %c.realp, align 8, !dbg !17
18-
; SROA will split the complex double into two i64 values, because there is
19-
; no native double data type available.
18+
; SROA will split the complex double into two double values.
2019
; Test that debug info for both values survives:
21-
; CHECK: call void @llvm.dbg.value(metadata i64 0,
20+
; CHECK: call void @llvm.dbg.value(metadata double 0.000000e+00,
2221
; CHECK-SAME: metadata ![[C:[^,]*]],
2322
; CHECK-SAME: metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64))
2423
store double 0.000000e+00, double* %c.imagp, align 8, !dbg !17
25-
; CHECK: call void @llvm.dbg.value(metadata i64 0,
24+
; CHECK: call void @llvm.dbg.value(metadata double 0.000000e+00,
2625
; CHECK-SAME: metadata ![[C]],
2726
; CHECK-SAME: metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64))
2827
ret void, !dbg !18

llvm/test/Transforms/SROA/ppcf128-no-fold.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt < %s -sroa -S | FileCheck %s
1+
; RUN: opt < %s -sroa -S | FileCheck %s
22
target datalayout = "E-m:e-i64:64-n32:64"
33
target triple = "powerpc64-unknown-linux-gnu"
44

@@ -27,8 +27,8 @@ entry:
2727
; CHECK-LABEL: @foo
2828
; CHECK-NOT: i128 4628293042053316608
2929
; CHECK-NOT: i128 4653260752096854016
30-
; CHECK-DAG: i128 bitcast (ppc_fp128 0xM403B0000000000000000000000000000 to i128)
31-
; CHECK-DAG: i128 bitcast (ppc_fp128 0xM4093B400000000000000000000000000 to i128)
30+
; CHECK-DAG: bitcast ppc_fp128 0xM403B0000000000000000000000000000 to i128
31+
; CHECK-DAG: bitcast ppc_fp128 0xM4093B400000000000000000000000000 to i128
3232
; CHECK: call void @bar(i8* %v, [2 x i128]
3333
; CHECK: ret void
3434

llvm/test/Transforms/SROA/preserve-nonnull.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,10 @@ entry:
5151
define i8* @propagate_nonnull_to_int() {
5252
; CHECK-LABEL: define i8* @propagate_nonnull_to_int(
5353
; CHECK-NEXT: entry:
54-
; CHECK-NEXT: %[[A:.*]] = alloca i64
55-
; CHECK-NEXT: store i64 42, i64* %[[A]]
56-
; CHECK-NEXT: %[[LOAD:.*]] = load volatile i64, i64* %[[A]]
57-
; CHECK-NEXT: %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8*
58-
; CHECK-NEXT: ret i8* %[[CAST]]
54+
; CHECK-NEXT: %[[A:.*]] = alloca i8*
55+
; CHECK-NEXT: store i8* inttoptr (i64 42 to i8*), i8** %[[A]]
56+
; CHECK-NEXT: %[[LOAD:.*]] = load volatile i8*, i8** %[[A]]
57+
; CHECK-NEXT: ret i8* %[[LOAD]]
5958
entry:
6059
%a = alloca [2 x i8*]
6160
%a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
@@ -75,8 +74,7 @@ entry:
7574
define i8* @propagate_nonnull_to_int_and_promote() {
7675
; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote(
7776
; CHECK-NEXT: entry:
78-
; CHECK-NEXT: %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8*
79-
; CHECK-NEXT: ret i8* %[[PROMOTED_VALUE]]
77+
; CHECK-NEXT: ret i8* inttoptr (i64 42 to i8*)
8078
entry:
8179
%a = alloca [2 x i8*], align 8
8280
%a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0

0 commit comments

Comments
 (0)