Skip to content

Commit cf624b2

Browse files
committed
[SROA] isVectorPromotionViable(): memory intrinsics operate on vectors of bytes
Now, there's a big caveat here: these bytes are abstract bytes, not the i8 we have in LLVM, so strictly speaking this is not exactly legal; see e.g. AliveToolkit/alive2#860. As that issue shows, the "bytes" could have been a pointer, and loading them as an integer inserts an implicit ptrtoint. But at the same time, InstCombine's `InstCombinerImpl::SimplifyAnyMemTransfer()` would expand a memtransfer of 1/2/4/8 bytes into an integer-typed load+store, so this isn't exactly a new problem. Note that in memory, poison is byte-wise, so we really can't widen elements, but SROA seems to be inconsistent here. Fixes #59116.
1 parent 11c2c16 commit cf624b2

File tree

17 files changed

+216
-268
lines changed

17 files changed

+216
-268
lines changed

clang/test/CodeGenOpenCL/amdgpu-nullptr.cl

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -515,13 +515,17 @@ typedef struct {
515515
private char *p;
516516
} StructTy3;
517517

518-
// CHECK-LABEL: test_memset_private
519-
// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* noundef align 8 {{.*}}, i8 0, i64 32, i1 false)
520-
// CHECK: [[GEP:%.*]] = getelementptr inbounds %struct.StructTy3, %struct.StructTy3 addrspace(5)* %ptr, i32 0, i32 4
521-
// CHECK: store i8 addrspace(5)* addrspacecast (i8* null to i8 addrspace(5)*), i8 addrspace(5)* addrspace(5)* [[GEP]]
522-
// CHECK: [[GEP1:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* {{.*}}, i32 36
523-
// CHECK: [[GEP1_CAST:%.*]] = bitcast i8 addrspace(5)* [[GEP1]] to i32 addrspace(5)*
524-
// CHECK: store i32 0, i32 addrspace(5)* [[GEP1_CAST]], align 4
518+
// CHECK-LABEL: @test_memset_private(
519+
// CHECK-NEXT: entry:
520+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[STRUCT_STRUCTTY3:%.*]] addrspace(5)* [[PTR:%.*]] to i8 addrspace(5)*
521+
// CHECK-NEXT: [[S3_SROA_0_SROA_0_0_S3_SROA_0_0__SROA_CAST2_SROA_CAST:%.*]] = bitcast [[STRUCT_STRUCTTY3]] addrspace(5)* [[PTR]] to <32 x i8> addrspace(5)*
522+
// CHECK-NEXT: store <32 x i8> zeroinitializer, <32 x i8> addrspace(5)* [[S3_SROA_0_SROA_0_0_S3_SROA_0_0__SROA_CAST2_SROA_CAST]], align 8, !tbaa.struct !9
523+
// CHECK-NEXT: [[S3_SROA_4_0__SROA_IDX6:%.*]] = getelementptr inbounds [[STRUCT_STRUCTTY3]], [[STRUCT_STRUCTTY3]] addrspace(5)* [[PTR]], i32 0, i32 4
524+
// CHECK-NEXT: store i8 addrspace(5)* addrspacecast (i8* null to i8 addrspace(5)*), i8 addrspace(5)* addrspace(5)* [[S3_SROA_4_0__SROA_IDX6]], align 8, !tbaa.struct !12
525+
// CHECK-NEXT: [[S3_SROA_5_0__SROA_IDX:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP0]], i32 36
526+
// CHECK-NEXT: [[S3_SROA_5_0__SROA_CAST8:%.*]] = bitcast i8 addrspace(5)* [[S3_SROA_5_0__SROA_IDX]] to i32 addrspace(5)*
527+
// CHECK-NEXT: store i32 0, i32 addrspace(5)* [[S3_SROA_5_0__SROA_CAST8]], align 4, !tbaa.struct !13
528+
// CHECK-NEXT: ret void
525529
void test_memset_private(private StructTy3 *ptr) {
526530
StructTy3 S3 = {0, 0, 0, 0, 0};
527531
*ptr = S3;

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1806,8 +1806,10 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
18061806
? Ty->getElementType()
18071807
: FixedVectorType::get(Ty->getElementType(), NumElements);
18081808

1809-
Type *SplitIntTy =
1810-
Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
1809+
Type *SplitIntTy = nullptr;
1810+
if (uint64_t Bitwidth = NumElements * ElementSize * 8;
1811+
Bitwidth <= IntegerType::MAX_INT_BITS)
1812+
SplitIntTy = Type::getIntNTy(Ty->getContext(), Bitwidth);
18111813

18121814
Use *U = S.getUse();
18131815

@@ -1826,7 +1828,8 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
18261828
// Disable vector promotion when there are loads or stores of an FCA.
18271829
if (LTy->isStructTy())
18281830
return false;
1829-
if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
1831+
if (SplitIntTy &&
1832+
(P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset())) {
18301833
assert(LTy->isIntegerTy());
18311834
LTy = SplitIntTy;
18321835
}
@@ -1839,7 +1842,8 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
18391842
// Disable vector promotion when there are loads or stores of an FCA.
18401843
if (STy->isStructTy())
18411844
return false;
1842-
if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
1845+
if (SplitIntTy &&
1846+
(P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset())) {
18431847
assert(STy->isIntegerTy());
18441848
STy = SplitIntTy;
18451849
}
@@ -1934,6 +1938,9 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
19341938
CheckCandidateType(LI->getType());
19351939
else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
19361940
CheckCandidateType(SI->getValueOperand()->getType());
1941+
else if (auto *MTI = dyn_cast<MemIntrinsic>(S.getUse()->getUser()))
1942+
CheckCandidateType(FixedVectorType::get(
1943+
IntegerType::getInt8Ty(MTI->getContext()), P.size()));
19371944
}
19381945

19391946
// If we didn't find a vector type, nothing to do here.

llvm/test/CodeGen/AMDGPU/v1024.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
; GCN-LABEL: {{^}}test_v1024:
66
; GCN-NOT: v_accvgpr
7-
; GCN-COUNT-32: v_mov_b32_e32
7+
; GCN-COUNT-10: v_mov_b32_e32
88
; GCN-NOT: v_accvgpr
99
define amdgpu_kernel void @test_v1024() {
1010
entry:

llvm/test/DebugInfo/X86/sroasplit-1.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,8 @@
2020
;
2121

2222
; Verify that SROA creates a variable piece when splitting i1.
23-
; CHECK: %[[I1:.*]] = alloca [12 x i8], align 4
24-
; CHECK: call void @llvm.dbg.declare(metadata [12 x i8]* %[[I1]], metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 96))
25-
; CHECK: call void @llvm.dbg.value(metadata i32 %[[A:.*]], metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32))
26-
; CHECK: ret i32 %[[A]]
23+
; CHECK: %[[I1:.*]] = load <12 x i8>,
24+
; CHECK: call void @llvm.dbg.value(metadata <12 x i8> %[[I1]], metadata ![[VAR:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 96))
2725
; Read Var and Piece:
2826
; CHECK: ![[VAR]] = !DILocalVariable(name: "i1",{{.*}} line: 11,
2927

llvm/test/DebugInfo/X86/sroasplit-4.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,28 @@
11
; RUN: opt -sroa < %s -S -o - | FileCheck %s
22
;
33
; Test that recursively splitting an alloca updates the debug info correctly.
4-
; CHECK: %[[T:.*]] = load i64, i64* @t, align 8
5-
; CHECK: call void @llvm.dbg.value(metadata i64 %[[T]], metadata ![[Y:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64))
6-
; CHECK: %[[T1:.*]] = load i64, i64* @t, align 8
7-
; CHECK: call void @llvm.dbg.value(metadata i64 %[[T1]], metadata ![[Y]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64))
8-
; CHECK: call void @llvm.dbg.value(metadata i64 %[[T]], metadata ![[R:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 192, 64))
9-
; CHECK: call void @llvm.dbg.value(metadata i64 %[[T1]], metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 256, 64))
10-
;
4+
; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC:.*]], metadata ![[Y:.*]], metadata !DIExpression())
5+
; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC1:.*]], metadata ![[Y]], metadata !DIExpression())
6+
; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata ![[R:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32))
7+
; CHECK: call void @llvm.dbg.value(metadata i64 0, metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64))
8+
; CHECK: call void @llvm.dbg.value(metadata i64 0, metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64))
9+
; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC1]], metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 192, 128))
10+
;
1111
; struct p {
1212
; __SIZE_TYPE__ s;
1313
; __SIZE_TYPE__ t;
1414
; };
15-
;
15+
;
1616
; struct r {
1717
; int i;
1818
; struct p x;
1919
; struct p y;
2020
; };
21-
;
21+
;
2222
; extern int call_me(struct r);
2323
; extern int maybe();
2424
; extern __SIZE_TYPE__ t;
25-
;
25+
;
2626
; int test() {
2727
; if (maybe())
2828
; return 0;

llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,13 @@ define dso_local i32* @_Z3foo1S(%0* byval(%0) align 8 %arg) {
6868
; CHECK-LABEL: @_Z3foo1S(
6969
; CHECK-NEXT: bb:
7070
; CHECK-NEXT: [[I2:%.*]] = alloca [[TMP0:%.*]], align 8
71-
; CHECK-NEXT: [[I1_SROA_0_0_I5_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0
72-
; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I5_SROA_IDX]], align 8
71+
; CHECK-NEXT: [[TMP0]] = bitcast %0* [[ARG:%.*]] to i64*
72+
; CHECK-NEXT: [[I11_SROA_0_0_VEC_EXTRACT_EXTRACT:%.*]] = load i64, i64* [[TMP0]], align 8
73+
; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[I11_SROA_0_0_VEC_EXTRACT_EXTRACT]] to i32*
7374
; CHECK-NEXT: [[I_SROA_0_0_I6_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I2]], i64 0, i32 0
74-
; CHECK-NEXT: store i32* [[I1_SROA_0_0_COPYLOAD]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8
75+
; CHECK-NEXT: store i32* [[TMP1]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8
7576
; CHECK-NEXT: tail call void @_Z7escape01S(%0* nonnull byval([[TMP0]]) align 8 [[I2]])
76-
; CHECK-NEXT: ret i32* [[I1_SROA_0_0_COPYLOAD]]
77+
; CHECK-NEXT: ret i32* [[TMP1]]
7778
;
7879
bb:
7980
%i = alloca %0, align 8
@@ -107,21 +108,22 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
107108
define dso_local i32* @_Z3bar1S(%0* byval(%0) align 8 %arg) {
108109
; CHECK-LABEL: @_Z3bar1S(
109110
; CHECK-NEXT: bb:
110-
; CHECK-NEXT: [[I1_SROA_0_0_I4_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* [[ARG:%.*]], i64 0, i32 0
111-
; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I4_SROA_IDX]], align 8
111+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast %0* [[ARG:%.*]] to i64*
112+
; CHECK-NEXT: [[I13_SROA_0_0_VEC_EXTRACT_EXTRACT:%.*]] = load i64, i64* [[TMP0]], align 8
113+
; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[I13_SROA_0_0_VEC_EXTRACT_EXTRACT]] to i32*
112114
; CHECK-NEXT: [[I5:%.*]] = tail call i32 @_Z4condv()
113115
; CHECK-NEXT: [[I6_NOT:%.*]] = icmp eq i32 [[I5]], 0
114116
; CHECK-NEXT: br i1 [[I6_NOT]], label [[BB10:%.*]], label [[BB7:%.*]]
115117
; CHECK: bb7:
116118
; CHECK-NEXT: tail call void @_Z5sync0v()
117-
; CHECK-NEXT: tail call void @_Z7escape0Pi(i32* [[I1_SROA_0_0_COPYLOAD]])
119+
; CHECK-NEXT: tail call void @_Z7escape0Pi(i32* [[TMP1]])
118120
; CHECK-NEXT: br label [[BB13:%.*]]
119121
; CHECK: bb10:
120122
; CHECK-NEXT: tail call void @_Z5sync1v()
121-
; CHECK-NEXT: tail call void @_Z7escape1Pi(i32* [[I1_SROA_0_0_COPYLOAD]])
123+
; CHECK-NEXT: tail call void @_Z7escape1Pi(i32* [[TMP1]])
122124
; CHECK-NEXT: br label [[BB13]]
123125
; CHECK: bb13:
124-
; CHECK-NEXT: ret i32* [[I1_SROA_0_0_COPYLOAD]]
126+
; CHECK-NEXT: ret i32* [[TMP1]]
125127
;
126128
bb:
127129
%i = alloca %0, align 8

llvm/test/Transforms/SROA/address-spaces.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ declare void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) nocapture, ptr addrspace(1)
1111
; Make sure an illegal bitcast isn't introduced
1212
define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) {
1313
; CHECK-LABEL: @test_address_space_1_1(
14-
; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2
15-
; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2
14+
; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2
15+
; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2
1616
; CHECK-NEXT: ret void
1717
;
1818
%aa = alloca <2 x i64>, align 16
@@ -23,8 +23,8 @@ define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) {
2323

2424
define void @test_address_space_1_0(ptr addrspace(1) %a, ptr %b) {
2525
; CHECK-LABEL: @test_address_space_1_0(
26-
; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2
27-
; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2
26+
; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2
27+
; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], align 2
2828
; CHECK-NEXT: ret void
2929
;
3030
%aa = alloca <2 x i64>, align 16
@@ -35,8 +35,8 @@ define void @test_address_space_1_0(ptr addrspace(1) %a, ptr %b) {
3535

3636
define void @test_address_space_0_1(ptr %a, ptr addrspace(1) %b) {
3737
; CHECK-LABEL: @test_address_space_0_1(
38-
; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 2
39-
; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2
38+
; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A:%.*]], align 2
39+
; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2
4040
; CHECK-NEXT: ret void
4141
;
4242
%aa = alloca <2 x i64>, align 16

llvm/test/Transforms/SROA/alignment.ll

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -92,15 +92,15 @@ define void @PR13920(ptr %a, ptr %b) {
9292
; Test that alignments on memcpy intrinsics get propagated to loads and stores.
9393
; CHECK-LABEL: @PR13920(
9494
; CHECK-NEXT: entry:
95-
; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 2
96-
; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2
95+
; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A:%.*]], align 2
96+
; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], align 2
9797
; CHECK-NEXT: ret void
9898
;
9999
; DEBUGLOC-LABEL: @PR13920(
100100
; DEBUGLOC-NEXT: entry:
101101
; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG38:![0-9]+]]
102-
; DEBUGLOC-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 2, !dbg [[DBG39:![0-9]+]]
103-
; DEBUGLOC-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2, !dbg [[DBG40:![0-9]+]]
102+
; DEBUGLOC-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A:%.*]], align 2, !dbg [[DBG39:![0-9]+]]
103+
; DEBUGLOC-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], align 2, !dbg [[DBG40:![0-9]+]]
104104
; DEBUGLOC-NEXT: ret void, !dbg [[DBG41:![0-9]+]]
105105
;
106106

@@ -118,21 +118,17 @@ define void @test3(ptr %x) {
118118
; reduce the alignment.
119119
; CHECK-LABEL: @test3(
120120
; CHECK-NEXT: entry:
121-
; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [22 x i8], align 8
122-
; CHECK-NEXT: [[B_SROA_0:%.*]] = alloca [18 x i8], align 2
123-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A_SROA_0]], ptr align 8 [[X:%.*]], i32 22, i1 false)
124-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[B_SROA_0]], ptr align 2 [[X]], i32 18, i1 false)
121+
; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load <22 x i8>, ptr [[X:%.*]], align 8
122+
; CHECK-NEXT: [[B_SROA_0_6_COPYLOAD:%.*]] = load <18 x i8>, ptr [[X]], align 2
125123
; CHECK-NEXT: ret void
126124
;
127125
; DEBUGLOC-LABEL: @test3(
128126
; DEBUGLOC-NEXT: entry:
129-
; DEBUGLOC-NEXT: [[A_SROA_0:%.*]] = alloca [22 x i8], align 8, !dbg [[DBG47:![0-9]+]]
130-
; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]]
131-
; DEBUGLOC-NEXT: [[B_SROA_0:%.*]] = alloca [18 x i8], align 2, !dbg [[DBG48:![0-9]+]]
132-
; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]]
133-
; DEBUGLOC-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A_SROA_0]], ptr align 8 [[X:%.*]], i32 22, i1 false), !dbg [[DBG49:![0-9]+]]
127+
; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47:![0-9]+]]
128+
; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]]
129+
; DEBUGLOC-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load <22 x i8>, ptr [[X:%.*]], align 8, !dbg [[DBG49:![0-9]+]]
134130
; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50:![0-9]+]]
135-
; DEBUGLOC-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[B_SROA_0]], ptr align 2 [[X]], i32 18, i1 false), !dbg [[DBG51:![0-9]+]]
131+
; DEBUGLOC-NEXT: [[B_SROA_0_6_COPYLOAD:%.*]] = load <18 x i8>, ptr [[X]], align 2, !dbg [[DBG51:![0-9]+]]
136132
; DEBUGLOC-NEXT: ret void, !dbg [[DBG52:![0-9]+]]
137133
;
138134

llvm/test/Transforms/SROA/alloca-address-space.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ declare void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) nocapture, ptr addrspace(1)
1010

1111
define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) {
1212
; CHECK-LABEL: @test_address_space_1_1(
13-
; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2
14-
; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2
13+
; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2
14+
; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2
1515
; CHECK-NEXT: ret void
1616
;
1717
%aa = alloca <2 x i64>, align 16, addrspace(2)
@@ -22,8 +22,8 @@ define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) {
2222

2323
define void @test_address_space_1_0(ptr addrspace(1) %a, ptr addrspace(2) %b) {
2424
; CHECK-LABEL: @test_address_space_1_0(
25-
; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2
26-
; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(2) [[B:%.*]], align 2
25+
; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2
26+
; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(2) [[B:%.*]], align 2
2727
; CHECK-NEXT: ret void
2828
;
2929
%aa = alloca <2 x i64>, align 16, addrspace(2)
@@ -34,8 +34,8 @@ define void @test_address_space_1_0(ptr addrspace(1) %a, ptr addrspace(2) %b) {
3434

3535
define void @test_address_space_0_1(ptr addrspace(2) %a, ptr addrspace(1) %b) {
3636
; CHECK-LABEL: @test_address_space_0_1(
37-
; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(2) [[A:%.*]], align 2
38-
; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2
37+
; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(2) [[A:%.*]], align 2
38+
; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2
3939
; CHECK-NEXT: ret void
4040
;
4141
%aa = alloca <2 x i64>, align 16, addrspace(2)

0 commit comments

Comments
 (0)