Commit 65cb3bc

[Clang][PowerPC] Add __dmr1024 type and DMF integer calculation builtins (#142480)
Define the __dmr1024 type used to manipulate the new DMR registers introduced by the Dense Math Facility (DMF) on PowerPC, and add six Clang builtins that correspond to the PowerPC integer outer-product accumulate-to-ACC instructions:

* __builtin_mma_dmxvi8gerx4
* __builtin_mma_pmdmxvi8gerx4
* __builtin_mma_dmxvi8gerx4pp
* __builtin_mma_pmdmxvi8gerx4pp
* __builtin_mma_dmxvi8gerx4spp
* __builtin_mma_pmdmxvi8gerx4spp
1 parent 8d6e29d commit 65cb3bc
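For illustration, here is a minimal usage sketch of the new type and the builtins listed above, modeled on the CodeGen tests added by this commit; the function name, parameter names, and build line are illustrative assumptions rather than part of the change.

// Assumed build line (illustrative):
//   clang -S -emit-llvm --target=powerpc64le-unknown-linux-gnu -mcpu=future dmf_example.c
void dmf_i8gerx4_accumulate(unsigned char *accp, unsigned char *vpp,
                            vector unsigned char vc) {
  __dmr1024 acc = *((__dmr1024 *)accp);       // load the 1024-bit DMR accumulator
  __vector_pair vp = *((__vector_pair *)vpp); // load a 256-bit pair of VSX vectors
  __builtin_mma_dmxvi8gerx4pp(&acc, vp, vc);  // 8-bit integer outer product, accumulated into acc
  *((__dmr1024 *)accp) = acc;                 // store the updated accumulator back
}

The pmdm* variants take the same operands plus three constant integer masks (the tests in this commit pass 0, 0, 0).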

File tree

11 files changed: 445 additions & 3 deletions

clang/include/clang/Basic/BuiltinsPPC.def

Lines changed: 12 additions & 0 deletions

@@ -1134,6 +1134,18 @@ UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2np, "vW512*VVi15i15i3", true,
                          "mma,paired-vector-memops")
 UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2nn, "vW512*VVi15i15i3", true,
                          "mma,paired-vector-memops")
+UNALIASED_CUSTOM_BUILTIN(mma_dmxvi8gerx4, "vW1024*W256V", false,
+                         "mma,paired-vector-memops")
+UNALIASED_CUSTOM_BUILTIN(mma_pmdmxvi8gerx4, "vW1024*W256Vi255i15i15", false,
+                         "mma,paired-vector-memops")
+UNALIASED_CUSTOM_BUILTIN(mma_dmxvi8gerx4pp, "vW1024*W256V", true,
+                         "mma,paired-vector-memops")
+UNALIASED_CUSTOM_BUILTIN(mma_pmdmxvi8gerx4pp, "vW1024*W256Vi255i15i15", true,
+                         "mma,paired-vector-memops")
+UNALIASED_CUSTOM_BUILTIN(mma_dmxvi8gerx4spp, "vW1024*W256V", true,
+                         "mma,paired-vector-memops")
+UNALIASED_CUSTOM_BUILTIN(mma_pmdmxvi8gerx4spp, "vW1024*W256Vi255i15i15", true,
+                         "mma,paired-vector-memops")
 
 // FIXME: Obviously incomplete.
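As a reading aid for the prototype strings above (my interpretation of the BuiltinsPPC.def encoding, not text from the commit): 'v' is a void return, 'W1024*' a pointer to the 1024-bit __dmr1024 type, 'W256' a __vector_pair, 'V' a vector unsigned char, and 'iN' a constant integer argument bounded by N. The new builtins are therefore called as if declared roughly like this:

void __builtin_mma_dmxvi8gerx4(__dmr1024 *, __vector_pair, vector unsigned char);
void __builtin_mma_pmdmxvi8gerx4(__dmr1024 *, __vector_pair, vector unsigned char,
                                 const int, const int, const int); /* constants bounded by 255, 15, 15 */

The pp/spp entries share these two shapes; the true/false field appears to distinguish the accumulating forms from the plain dmxvi8gerx4/pmdmxvi8gerx4 forms.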

clang/include/clang/Basic/PPCTypes.def

Lines changed: 1 addition & 0 deletions

@@ -30,6 +30,7 @@
 #endif
 
 
+PPC_VECTOR_MMA_TYPE(__dmr1024, DMR1024, 1024)
 PPC_VECTOR_MMA_TYPE(__vector_quad, VectorQuad, 512)
 PPC_VECTOR_VSX_TYPE(__vector_pair, VectorPair, 256)

clang/lib/AST/ASTContext.cpp

Lines changed: 1 addition & 0 deletions

@@ -3522,6 +3522,7 @@ static void encodeTypeForFunctionPointerAuth(const ASTContext &Ctx,
   case BuiltinType::BFloat16:
   case BuiltinType::VectorQuad:
   case BuiltinType::VectorPair:
+  case BuiltinType::DMR1024:
     OS << "?";
     return;

clang/test/AST/ast-dump-ppc-types.c

Lines changed: 10 additions & 3 deletions

@@ -1,9 +1,11 @@
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \
+// RUN: -ast-dump %s | FileCheck %s
 // RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
-// RUN: -ast-dump -ast-dump-filter __vector %s | FileCheck %s
+// RUN: -ast-dump %s | FileCheck %s
 // RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr9 \
-// RUN: -ast-dump -ast-dump-filter __vector %s | FileCheck %s
+// RUN: -ast-dump %s | FileCheck %s
 // RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr8 \
-// RUN: -ast-dump -ast-dump-filter __vector %s | FileCheck %s
+// RUN: -ast-dump %s | FileCheck %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -ast-dump %s | FileCheck %s \
 // RUN: --check-prefix=CHECK-X86_64
 // RUN: %clang_cc1 -triple arm-unknown-unknown -ast-dump %s | FileCheck %s \
@@ -15,16 +17,21 @@
 // are correctly defined. We also added checks on a couple of other targets to
 // ensure the types are target-dependent.
 
+// CHECK: TypedefDecl {{.*}} implicit __dmr1024 '__dmr1024'
+// CHECK: `-BuiltinType {{.*}} '__dmr1024'
 // CHECK: TypedefDecl {{.*}} implicit __vector_quad '__vector_quad'
 // CHECK-NEXT: `-BuiltinType {{.*}} '__vector_quad'
 // CHECK: TypedefDecl {{.*}} implicit __vector_pair '__vector_pair'
 // CHECK-NEXT: `-BuiltinType {{.*}} '__vector_pair'
 
+// CHECK-X86_64-NOT: __dmr1024
 // CHECK-X86_64-NOT: __vector_quad
 // CHECK-X86_64-NOT: __vector_pair
 
+// CHECK-ARM-NOT: __dmr1024
 // CHECK-ARM-NOT: __vector_quad
 // CHECK-ARM-NOT: __vector_pair
 
+// CHECK-RISCV64-NOT: __dmr1024
 // CHECK-RISCV64-NOT: __vector_quad
 // CHECK-RISCV64-NOT: __vector_pair

Lines changed: 94 additions & 0 deletions

@@ -0,0 +1,94 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future \
+// RUN: -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O3 -triple powerpc64-ibm-aix -target-cpu future \
+// RUN: -emit-llvm %s -o - | FileCheck %s
+
+
+// CHECK-LABEL: @test_dmxvi8gerx4(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-NEXT: ret void
+//
+void test_dmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __builtin_mma_dmxvi8gerx4(&vdmr, vp, vc);
+  *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: @test_pmdmxvi8gerx4(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_pmdmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __builtin_mma_pmdmxvi8gerx4(&vdmr, vp, vc, 0, 0, 0);
+  *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: @test_dmxvi8gerx4pp(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_dmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __builtin_mma_dmxvi8gerx4pp(&vdmr, vp, vc);
+  *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: @test_pmdmxvi8gerx4pp(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_pmdmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __builtin_mma_pmdmxvi8gerx4pp(&vdmr, vp, vc, 0, 0, 0);
+  *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: @test_dmxvi8gerx4spp(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_dmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __builtin_mma_dmxvi8gerx4spp(&vdmr, vp, vc);
+  *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: @test_pmdmxvi8gerx4spp(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+  __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __builtin_mma_pmdmxvi8gerx4spp(&vdmr, vp, vc, 0, 0, 0);
+  *((__dmr1024 *)resp) = vdmr;
+}

Lines changed: 20 additions & 0 deletions

@@ -0,0 +1,20 @@
+// RUN: not %clang_cc1 -triple powerpc64le-unknown-linux-gnu -target-cpu future \
+// RUN: %s -emit-llvm-only 2>&1 | FileCheck %s
+
+__attribute__((target("no-paired-vector-memops")))
+void test_pair(unsigned char *vdmr, unsigned char *vpp, vector unsigned char vc) {
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __builtin_mma_dmxvi8gerx4((__dmr1024 *)vdmr, vp, vc);
+  __builtin_mma_pmdmxvi8gerx4((__dmr1024 *)vdmr, vp, vc, 0, 0, 0);
+  __builtin_mma_dmxvi8gerx4pp((__dmr1024 *)vdmr, vp, vc);
+  __builtin_mma_pmdmxvi8gerx4pp((__dmr1024 *)vdmr, vp, vc, 0, 0, 0);
+  __builtin_mma_dmxvi8gerx4spp((__dmr1024 *)vdmr, vp, vc);
+  __builtin_mma_pmdmxvi8gerx4spp((__dmr1024 *)vdmr, vp, vc, 0, 0, 0);
+
+// CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmxvi8gerx4pp' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvi8gerx4pp' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmxvi8gerx4spp' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvi8gerx4spp' needs target feature mma,paired-vector-memops
+}

Lines changed: 177 additions & 0 deletions

@@ -0,0 +1,177 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple powerpc64le-linux-unknown -target-cpu future \
+// RUN: -emit-llvm -o - %s | FileCheck %s
+
+
+// CHECK-LABEL: @test_dmr_copy(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[PTR1:%.*]], ptr [[PTR1_ADDR]], align 8
+// CHECK-NEXT: store ptr [[PTR2:%.*]], ptr [[PTR2_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 8
+// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <1024 x i1>, ptr [[TMP0]], i64 2
+// CHECK-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[ADD_PTR]], align 128
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8
+// CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds <1024 x i1>, ptr [[TMP2]], i64 1
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[ADD_PTR1]], align 128
+// CHECK-NEXT: ret void
+//
+void test_dmr_copy(__dmr1024 *ptr1, __dmr1024 *ptr2) {
+  *(ptr2 + 1) = *(ptr1 + 2);
+}
+
+// CHECK-LABEL: @test_dmr_typedef(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[INP_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[OUTP_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[VDMRIN:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[VDMROUT:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[INP:%.*]], ptr [[INP_ADDR]], align 8
+// CHECK-NEXT: store ptr [[OUTP:%.*]], ptr [[OUTP_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[INP_ADDR]], align 8
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[VDMRIN]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUTP_ADDR]], align 8
+// CHECK-NEXT: store ptr [[TMP1]], ptr [[VDMROUT]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VDMRIN]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr [[TMP2]], align 128
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VDMROUT]], align 8
+// CHECK-NEXT: store <1024 x i1> [[TMP3]], ptr [[TMP4]], align 128
+// CHECK-NEXT: ret void
+//
+void test_dmr_typedef(int *inp, int *outp) {
+  __dmr1024 *vdmrin = (__dmr1024 *)inp;
+  __dmr1024 *vdmrout = (__dmr1024 *)outp;
+  *vdmrout = *vdmrin;
+}
+
+// CHECK-LABEL: @test_dmr_arg(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VDMR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[VDMRP:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[VDMR:%.*]], ptr [[VDMR_ADDR]], align 8
+// CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[VDMRP]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VDMR_ADDR]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[TMP1]], align 128
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VDMRP]], align 8
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[TMP3]], align 128
+// CHECK-NEXT: ret void
+//
+void test_dmr_arg(__dmr1024 *vdmr, int *ptr) {
+  __dmr1024 *vdmrp = (__dmr1024 *)ptr;
+  *vdmrp = *vdmr;
+}
+
+// CHECK-LABEL: @test_dmr_const_arg(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VDMR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[VDMRP:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[VDMR:%.*]], ptr [[VDMR_ADDR]], align 8
+// CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[VDMRP]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VDMR_ADDR]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[TMP1]], align 128
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VDMRP]], align 8
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[TMP3]], align 128
+// CHECK-NEXT: ret void
+//
+void test_dmr_const_arg(const __dmr1024 *const vdmr, int *ptr) {
+  __dmr1024 *vdmrp = (__dmr1024 *)ptr;
+  *vdmrp = *vdmr;
+}
+
+// CHECK-LABEL: @test_dmr_array_arg(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VDMRA_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[VDMRP:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[VDMRA:%.*]], ptr [[VDMRA_ADDR]], align 8
+// CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[VDMRP]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VDMRA_ADDR]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <1024 x i1>, ptr [[TMP1]], i64 0
+// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[ARRAYIDX]], align 128
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VDMRP]], align 8
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[TMP3]], align 128
+// CHECK-NEXT: ret void
+//
+void test_dmr_array_arg(__dmr1024 vdmra[], int *ptr) {
+  __dmr1024 *vdmrp = (__dmr1024 *)ptr;
+  *vdmrp = vdmra[0];
+}
+
+// CHECK-LABEL: @test_dmr_ret(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[VDMRP:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[VDMRP]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VDMRP]], align 8
+// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <1024 x i1>, ptr [[TMP1]], i64 2
+// CHECK-NEXT: ret ptr [[ADD_PTR]]
+//
+__dmr1024 *test_dmr_ret(int *ptr) {
+  __dmr1024 *vdmrp = (__dmr1024 *)ptr;
+  return vdmrp + 2;
+}
+
+// CHECK-LABEL: @test_dmr_ret_const(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[VDMRP:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[VDMRP]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VDMRP]], align 8
+// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <1024 x i1>, ptr [[TMP1]], i64 2
+// CHECK-NEXT: ret ptr [[ADD_PTR]]
+//
+const __dmr1024 *test_dmr_ret_const(int *ptr) {
+  __dmr1024 *vdmrp = (__dmr1024 *)ptr;
+  return vdmrp + 2;
+}
+
+// CHECK-LABEL: @test_dmr_sizeof_alignof(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[VDMRP:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[VDMR:%.*]] = alloca <1024 x i1>, align 128
+// CHECK-NEXT: [[SIZET:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[ALIGNT:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[SIZEV:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[ALIGNV:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[VDMRP]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VDMRP]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[TMP1]], align 128
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[VDMR]], align 128
+// CHECK-NEXT: store i32 128, ptr [[SIZET]], align 4
+// CHECK-NEXT: store i32 128, ptr [[ALIGNT]], align 4
+// CHECK-NEXT: store i32 128, ptr [[SIZEV]], align 4
+// CHECK-NEXT: store i32 128, ptr [[ALIGNV]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIZET]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ALIGNT]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP3]], [[TMP4]]
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIZEV]], align 4
+// CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ADD]], [[TMP5]]
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ALIGNV]], align 4
+// CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP6]]
+// CHECK-NEXT: ret i32 [[ADD2]]
+//
+int test_dmr_sizeof_alignof(int *ptr) {
+  __dmr1024 *vdmrp = (__dmr1024 *)ptr;
+  __dmr1024 vdmr = *vdmrp;
+  unsigned sizet = sizeof(__dmr1024);
+  unsigned alignt = __alignof__(__dmr1024);
+  unsigned sizev = sizeof(vdmr);
+  unsigned alignv = __alignof__(vdmr);
+  return sizet + alignt + sizev + alignv;
+}

Lines changed: 21 additions & 0 deletions

@@ -0,0 +1,21 @@
+// RUN: not %clang_cc1 -triple powerpc64le-unknown-linux-gnu -target-cpu future \
+// RUN: %s -emit-llvm-only 2>&1 | FileCheck %s
+
+__attribute__((target("no-mma")))
+void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc) {
+  __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+  __vector_pair vp = *((__vector_pair *)vpp);
+  __builtin_mma_dmxvi8gerx4(&vdmr, vp, vc);
+  __builtin_mma_pmdmxvi8gerx4(&vdmr, vp, vc, 0, 0, 0);
+  __builtin_mma_dmxvi8gerx4pp(&vdmr, vp, vc);
+  __builtin_mma_pmdmxvi8gerx4pp(&vdmr, vp, vc, 0, 0, 0);
+  __builtin_mma_dmxvi8gerx4spp(&vdmr, vp, vc);
+  __builtin_mma_pmdmxvi8gerx4spp(&vdmr, vp, vc, 0, 0, 0);
+
+// CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmxvi8gerx4pp' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvi8gerx4pp' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmxvi8gerx4spp' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvi8gerx4spp' needs target feature mma,paired-vector-memops
+}
