Skip to content

Commit 68ab5bc

Browse files
committed
Add tests for ballot_group algorithms
This commit adds tests for using ballot_group and the following algorithms: - group_barrier - group_broadcast - any_of_group - all_of_group - none_of_group - reduce_over_group - exclusive_scan_over_group - inclusive_scan_over_group Signed-off-by: John Pennycook <john.pennycook@intel.com>
1 parent b2a4a11 commit 68ab5bc

File tree

1 file changed

+131
-0
lines changed

1 file changed

+131
-0
lines changed
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
3+
//
4+
// UNSUPPORTED: cpu || cuda || hip
5+
6+
#include <sycl/sycl.hpp>
7+
#include <vector>
8+
namespace syclex = sycl::ext::oneapi::experimental;
9+
10+
class TestKernel;
11+
12+
int main() {
13+
sycl::queue Q;
14+
15+
auto SGSizes = Q.get_device().get_info<sycl::info::device::sub_group_sizes>();
16+
if (std::find(SGSizes.begin(), SGSizes.end(), 32) == SGSizes.end()) {
17+
std::cout << "Test skipped due to missing support for sub-group size 32."
18+
<< std::endl;
19+
return 0;
20+
}
21+
22+
sycl::buffer<size_t, 1> TmpBuf{sycl::range{32}};
23+
sycl::buffer<bool, 1> BarrierBuf{sycl::range{32}};
24+
sycl::buffer<bool, 1> BroadcastBuf{sycl::range{32}};
25+
sycl::buffer<bool, 1> AnyBuf{sycl::range{32}};
26+
sycl::buffer<bool, 1> AllBuf{sycl::range{32}};
27+
sycl::buffer<bool, 1> NoneBuf{sycl::range{32}};
28+
sycl::buffer<bool, 1> ReduceBuf{sycl::range{32}};
29+
sycl::buffer<bool, 1> ExScanBuf{sycl::range{32}};
30+
sycl::buffer<bool, 1> IncScanBuf{sycl::range{32}};
31+
32+
const auto NDR = sycl::nd_range<1>{32, 32};
33+
Q.submit([&](sycl::handler &CGH) {
34+
sycl::accessor TmpAcc{TmpBuf, CGH, sycl::write_only};
35+
sycl::accessor BarrierAcc{BarrierBuf, CGH, sycl::write_only};
36+
sycl::accessor BroadcastAcc{BroadcastBuf, CGH, sycl::write_only};
37+
sycl::accessor AnyAcc{AnyBuf, CGH, sycl::write_only};
38+
sycl::accessor AllAcc{AllBuf, CGH, sycl::write_only};
39+
sycl::accessor NoneAcc{NoneBuf, CGH, sycl::write_only};
40+
sycl::accessor ReduceAcc{ReduceBuf, CGH, sycl::write_only};
41+
sycl::accessor ExScanAcc{ExScanBuf, CGH, sycl::write_only};
42+
sycl::accessor IncScanAcc{IncScanBuf, CGH, sycl::write_only};
43+
const auto KernelFunc =
44+
[=](sycl::nd_item<1> item) [[sycl::reqd_sub_group_size(32)]] {
45+
auto WI = item.get_global_id();
46+
auto SG = item.get_sub_group();
47+
48+
// Split into odd and even work-items
49+
bool Predicate = WI % 2 == 0;
50+
auto BallotGroup = syclex::get_ballot_group(SG, Predicate);
51+
52+
// Check all other members' writes are visible after a barrier
53+
TmpAcc[WI] = 1;
54+
sycl::group_barrier(BallotGroup);
55+
size_t Visible = 0;
56+
for (size_t Other = 0; Other < 32; ++Other) {
57+
if (WI % 2 == Other % 2) {
58+
Visible += TmpAcc[Other];
59+
}
60+
}
61+
BarrierAcc[WI] = Visible;
62+
63+
// Simple check of group algorithms
64+
uint32_t OriginalLID = SG.get_local_linear_id();
65+
uint32_t LID = BallotGroup.get_local_linear_id();
66+
67+
uint32_t BroadcastResult =
68+
sycl::group_broadcast(BallotGroup, OriginalLID, 0);
69+
if (Predicate) {
70+
BroadcastAcc[WI] = (BroadcastResult == 0);
71+
} else {
72+
BroadcastAcc[WI] = (BroadcastResult == 1);
73+
}
74+
75+
bool AnyResult = sycl::any_of_group(BallotGroup, Predicate);
76+
if (Predicate) {
77+
AnyAcc[WI] = (AnyResult == true);
78+
} else {
79+
AnyAcc[WI] = (AnyResult == false);
80+
}
81+
82+
bool AllResult = sycl::all_of_group(BallotGroup, Predicate);
83+
if (Predicate) {
84+
AllAcc[WI] = (AllResult == true);
85+
} else {
86+
AllAcc[WI] = (AllResult == false);
87+
}
88+
89+
bool NoneResult = sycl::none_of_group(BallotGroup, Predicate);
90+
if (Predicate) {
91+
NoneAcc[WI] = (NoneResult == false);
92+
} else {
93+
NoneAcc[WI] = (NoneResult == true);
94+
}
95+
96+
uint32_t ReduceResult =
97+
sycl::reduce_over_group(BallotGroup, 1, sycl::plus<>());
98+
ReduceAcc[WI] =
99+
(ReduceResult == BallotGroup.get_local_linear_range());
100+
101+
uint32_t ExScanResult =
102+
sycl::exclusive_scan_over_group(BallotGroup, 1, sycl::plus<>());
103+
ExScanAcc[WI] = (ExScanResult == LID);
104+
105+
uint32_t IncScanResult =
106+
sycl::inclusive_scan_over_group(BallotGroup, 1, sycl::plus<>());
107+
IncScanAcc[WI] = (IncScanResult == LID + 1);
108+
};
109+
CGH.parallel_for<TestKernel>(NDR, KernelFunc);
110+
});
111+
112+
sycl::host_accessor BarrierAcc{BarrierBuf, sycl::read_only};
113+
sycl::host_accessor BroadcastAcc{BroadcastBuf, sycl::read_only};
114+
sycl::host_accessor AnyAcc{AnyBuf, sycl::read_only};
115+
sycl::host_accessor AllAcc{AllBuf, sycl::read_only};
116+
sycl::host_accessor NoneAcc{NoneBuf, sycl::read_only};
117+
sycl::host_accessor ReduceAcc{ReduceBuf, sycl::read_only};
118+
sycl::host_accessor ExScanAcc{ExScanBuf, sycl::read_only};
119+
sycl::host_accessor IncScanAcc{IncScanBuf, sycl::read_only};
120+
for (int WI = 0; WI < 32; ++WI) {
121+
assert(BarrierAcc[WI] == true);
122+
assert(BroadcastAcc[WI] == true);
123+
assert(AnyAcc[WI] == true);
124+
assert(AllAcc[WI] == true);
125+
assert(NoneAcc[WI] == true);
126+
assert(ReduceAcc[WI] == true);
127+
assert(ExScanAcc[WI] == true);
128+
assert(IncScanAcc[WI] == true);
129+
}
130+
return 0;
131+
}

0 commit comments

Comments
 (0)