Skip to content

Commit b2a4a11

Browse files
committed
Add basic tests for non-uniform groups
Tests the ability to create an instance of each new group type, and the correctness of the core member functions. Signed-off-by: John Pennycook <john.pennycook@intel.com>
1 parent 01ecf06 commit b2a4a11

File tree

6 files changed

+283
-0
lines changed

6 files changed

+283
-0
lines changed
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
3+
//
4+
// UNSUPPORTED: cpu || cuda || hip
5+
6+
#include <sycl/sycl.hpp>
7+
#include <vector>
8+
namespace syclex = sycl::ext::oneapi::experimental;
9+
10+
class TestKernel;
11+
12+
int main() {
13+
sycl::queue Q;
14+
15+
auto SGSizes = Q.get_device().get_info<sycl::info::device::sub_group_sizes>();
16+
if (std::find(SGSizes.begin(), SGSizes.end(), 32) == SGSizes.end()) {
17+
std::cout << "Test skipped due to missing support for sub-group size 32."
18+
<< std::endl;
19+
return 0;
20+
}
21+
22+
sycl::buffer<bool, 1> MatchBuf{sycl::range{32}};
23+
sycl::buffer<bool, 1> LeaderBuf{sycl::range{32}};
24+
25+
const auto NDR = sycl::nd_range<1>{32, 32};
26+
Q.submit([&](sycl::handler &CGH) {
27+
sycl::accessor MatchAcc{MatchBuf, CGH, sycl::write_only};
28+
sycl::accessor LeaderAcc{LeaderBuf, CGH, sycl::write_only};
29+
const auto KernelFunc =
30+
[=](sycl::nd_item<1> item) [[sycl::reqd_sub_group_size(32)]] {
31+
auto WI = item.get_global_id();
32+
auto SG = item.get_sub_group();
33+
34+
// Split into odd and even work-items
35+
bool Predicate = item.get_global_id() % 2 == 0;
36+
auto BallotGroup = syclex::get_ballot_group(SG, Predicate);
37+
38+
// Check function return values match Predicate
39+
bool Match = true;
40+
auto GroupID = (Predicate) ? 1 : 0;
41+
Match &= (BallotGroup.get_group_id() == GroupID);
42+
Match &= (BallotGroup.get_local_id() == SG.get_local_id() / 2);
43+
Match &= (BallotGroup.get_group_range() == 2);
44+
Match &= (BallotGroup.get_local_range() == 16);
45+
MatchAcc[WI] = Match;
46+
LeaderAcc[WI] = BallotGroup.leader();
47+
};
48+
CGH.parallel_for<TestKernel>(NDR, KernelFunc);
49+
});
50+
51+
sycl::host_accessor MatchAcc{MatchBuf, sycl::read_only};
52+
sycl::host_accessor LeaderAcc{LeaderBuf, sycl::read_only};
53+
for (int WI = 0; WI < 32; ++WI) {
54+
assert(MatchAcc[WI] == true);
55+
assert(LeaderAcc[WI] == (WI < 2));
56+
}
57+
return 0;
58+
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
3+
//
4+
// UNSUPPORTED: cpu || cuda || hip
5+
6+
#include <sycl/sycl.hpp>
7+
#include <vector>
8+
namespace syclex = sycl::ext::oneapi::experimental;
9+
10+
template <size_t ClusterSize> class TestKernel;
11+
12+
template <size_t ClusterSize> void test() {
13+
sycl::queue Q;
14+
15+
auto SGSizes = Q.get_device().get_info<sycl::info::device::sub_group_sizes>();
16+
if (std::find(SGSizes.begin(), SGSizes.end(), 32) == SGSizes.end()) {
17+
std::cout << "Test skipped due to missing support for sub-group size 32."
18+
<< std::endl;
19+
}
20+
21+
sycl::buffer<bool, 1> MatchBuf{sycl::range{32}};
22+
sycl::buffer<bool, 1> LeaderBuf{sycl::range{32}};
23+
24+
const auto NDR = sycl::nd_range<1>{32, 32};
25+
Q.submit([&](sycl::handler &CGH) {
26+
sycl::accessor MatchAcc{MatchBuf, CGH, sycl::write_only};
27+
sycl::accessor LeaderAcc{LeaderBuf, CGH, sycl::write_only};
28+
const auto KernelFunc =
29+
[=](sycl::nd_item<1> item) [[sycl::reqd_sub_group_size(32)]] {
30+
auto WI = item.get_global_id();
31+
auto SG = item.get_sub_group();
32+
33+
auto ClusterGroup = syclex::get_cluster_group<ClusterSize>(SG);
34+
35+
bool Match = true;
36+
Match &= (ClusterGroup.get_group_id() == (WI / ClusterSize));
37+
Match &= (ClusterGroup.get_local_id() == (WI % ClusterSize));
38+
Match &= (ClusterGroup.get_group_range() == (32 / ClusterSize));
39+
Match &= (ClusterGroup.get_local_range() == ClusterSize);
40+
MatchAcc[WI] = Match;
41+
LeaderAcc[WI] = ClusterGroup.leader();
42+
};
43+
CGH.parallel_for<TestKernel<ClusterSize>>(NDR, KernelFunc);
44+
});
45+
46+
sycl::host_accessor MatchAcc{MatchBuf, sycl::read_only};
47+
sycl::host_accessor LeaderAcc{LeaderBuf, sycl::read_only};
48+
for (int WI = 0; WI < 32; ++WI) {
49+
assert(MatchAcc[WI] == true);
50+
assert(LeaderAcc[WI] == ((WI % ClusterSize) == 0));
51+
}
52+
}
53+
54+
int main() {
55+
test<1>();
56+
test<2>();
57+
test<4>();
58+
test<8>();
59+
test<16>();
60+
test<32>();
61+
return 0;
62+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// RUN: %clangxx -fsycl -fsyntax-only -fsycl-targets=%sycl_triple %s -o %t.out
2+
3+
#include <sycl/sycl.hpp>
4+
namespace syclex = sycl::ext::oneapi::experimental;
5+
6+
#ifdef SYCL_EXT_ONEAPI_ROOT_GROUP
7+
static_assert(syclex::is_fixed_topology_group_v<syclex::root_group>);
8+
#endif
9+
static_assert(syclex::is_fixed_topology_group_v<sycl::group<1>>);
10+
static_assert(syclex::is_fixed_topology_group_v<sycl::group<2>>);
11+
static_assert(syclex::is_fixed_topology_group_v<sycl::group<3>>);
12+
static_assert(syclex::is_fixed_topology_group_v<sycl::sub_group>);
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// RUN: %clangxx -fsycl -fsyntax-only -fsycl-targets=%sycl_triple %s -o %t.out
2+
3+
#include <sycl/sycl.hpp>
4+
namespace syclex = sycl::ext::oneapi::experimental;
5+
6+
static_assert(
7+
syclex::is_user_constructed_group_v<syclex::ballot_group<sycl::sub_group>>);
8+
static_assert(syclex::is_user_constructed_group_v<
9+
syclex::cluster_group<1, sycl::sub_group>>);
10+
static_assert(syclex::is_user_constructed_group_v<
11+
syclex::cluster_group<2, sycl::sub_group>>);
12+
static_assert(
13+
syclex::is_user_constructed_group_v<syclex::tangle_group<sycl::sub_group>>);
14+
static_assert(syclex::is_user_constructed_group_v<syclex::opportunistic_group>);
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
3+
//
4+
// UNSUPPORTED: cpu || cuda || hip
5+
6+
#include <sycl/sycl.hpp>
7+
#include <vector>
8+
namespace syclex = sycl::ext::oneapi::experimental;
9+
10+
class TestKernel;
11+
12+
int main() {
13+
sycl::queue Q;
14+
15+
auto SGSizes = Q.get_device().get_info<sycl::info::device::sub_group_sizes>();
16+
if (std::find(SGSizes.begin(), SGSizes.end(), 32) == SGSizes.end()) {
17+
std::cout << "Test skipped due to missing support for sub-group size 32."
18+
<< std::endl;
19+
return 0;
20+
}
21+
22+
sycl::buffer<bool, 1> MatchBuf{sycl::range{32}};
23+
sycl::buffer<bool, 1> LeaderBuf{sycl::range{32}};
24+
25+
const auto NDR = sycl::nd_range<1>{32, 32};
26+
Q.submit([&](sycl::handler &CGH) {
27+
sycl::accessor MatchAcc{MatchBuf, CGH, sycl::write_only};
28+
sycl::accessor LeaderAcc{LeaderBuf, CGH, sycl::write_only};
29+
const auto KernelFunc =
30+
[=](sycl::nd_item<1> item) [[sycl::reqd_sub_group_size(32)]] {
31+
auto WI = item.get_global_id();
32+
auto SG = item.get_sub_group();
33+
34+
// Due to the unpredictable runtime behavior of opportunistic groups,
35+
// some values may change from run to run. Check they're in expected
36+
// ranges and consistent with other groups.
37+
if (item.get_global_id() % 2 == 0) {
38+
auto OpportunisticGroup =
39+
syclex::this_kernel::get_opportunistic_group();
40+
41+
bool Match = true;
42+
Match &= (OpportunisticGroup.get_group_id() == 0);
43+
Match &= (OpportunisticGroup.get_local_id() <
44+
OpportunisticGroup.get_local_range());
45+
Match &= (OpportunisticGroup.get_group_range() == 1);
46+
Match &= (OpportunisticGroup.get_local_linear_range() <=
47+
SG.get_local_linear_range());
48+
MatchAcc[WI] = Match;
49+
LeaderAcc[WI] = OpportunisticGroup.leader();
50+
}
51+
};
52+
CGH.parallel_for<TestKernel>(NDR, KernelFunc);
53+
});
54+
55+
sycl::host_accessor MatchAcc{MatchBuf, sycl::read_only};
56+
sycl::host_accessor LeaderAcc{LeaderBuf, sycl::read_only};
57+
uint32_t NumLeaders = 0;
58+
for (int WI = 0; WI < 32; ++WI) {
59+
if (WI % 2 == 0) {
60+
assert(MatchAcc[WI] == true);
61+
if (LeaderAcc[WI]) {
62+
NumLeaders++;
63+
}
64+
}
65+
}
66+
assert(NumLeaders > 0);
67+
return 0;
68+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// RUN: %clangxx -fsycl -fno-sycl-early-optimizations -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
3+
//
4+
// UNSUPPORTED: cpu || cuda || hip
5+
6+
#include <sycl/sycl.hpp>
7+
#include <vector>
8+
namespace syclex = sycl::ext::oneapi::experimental;
9+
10+
class TestKernel;
11+
12+
int main() {
13+
sycl::queue Q;
14+
15+
auto SGSizes = Q.get_device().get_info<sycl::info::device::sub_group_sizes>();
16+
if (std::find(SGSizes.begin(), SGSizes.end(), 32) == SGSizes.end()) {
17+
std::cout << "Test skipped due to missing support for sub-group size 32."
18+
<< std::endl;
19+
return 0;
20+
}
21+
22+
sycl::buffer<bool, 1> MatchBuf{sycl::range{32}};
23+
sycl::buffer<bool, 1> LeaderBuf{sycl::range{32}};
24+
25+
const auto NDR = sycl::nd_range<1>{32, 32};
26+
Q.submit([&](sycl::handler &CGH) {
27+
sycl::accessor MatchAcc{MatchBuf, CGH, sycl::write_only};
28+
sycl::accessor LeaderAcc{LeaderBuf, CGH, sycl::write_only};
29+
const auto KernelFunc =
30+
[=](sycl::nd_item<1> item) [[sycl::reqd_sub_group_size(32)]] {
31+
auto WI = item.get_global_id();
32+
auto SG = item.get_sub_group();
33+
34+
// Split into odd and even work-items via control flow
35+
// Branches deliberately duplicated to test impact of optimizations
36+
// This only reliably works with optimizations disabled right now
37+
if (item.get_global_id() % 2 == 0) {
38+
auto TangleGroup = syclex::get_tangle_group(SG);
39+
40+
bool Match = true;
41+
Match &= (TangleGroup.get_group_id() == 0);
42+
Match &= (TangleGroup.get_local_id() == SG.get_local_id() / 2);
43+
Match &= (TangleGroup.get_group_range() == 1);
44+
Match &= (TangleGroup.get_local_range() == 16);
45+
MatchAcc[WI] = Match;
46+
LeaderAcc[WI] = TangleGroup.leader();
47+
} else {
48+
auto TangleGroup = syclex::get_tangle_group(SG);
49+
50+
bool Match = true;
51+
Match &= (TangleGroup.get_group_id() == 0);
52+
Match &= (TangleGroup.get_local_id() == SG.get_local_id() / 2);
53+
Match &= (TangleGroup.get_group_range() == 1);
54+
Match &= (TangleGroup.get_local_range() == 16);
55+
MatchAcc[WI] = Match;
56+
LeaderAcc[WI] = TangleGroup.leader();
57+
}
58+
};
59+
CGH.parallel_for<TestKernel>(NDR, KernelFunc);
60+
});
61+
62+
sycl::host_accessor MatchAcc{MatchBuf, sycl::read_only};
63+
sycl::host_accessor LeaderAcc{LeaderBuf, sycl::read_only};
64+
for (int WI = 0; WI < 32; ++WI) {
65+
assert(MatchAcc[WI] == true);
66+
assert(LeaderAcc[WI] == (WI < 2));
67+
}
68+
return 0;
69+
}

0 commit comments

Comments
 (0)