aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYilong Guo <yilong.guo@intel.com>2024-03-13 00:25:06 +0800
committerGitHub <noreply@github.com>2024-03-12 09:25:06 -0700
commita045f76eed24a67eb2664aadcb036e48fe2e23aa (patch)
tree0167bc8b5dbce539e7ac527f495390e994b67fb1
parentee504ba861c8263afbec48147ec03df2ec089c5e (diff)
downloadOpenCL-CTS-a045f76eed24a67eb2664aadcb036e48fe2e23aa.tar.gz
[subgroups][non_uniform_broadcast] Fix broadcasting index generation (#1680)
* [subgroups][non_uniform_broadcast] Fix broadcasting index generation The subgroup size may not be greater than `NR_OF_ACTIVE_WORK_ITEMS`. Broadcasting index needs to be reduced in that case. Otherwise, if subgroup size == `NR_OF_ACTIVE_WORK_ITEMS` == 4, then we will encounter "divide-by-zero" error when evaluating `bcast_index % (n - NR_OF_ACTIVE_WORK_ITEMS)`. * Revert "[subgroups][non_uniform_broadcast] Fix broadcasting index generation" This reverts commit 9bbab539de6ee30676e3a4af1249ba91d0b6c5ed. * [subgroups][non_uniform_broadcast] Fix broadcasting index generation Dynamically activate half of the work items in the current subgroup instead of hardcoding as `NR_OF_ACTIVE_WORK_ITEMS`. * Apply suggestion
-rw-r--r--test_conformance/subgroups/subgroup_common_templates.h32
-rw-r--r--test_conformance/subgroups/subhelpers.h4
-rw-r--r--test_conformance/subgroups/test_subgroup_ballot.cpp4
3 files changed, 14 insertions, 26 deletions
diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h
index 30f2a2a9..23f83713 100644
--- a/test_conformance/subgroups/subgroup_common_templates.h
+++ b/test_conformance/subgroups/subgroup_common_templates.h
@@ -29,7 +29,7 @@
// subgroup takes only one value from only one chosen (the smallest subgroup ID)
// work_item
// sub_group_non_uniform_broadcast - same as type 0 but
-// only 4 work_items from subgroup enter the code (are active)
+// only half of work_items from subgroup enter the code (are active)
template <typename Ty, SubgroupsBroadcastOp operation> struct BC
{
static void log_test(const WorkGroupParams &test_params,
@@ -78,24 +78,16 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC
int bcast_elseif = 0;
int bcast_index = (int)(genrand_int32(gMTdata) & 0x7fffffff)
% (d > n ? n : d);
+ int num_of_active_items = n >> 1;
// l - calculate subgroup local id from which value will be
// broadcasted (one the same value for whole subgroup)
if (operation != SubgroupsBroadcastOp::broadcast)
{
- // reduce brodcasting index in case of non_uniform and
- // last workgroup last subgroup
- if (last_subgroup_size && j == nj - 1
- && last_subgroup_size < NR_OF_ACTIVE_WORK_ITEMS)
- {
- bcast_if = bcast_index % last_subgroup_size;
- bcast_elseif = bcast_if;
- }
- else
- {
- bcast_if = bcast_index % NR_OF_ACTIVE_WORK_ITEMS;
- bcast_elseif = NR_OF_ACTIVE_WORK_ITEMS
- + bcast_index % (n - NR_OF_ACTIVE_WORK_ITEMS);
- }
+ if (num_of_active_items != 0)
+ bcast_if = bcast_index % num_of_active_items;
+ if (num_of_active_items != n)
+ bcast_elseif = num_of_active_items
+ + bcast_index % (n - num_of_active_items);
}
for (i = 0; i < n; ++i)
@@ -107,7 +99,7 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC
}
else
{
- if (i < NR_OF_ACTIVE_WORK_ITEMS)
+ if (i < num_of_active_items)
{
// index of the third
// element int the vector.
@@ -182,15 +174,15 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC
}
// Check result
+ int num_of_active_items = n >> 1;
if (operation == SubgroupsBroadcastOp::broadcast_first)
{
int lowest_active_id = -1;
for (i = 0; i < n; ++i)
{
- lowest_active_id = i < NR_OF_ACTIVE_WORK_ITEMS
- ? 0
- : NR_OF_ACTIVE_WORK_ITEMS;
+ lowest_active_id =
+ i < num_of_active_items ? 0 : num_of_active_items;
// findout if broadcasted
// value is the same
tr = mx[ii + lowest_active_id];
@@ -221,7 +213,7 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC
}
else
{
- if (i < NR_OF_ACTIVE_WORK_ITEMS)
+ if (i < num_of_active_items)
{ // take index of array where info
// which work_item will be
// broadcast its value is stored
diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h
index ed92e5d3..8600088e 100644
--- a/test_conformance/subgroups/subhelpers.h
+++ b/test_conformance/subgroups/subhelpers.h
@@ -28,8 +28,6 @@
#include <regex>
#include <map>
-#define NR_OF_ACTIVE_WORK_ITEMS 4
-
extern MTdata gMTdata;
typedef std::bitset<128> bs128;
extern cl_half_rounding_mode g_rounding_mode;
@@ -1474,8 +1472,6 @@ template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
Fns::log_test(test_params, "");
- kernel_sstr << "#define NR_OF_ACTIVE_WORK_ITEMS ";
- kernel_sstr << NR_OF_ACTIVE_WORK_ITEMS << "\n";
// Make sure a test of type Ty is supported by the device
if (!TypeManager<Ty>::type_supported(device))
{
diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp
index 6795a411..0eb0c499 100644
--- a/test_conformance/subgroups/test_subgroup_ballot.cpp
+++ b/test_conformance/subgroups/test_subgroup_ballot.cpp
@@ -767,7 +767,7 @@ __kernel void test_sub_group_non_uniform_broadcast(const __global Type *in, __gl
int gid = get_global_id(0);
XY(xy,gid);
Type x = in[gid];
- if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {
+ if (xy[gid].x < (get_sub_group_size() >> 1)) {
out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z);
} else {
out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w);
@@ -779,7 +779,7 @@ __kernel void test_sub_group_broadcast_first(const __global Type *in, __global i
int gid = get_global_id(0);
XY(xy,gid);
Type x = in[gid];
- if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {
+ if (xy[gid].x < (get_sub_group_size() >> 1)) {
out[gid] = sub_group_broadcast_first(x);;
} else {
out[gid] = sub_group_broadcast_first(x);;