diff options
author | Yilong Guo <yilong.guo@intel.com> | 2024-03-13 00:25:06 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-12 09:25:06 -0700 |
commit | a045f76eed24a67eb2664aadcb036e48fe2e23aa (patch) | |
tree | 0167bc8b5dbce539e7ac527f495390e994b67fb1 | |
parent | ee504ba861c8263afbec48147ec03df2ec089c5e (diff) | |
download | OpenCL-CTS-a045f76eed24a67eb2664aadcb036e48fe2e23aa.tar.gz |
[subgroups][non_uniform_broadcast] Fix broadcasting index generation (#1680)
* [subgroups][non_uniform_broadcast] Fix broadcasting index generation
The subgroup size may not be greater than `NR_OF_ACTIVE_WORK_ITEMS`.
Broadcasting index needs to be reduced in that case.
Otherwise, if subgroup size == `NR_OF_ACTIVE_WORK_ITEMS` == 4, then we
will encounter "divide-by-zero" error when evaluating `bcast_index %
(n - NR_OF_ACTIVE_WORK_ITEMS)`.
* Revert "[subgroups][non_uniform_broadcast] Fix broadcasting index generation"
This reverts commit 9bbab539de6ee30676e3a4af1249ba91d0b6c5ed.
* [subgroups][non_uniform_broadcast] Fix broadcasting index generation
Dynamically activate half of the work items in the current subgroup
instead of hardcoding as `NR_OF_ACTIVE_WORK_ITEMS`.
* Apply suggestion
-rw-r--r-- | test_conformance/subgroups/subgroup_common_templates.h | 32 | ||||
-rw-r--r-- | test_conformance/subgroups/subhelpers.h | 4 | ||||
-rw-r--r-- | test_conformance/subgroups/test_subgroup_ballot.cpp | 4 |
3 files changed, 14 insertions, 26 deletions
diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 30f2a2a9..23f83713 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -29,7 +29,7 @@ // subgroup takes only one value from only one chosen (the smallest subgroup ID) // work_item // sub_group_non_uniform_broadcast - same as type 0 but -// only 4 work_items from subgroup enter the code (are active) +// only half of work_items from subgroup enter the code (are active) template <typename Ty, SubgroupsBroadcastOp operation> struct BC { static void log_test(const WorkGroupParams &test_params, @@ -78,24 +78,16 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC int bcast_elseif = 0; int bcast_index = (int)(genrand_int32(gMTdata) & 0x7fffffff) % (d > n ? n : d); + int num_of_active_items = n >> 1; // l - calculate subgroup local id from which value will be // broadcasted (one the same value for whole subgroup) if (operation != SubgroupsBroadcastOp::broadcast) { - // reduce brodcasting index in case of non_uniform and - // last workgroup last subgroup - if (last_subgroup_size && j == nj - 1 - && last_subgroup_size < NR_OF_ACTIVE_WORK_ITEMS) - { - bcast_if = bcast_index % last_subgroup_size; - bcast_elseif = bcast_if; - } - else - { - bcast_if = bcast_index % NR_OF_ACTIVE_WORK_ITEMS; - bcast_elseif = NR_OF_ACTIVE_WORK_ITEMS - + bcast_index % (n - NR_OF_ACTIVE_WORK_ITEMS); - } + if (num_of_active_items != 0) + bcast_if = bcast_index % num_of_active_items; + if (num_of_active_items != n) + bcast_elseif = num_of_active_items + + bcast_index % (n - num_of_active_items); } for (i = 0; i < n; ++i) @@ -107,7 +99,7 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC } else { - if (i < NR_OF_ACTIVE_WORK_ITEMS) + if (i < num_of_active_items) { // index of the third // element int the vector. @@ -182,15 +174,15 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC } // Check result + int num_of_active_items = n >> 1; if (operation == SubgroupsBroadcastOp::broadcast_first) { int lowest_active_id = -1; for (i = 0; i < n; ++i) { - lowest_active_id = i < NR_OF_ACTIVE_WORK_ITEMS - ? 0 - : NR_OF_ACTIVE_WORK_ITEMS; + lowest_active_id = + i < num_of_active_items ? 0 : num_of_active_items; // findout if broadcasted // value is the same tr = mx[ii + lowest_active_id]; @@ -221,7 +213,7 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC } else { - if (i < NR_OF_ACTIVE_WORK_ITEMS) + if (i < num_of_active_items) { // take index of array where info // which work_item will be // broadcast its value is stored diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index ed92e5d3..8600088e 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -28,8 +28,6 @@ #include <regex> #include <map> -#define NR_OF_ACTIVE_WORK_ITEMS 4 - extern MTdata gMTdata; typedef std::bitset<128> bs128; extern cl_half_rounding_mode g_rounding_mode; @@ -1474,8 +1472,6 @@ template <typename Ty, typename Fns, size_t TSIZE = 0> struct test Fns::log_test(test_params, ""); - kernel_sstr << "#define NR_OF_ACTIVE_WORK_ITEMS "; - kernel_sstr << NR_OF_ACTIVE_WORK_ITEMS << "\n"; // Make sure a test of type Ty is supported by the device if (!TypeManager<Ty>::type_supported(device)) { diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 6795a411..0eb0c499 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -767,7 +767,7 @@ __kernel void test_sub_group_non_uniform_broadcast(const __global Type *in, __gl int gid = get_global_id(0); XY(xy,gid); Type x = in[gid]; - if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) { + if (xy[gid].x < (get_sub_group_size() >> 1)) { out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z); } else { out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w); @@ -779,7 +779,7 @@ __kernel void test_sub_group_broadcast_first(const __global Type *in, __global i int gid = get_global_id(0); XY(xy,gid); Type x = in[gid]; - if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) { + if (xy[gid].x < (get_sub_group_size() >> 1)) { out[gid] = sub_group_broadcast_first(x);; } else { out[gid] = sub_group_broadcast_first(x);; |