aboutsummaryrefslogtreecommitdiff
path: root/test_conformance/basic/test_async_copy2D.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'test_conformance/basic/test_async_copy2D.cpp')
-rw-r--r--test_conformance/basic/test_async_copy2D.cpp182
1 files changed, 81 insertions, 101 deletions
diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp
index bf3f1552..11ef84bd 100644
--- a/test_conformance/basic/test_async_copy2D.cpp
+++ b/test_conformance/basic/test_async_copy2D.cpp
@@ -27,17 +27,25 @@
static const char *async_global_to_local_kernel2D = R"OpenCLC(
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
-%s // optional pragma string
-__kernel void test_fn(const __global %s *src, __global %s *dst,
- __local %s *localBuffer, int numElementsPerLine,
+#define STRUCT_SIZE %d
+typedef struct __attribute__((packed))
+{
+ uchar byte[STRUCT_SIZE];
+} VarSizeStruct __attribute__((aligned(1)));
+
+
+__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst,
+ __local VarSizeStruct *localBuffer, int numElementsPerLine,
int lineCopiesPerWorkgroup, int lineCopiesPerWorkItem,
int srcStride, int dstStride) {
// Zero the local storage first
for (int i = 0; i < lineCopiesPerWorkItem; i++) {
for (int j = 0; j < numElementsPerLine; j++) {
const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
- localBuffer[index] = (%s)(%s)0;
+ for (int k = 0; k < STRUCT_SIZE; k++) {
+ localBuffer[index].byte[k] = 0;
+ }
}
}
@@ -45,7 +53,7 @@ __kernel void test_fn(const __global %s *src, __global %s *dst,
// try the copy
barrier( CLK_LOCAL_MEM_FENCE );
event_t event = async_work_group_copy_2D2D(localBuffer, 0, src,
- lineCopiesPerWorkgroup * get_group_id(0) * srcStride, sizeof(%s),
+ lineCopiesPerWorkgroup * get_group_id(0) * srcStride, sizeof(VarSizeStruct),
(size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0);
// Wait for the copy to complete, then verify by manually copying to the dest
@@ -63,16 +71,24 @@ __kernel void test_fn(const __global %s *src, __global %s *dst,
static const char *async_local_to_global_kernel2D = R"OpenCLC(
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
-%s // optional pragma string
-__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
+#define STRUCT_SIZE %d
+typedef struct __attribute__((packed))
+{
+ uchar byte[STRUCT_SIZE];
+} VarSizeStruct __attribute__((aligned(1)));
+
+
+__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, __local VarSizeStruct *localBuffer,
int numElementsPerLine, int lineCopiesPerWorkgroup,
int lineCopiesPerWorkItem, int srcStride, int dstStride) {
// Zero the local storage first
for (int i = 0; i < lineCopiesPerWorkItem; i++) {
for (int j = 0; j < numElementsPerLine; j++) {
const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * srcStride + j;
- localBuffer[index] = (%s)(%s)0;
+ for (int k = 0; k < STRUCT_SIZE; k++) {
+ localBuffer[index].byte[k] = 0;
+ }
}
}
@@ -90,36 +106,22 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca
// Do this to verify all kernels are done copying to the local buffer before we try the copy
barrier(CLK_LOCAL_MEM_FENCE);
event_t event = async_work_group_copy_2D2D(dst, lineCopiesPerWorkgroup * get_group_id(0) * dstStride,
- localBuffer, 0, sizeof(%s), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride,
+ localBuffer, 0, sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride,
dstStride, 0 );
wait_group_events(1, &event);
};
)OpenCLC";
-int test_copy2D(cl_device_id deviceID, cl_context context,
- cl_command_queue queue, const char *kernelCode,
- ExplicitType vecType, int vecSize, int srcMargin, int dstMargin,
- bool localIsDst)
+int test_copy2D(const cl_device_id deviceID, const cl_context context,
+ const cl_command_queue queue, const char *const kernelCode,
+ const size_t elementSize, const int srcMargin,
+ const int dstMargin, const bool localIsDst)
{
int error;
- clProgramWrapper program;
- clKernelWrapper kernel;
- clMemWrapper streams[2];
- size_t threads[1], localThreads[1];
- void *inBuffer, *outBuffer, *outBufferCopy;
- MTdata d;
- char vecNameString[64];
- vecNameString[0] = 0;
- if (vecSize == 1)
- sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
- else
- sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType),
- vecSize);
- size_t elementSize = get_explicit_type_size(vecType) * vecSize;
- log_info("Testing %s with srcMargin = %d, dstMargin = %d\n", vecNameString,
- srcMargin, dstMargin);
+ log_info("Testing %d byte element with srcMargin = %d, dstMargin = %d\n",
+ elementSize, srcMargin, dstMargin);
cl_long max_local_mem_size;
error =
@@ -139,6 +141,13 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
test_error(error,
"clGetDeviceInfo for CL_DEVICE_MAX_MEM_ALLOC_SIZE failed.");
+ cl_long max_work_group_size;
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+ sizeof(max_work_group_size), &max_work_group_size,
+ NULL);
+ test_error(error,
+ "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed.");
+
if (max_alloc_size > max_global_mem_size / 2)
max_alloc_size = max_global_mem_size / 2;
@@ -149,20 +158,17 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
test_error(error,
"clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
- char programSource[4096];
- programSource[0] = 0;
- char *programPtr;
+ char programSource[4096] = { 0 };
+ const char *programPtr = programSource;
- sprintf(programSource, kernelCode,
- vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
- : "",
- vecNameString, vecNameString, vecNameString, vecNameString,
- get_explicit_type_name(vecType), vecNameString);
+ sprintf(programSource, kernelCode, elementSize);
// log_info("program: %s\n", programSource);
- programPtr = programSource;
+
+ clProgramWrapper program;
+ clKernelWrapper kernel;
error = create_single_kernel_helper(context, &program, &kernel, 1,
- (const char **)&programPtr, "test_fn");
+ &programPtr, "test_fn");
test_error(error, "Unable to create testing kernel");
size_t max_workgroup_size;
@@ -188,9 +194,6 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
const cl_int dstStride = numElementsPerLine + dstMargin;
const cl_int srcStride = numElementsPerLine + srcMargin;
- elementSize =
- get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
-
const size_t lineCopiesPerWorkItem = 13;
const size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem
* elementSize * (localIsDst ? dstStride : srcStride);
@@ -208,7 +211,6 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
if (maxLocalWorkgroupSize > max_workgroup_size)
localWorkgroupSize = max_workgroup_size;
-
const size_t maxTotalLinesIn =
(max_alloc_size / elementSize + srcMargin) / srcStride;
const size_t maxTotalLinesOut =
@@ -231,9 +233,17 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
const size_t globalWorkgroupSize =
numberOfLocalWorkgroups * localWorkgroupSize;
- inBuffer = (void *)malloc(inBufferSize);
- outBuffer = (void *)malloc(outBufferSize);
- outBufferCopy = (void *)malloc(outBufferSize);
+ if ((localBufferSize / 4) > max_work_group_size)
+ {
+ log_info("Skipping due to resource requirements local:%db "
+ "max_work_group_size:%d\n",
+ localBufferSize, max_work_group_size);
+ return 0;
+ }
+
+ void *const inBuffer = (void *)malloc(inBufferSize);
+ void *const outBuffer = (void *)malloc(outBufferSize);
+ void *const outBufferCopy = (void *)malloc(outBufferSize);
const cl_int lineCopiesPerWorkItemInt =
static_cast<cl_int>(lineCopiesPerWorkItem);
@@ -250,18 +260,20 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
(int)inBufferSize, (int)outBufferSize, lineCopiesPerWorkgroup,
lineCopiesPerWorkItemInt);
+ size_t threads[1], localThreads[1];
+
threads[0] = globalWorkgroupSize;
localThreads[0] = localWorkgroupSize;
- d = init_genrand(gRandomSeed);
- generate_random_data(
- vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer);
- generate_random_data(
- vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer);
+ MTdata d = init_genrand(gRandomSeed);
+ generate_random_data(kChar, inBufferSize, d, inBuffer);
+ generate_random_data(kChar, outBufferSize, d, outBuffer);
free_mtdata(d);
d = NULL;
memcpy(outBufferCopy, outBuffer, outBufferSize);
+ clMemWrapper streams[2];
+
streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
inBuffer, &error);
test_error(error, "Unable to create input buffer");
@@ -301,8 +313,7 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
// Verify
int failuresPrinted = 0;
- // Verify
- size_t typeSize = get_explicit_type_size(vecType) * vecSize;
+
for (int i = 0;
i < (int)globalWorkgroupSize * lineCopiesPerWorkItem * elementSize;
i += elementSize)
@@ -313,13 +324,12 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
int inIdx = i * srcStride + j;
int outIdx = i * dstStride + j;
if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx,
- typeSize)
+ elementSize)
!= 0)
{
unsigned char *inchar = (unsigned char *)inBuffer + inIdx;
unsigned char *outchar = (unsigned char *)outBuffer + outIdx;
- char values[4096];
- values[0] = 0;
+ char values[4096] = { 0 };
if (failuresPrinted == 0)
{
@@ -382,16 +392,14 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context,
cl_command_queue queue, const char *kernelCode,
bool localIsDst)
{
- ExplicitType vecType[] = {
- kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong,
- kULong, kFloat, kDouble, kNumExplicitTypes
- };
+ const unsigned int elemSizes[] = { 1, 2, 3, 4, 5, 6, 7,
+ 8, 13, 16, 32, 47, 64 };
// The margins below represent the number of elements between the end of
// one line and the start of the next. The strides are equivalent to the
// length of the line plus the chosen margin.
- unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
- unsigned int smallTypesMarginSizes[] = { 0, 10, 100 };
- unsigned int size, typeIndex, srcMargin, dstMargin;
+ // These have to be multipliers, because the margin must be a multiple of
+ // element size.
+ const unsigned int marginMultipliers[] = { 0, 10, 100 };
int errors = 0;
@@ -399,55 +407,27 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context,
{
log_info(
"Device does not support extended async copies. Skipping test.\n");
- return 0;
}
-
- for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
+ else
{
- if (vecType[typeIndex] == kDouble
- && !is_extension_available(deviceID, "cl_khr_fp64"))
- continue;
-
- if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong)
- && !gHasLong)
- continue;
-
- for (size = 0; vecSizes[size] != 0; size++)
+ for (const unsigned int elemSize : elemSizes)
{
- if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
- <= 2) // small type
+ for (const unsigned int srcMarginMultiplier : marginMultipliers)
{
- for (srcMargin = 0; srcMargin < sizeof(smallTypesMarginSizes)
- / sizeof(smallTypesMarginSizes[0]);
- srcMargin++)
+ for (const unsigned int dstMarginMultiplier : marginMultipliers)
{
- for (dstMargin = 0;
- dstMargin < sizeof(smallTypesMarginSizes)
- / sizeof(smallTypesMarginSizes[0]);
- dstMargin++)
+ if (test_copy2D(deviceID, context, queue, kernelCode,
+ elemSize, srcMarginMultiplier * elemSize,
+ dstMarginMultiplier * elemSize, localIsDst))
{
- if (test_copy2D(deviceID, context, queue, kernelCode,
- vecType[typeIndex], vecSizes[size],
- smallTypesMarginSizes[srcMargin],
- smallTypesMarginSizes[dstMargin],
- localIsDst))
- {
- errors++;
- }
+ errors++;
}
}
}
- // not a small type, check only zero stride
- else if (test_copy2D(deviceID, context, queue, kernelCode,
- vecType[typeIndex], vecSizes[size], 0, 0,
- localIsDst))
- {
- errors++;
- }
}
}
- if (errors) return -1;
- return 0;
+
+ return errors ? -1 : 0;
}
int test_async_copy_global_to_local2D(cl_device_id deviceID, cl_context context,