diff options
Diffstat (limited to 'test_conformance/basic/test_async_copy2D.cpp')
-rw-r--r-- | test_conformance/basic/test_async_copy2D.cpp | 182 |
1 files changed, 81 insertions, 101 deletions
diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp index bf3f1552..11ef84bd 100644 --- a/test_conformance/basic/test_async_copy2D.cpp +++ b/test_conformance/basic/test_async_copy2D.cpp @@ -27,17 +27,25 @@ static const char *async_global_to_local_kernel2D = R"OpenCLC( #pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable -%s // optional pragma string -__kernel void test_fn(const __global %s *src, __global %s *dst, - __local %s *localBuffer, int numElementsPerLine, +#define STRUCT_SIZE %d +typedef struct __attribute__((packed)) +{ + uchar byte[STRUCT_SIZE]; +} VarSizeStruct __attribute__((aligned(1))); + + +__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, + __local VarSizeStruct *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int lineCopiesPerWorkItem, int srcStride, int dstStride) { // Zero the local storage first for (int i = 0; i < lineCopiesPerWorkItem; i++) { for (int j = 0; j < numElementsPerLine; j++) { const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j; - localBuffer[index] = (%s)(%s)0; + for (int k = 0; k < STRUCT_SIZE; k++) { + localBuffer[index].byte[k] = 0; + } } } @@ -45,7 +53,7 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, // try the copy barrier( CLK_LOCAL_MEM_FENCE ); event_t event = async_work_group_copy_2D2D(localBuffer, 0, src, - lineCopiesPerWorkgroup * get_group_id(0) * srcStride, sizeof(%s), + lineCopiesPerWorkgroup * get_group_id(0) * srcStride, sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0); // Wait for the copy to complete, then verify by manually copying to the dest @@ -63,16 +71,24 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, static const char *async_local_to_global_kernel2D = R"OpenCLC( #pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable -%s // optional pragma string -__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer, +#define STRUCT_SIZE %d +typedef struct __attribute__((packed)) +{ + uchar byte[STRUCT_SIZE]; +} VarSizeStruct __attribute__((aligned(1))); + + +__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, __local VarSizeStruct *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int lineCopiesPerWorkItem, int srcStride, int dstStride) { // Zero the local storage first for (int i = 0; i < lineCopiesPerWorkItem; i++) { for (int j = 0; j < numElementsPerLine; j++) { const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * srcStride + j; - localBuffer[index] = (%s)(%s)0; + for (int k = 0; k < STRUCT_SIZE; k++) { + localBuffer[index].byte[k] = 0; + } } } @@ -90,36 +106,22 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca // Do this to verify all kernels are done copying to the local buffer before we try the copy barrier(CLK_LOCAL_MEM_FENCE); event_t event = async_work_group_copy_2D2D(dst, lineCopiesPerWorkgroup * get_group_id(0) * dstStride, - localBuffer, 0, sizeof(%s), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride, + localBuffer, 0, sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0 ); wait_group_events(1, &event); }; )OpenCLC"; -int test_copy2D(cl_device_id deviceID, cl_context context, - cl_command_queue queue, const char *kernelCode, - ExplicitType vecType, int vecSize, int srcMargin, int dstMargin, - bool localIsDst) +int test_copy2D(const cl_device_id deviceID, const cl_context context, + const cl_command_queue queue, const char *const kernelCode, + const size_t elementSize, const int srcMargin, + const int dstMargin, const bool localIsDst) { int error; - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[2]; - size_t threads[1], localThreads[1]; - void *inBuffer, *outBuffer, *outBufferCopy; - MTdata d; - char vecNameString[64]; - vecNameString[0] = 0; - if (vecSize == 1) - sprintf(vecNameString, "%s", get_explicit_type_name(vecType)); - else - sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), - vecSize); - size_t elementSize = get_explicit_type_size(vecType) * vecSize; - log_info("Testing %s with srcMargin = %d, dstMargin = %d\n", vecNameString, - srcMargin, dstMargin); + log_info("Testing %d byte element with srcMargin = %d, dstMargin = %d\n", + elementSize, srcMargin, dstMargin); cl_long max_local_mem_size; error = @@ -139,6 +141,13 @@ int test_copy2D(cl_device_id deviceID, cl_context context, test_error(error, "clGetDeviceInfo for CL_DEVICE_MAX_MEM_ALLOC_SIZE failed."); + cl_long max_work_group_size; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_work_group_size), &max_work_group_size, + NULL); + test_error(error, + "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed."); + if (max_alloc_size > max_global_mem_size / 2) max_alloc_size = max_global_mem_size / 2; @@ -149,20 +158,17 @@ int test_copy2D(cl_device_id deviceID, cl_context context, test_error(error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed."); - char programSource[4096]; - programSource[0] = 0; - char *programPtr; + char programSource[4096] = { 0 }; + const char *programPtr = programSource; - sprintf(programSource, kernelCode, - vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" - : "", - vecNameString, vecNameString, vecNameString, vecNameString, - get_explicit_type_name(vecType), vecNameString); + sprintf(programSource, kernelCode, elementSize); // log_info("program: %s\n", programSource); - programPtr = programSource; + + clProgramWrapper program; + clKernelWrapper kernel; error = create_single_kernel_helper(context, &program, &kernel, 1, - (const char **)&programPtr, "test_fn"); + &programPtr, "test_fn"); test_error(error, "Unable to create testing kernel"); size_t max_workgroup_size; @@ -188,9 +194,6 @@ int test_copy2D(cl_device_id deviceID, cl_context context, const cl_int dstStride = numElementsPerLine + dstMargin; const cl_int srcStride = numElementsPerLine + srcMargin; - elementSize = - get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize); - const size_t lineCopiesPerWorkItem = 13; const size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem * elementSize * (localIsDst ? dstStride : srcStride); @@ -208,7 +211,6 @@ int test_copy2D(cl_device_id deviceID, cl_context context, if (maxLocalWorkgroupSize > max_workgroup_size) localWorkgroupSize = max_workgroup_size; - const size_t maxTotalLinesIn = (max_alloc_size / elementSize + srcMargin) / srcStride; const size_t maxTotalLinesOut = @@ -231,9 +233,17 @@ int test_copy2D(cl_device_id deviceID, cl_context context, const size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize; - inBuffer = (void *)malloc(inBufferSize); - outBuffer = (void *)malloc(outBufferSize); - outBufferCopy = (void *)malloc(outBufferSize); + if ((localBufferSize / 4) > max_work_group_size) + { + log_info("Skipping due to resource requirements local:%db " + "max_work_group_size:%d\n", + localBufferSize, max_work_group_size); + return 0; + } + + void *const inBuffer = (void *)malloc(inBufferSize); + void *const outBuffer = (void *)malloc(outBufferSize); + void *const outBufferCopy = (void *)malloc(outBufferSize); const cl_int lineCopiesPerWorkItemInt = static_cast<cl_int>(lineCopiesPerWorkItem); @@ -250,18 +260,20 @@ int test_copy2D(cl_device_id deviceID, cl_context context, (int)inBufferSize, (int)outBufferSize, lineCopiesPerWorkgroup, lineCopiesPerWorkItemInt); + size_t threads[1], localThreads[1]; + threads[0] = globalWorkgroupSize; localThreads[0] = localWorkgroupSize; - d = init_genrand(gRandomSeed); - generate_random_data( - vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer); - generate_random_data( - vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer); + MTdata d = init_genrand(gRandomSeed); + generate_random_data(kChar, inBufferSize, d, inBuffer); + generate_random_data(kChar, outBufferSize, d, outBuffer); free_mtdata(d); d = NULL; memcpy(outBufferCopy, outBuffer, outBufferSize); + clMemWrapper streams[2]; + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error); test_error(error, "Unable to create input buffer"); @@ -301,8 +313,7 @@ int test_copy2D(cl_device_id deviceID, cl_context context, // Verify int failuresPrinted = 0; - // Verify - size_t typeSize = get_explicit_type_size(vecType) * vecSize; + for (int i = 0; i < (int)globalWorkgroupSize * lineCopiesPerWorkItem * elementSize; i += elementSize) @@ -313,13 +324,12 @@ int test_copy2D(cl_device_id deviceID, cl_context context, int inIdx = i * srcStride + j; int outIdx = i * dstStride + j; if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx, - typeSize) + elementSize) != 0) { unsigned char *inchar = (unsigned char *)inBuffer + inIdx; unsigned char *outchar = (unsigned char *)outBuffer + outIdx; - char values[4096]; - values[0] = 0; + char values[4096] = { 0 }; if (failuresPrinted == 0) { @@ -382,16 +392,14 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, bool localIsDst) { - ExplicitType vecType[] = { - kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, - kULong, kFloat, kDouble, kNumExplicitTypes - }; + const unsigned int elemSizes[] = { 1, 2, 3, 4, 5, 6, 7, + 8, 13, 16, 32, 47, 64 }; // The margins below represent the number of elements between the end of // one line and the start of the next. The strides are equivalent to the // length of the line plus the chosen margin. - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; - unsigned int smallTypesMarginSizes[] = { 0, 10, 100 }; - unsigned int size, typeIndex, srcMargin, dstMargin; + // These have to be multipliers, because the margin must be a multiple of + // element size. + const unsigned int marginMultipliers[] = { 0, 10, 100 }; int errors = 0; @@ -399,55 +407,27 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context, { log_info( "Device does not support extended async copies. Skipping test.\n"); - return 0; } - - for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++) + else { - if (vecType[typeIndex] == kDouble - && !is_extension_available(deviceID, "cl_khr_fp64")) - continue; - - if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) - && !gHasLong) - continue; - - for (size = 0; vecSizes[size] != 0; size++) + for (const unsigned int elemSize : elemSizes) { - if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size] - <= 2) // small type + for (const unsigned int srcMarginMultiplier : marginMultipliers) { - for (srcMargin = 0; srcMargin < sizeof(smallTypesMarginSizes) - / sizeof(smallTypesMarginSizes[0]); - srcMargin++) + for (const unsigned int dstMarginMultiplier : marginMultipliers) { - for (dstMargin = 0; - dstMargin < sizeof(smallTypesMarginSizes) - / sizeof(smallTypesMarginSizes[0]); - dstMargin++) + if (test_copy2D(deviceID, context, queue, kernelCode, + elemSize, srcMarginMultiplier * elemSize, + dstMarginMultiplier * elemSize, localIsDst)) { - if (test_copy2D(deviceID, context, queue, kernelCode, - vecType[typeIndex], vecSizes[size], - smallTypesMarginSizes[srcMargin], - smallTypesMarginSizes[dstMargin], - localIsDst)) - { - errors++; - } + errors++; } } } - // not a small type, check only zero stride - else if (test_copy2D(deviceID, context, queue, kernelCode, - vecType[typeIndex], vecSizes[size], 0, 0, - localIsDst)) - { - errors++; - } } } - if (errors) return -1; - return 0; + + return errors ? -1 : 0; } int test_async_copy_global_to_local2D(cl_device_id deviceID, cl_context context, |