From 17a0d0956795b1805814066f861a37ea1a607d46 Mon Sep 17 00:00:00 2001 From: Marco Antognini Date: Tue, 18 May 2021 18:09:46 +0100 Subject: Cleanup usage of static, extern and typedef (#1256) * Cleanup usage of static, extern and typedef Remove static on functions defined in headers, as it can result in duplication in binaries. Remove unnecessary extern keyword on a function declaration, as it is the default behavior and can be puzzling when reading the code. Remove the unused declaration of my_ilogb, which is never defined. Remove unnecessary usage of typedef, as they are only increasing the cognitive load of the code for no purpose. Signed-off-by: Marco Antognini * Improve usage of inline and static in harness Functions declared in header as static can trigger unused warnings when (indirectly) included in translation units that do not use such functions. Use inline instead, which also avoids duplicating symbols in binaries. Signed-off-by: Marco Antognini --- test_common/harness/alloc.h | 4 +-- test_common/harness/fpcontrol.h | 6 ++-- test_conformance/math_brute_force/function_list.h | 16 +++++------ test_conformance/math_brute_force/main.cpp | 1 - .../math_brute_force/reference_math.cpp | 16 +++++------ test_conformance/math_brute_force/utility.h | 33 +++++++++++----------- 6 files changed, 37 insertions(+), 39 deletions(-) diff --git a/test_common/harness/alloc.h b/test_common/harness/alloc.h index 653dde05..3b00d7c9 100644 --- a/test_common/harness/alloc.h +++ b/test_common/harness/alloc.h @@ -29,7 +29,7 @@ #include "mingw_compat.h" #endif -static void* align_malloc(size_t size, size_t alignment) +inline void* align_malloc(size_t size, size_t alignment) { #if defined(_WIN32) && defined(_MSC_VER) return _aligned_malloc(size, alignment); @@ -53,7 +53,7 @@ #endif } -static void align_free(void* ptr) +inline void align_free(void* ptr) { #if defined(_WIN32) && defined(_MSC_VER) _aligned_free(ptr); diff --git 
a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h index 40826c5c..9f065044 100644 --- a/test_common/harness/fpcontrol.h +++ b/test_common/harness/fpcontrol.h @@ -39,7 +39,7 @@ typedef int FPU_mode_type; extern __thread fpu_control_t fpu_control; #endif // Set the reference hardware floating point unit to FTZ mode -static inline void ForceFTZ(FPU_mode_type *mode) +inline void ForceFTZ(FPU_mode_type *mode) { #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ || defined(__MINGW32__) @@ -65,7 +65,7 @@ static inline void ForceFTZ(FPU_mode_type *mode) } // Disable the denorm flush to zero -static inline void DisableFTZ(FPU_mode_type *mode) +inline void DisableFTZ(FPU_mode_type *mode) { #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ || defined(__MINGW32__) @@ -91,7 +91,7 @@ static inline void DisableFTZ(FPU_mode_type *mode) } // Restore the reference hardware to floating point state indicated by *mode -static inline void RestoreFPState(FPU_mode_type *mode) +inline void RestoreFPState(FPU_mode_type *mode) { #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ || defined(__MINGW32__) diff --git a/test_conformance/math_brute_force/function_list.h b/test_conformance/math_brute_force/function_list.h index 38f739ce..95a29459 100644 --- a/test_conformance/math_brute_force/function_list.h +++ b/test_conformance/math_brute_force/function_list.h @@ -30,7 +30,7 @@ #include "harness/mt19937.h" -typedef union fptr { +union fptr { void *p; double (*f_f)(double); double (*f_u)(cl_uint); @@ -45,9 +45,9 @@ typedef union fptr { double (*f_ffpI)(double, double, int *); double (*f_fff)(double, double, double); float (*f_fma)(float, float, float, int); -} fptr; +}; -typedef union dptr { +union dptr { void *p; long double (*f_f)(long double); long double (*f_u)(cl_ulong); @@ -59,20 +59,20 @@ typedef union dptr { long double (*f_fpI)(long double, int *); long double (*f_ffpI)(long double, long double, int *); long double 
(*f_fff)(long double, long double, long double); -} dptr; +}; struct Func; -typedef struct vtbl +struct vtbl { const char *type_name; int (*TestFunc)(const struct Func *, MTdata, bool); int (*DoubleTestFunc)( const struct Func *, MTdata, bool); // may be NULL if function is single precision only -} vtbl; +}; -typedef struct Func +struct Func { const char *name; // common name, to be used as an argument in the shell const char *nameInCode; // name as it appears in the __kernel, usually the @@ -88,7 +88,7 @@ typedef struct Func int ftz; int relaxed; const vtbl *vtbl_ptr; -} Func; +}; extern const Func functionList[]; diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index d6c2f11f..e52f2f0a 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -167,7 +167,6 @@ static int doTest(const char *name) } { - extern int my_ilogb(double); if (0 == strcmp("ilogb", func_data->name)) { InitILogbConstants(); diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index 3a6516ba..0b037e01 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -41,10 +41,10 @@ #pragma STDC FP_CONTRACT OFF static void __log2_ep(double *hi, double *lo, double x); -typedef union { +union uint64d_t { uint64_t i; double d; -} uint64d_t; +}; static const uint64d_t _CL_NAN = { 0x7ff8000000000000ULL }; @@ -2259,10 +2259,10 @@ long double reference_dividel(long double x, long double y) return dx / dy; } -typedef struct +struct double_double { double hi, lo; -} double_double; +}; // Split doubles_double into a series of consecutive 26-bit precise doubles and // a remainder. 
Note for later -- for multiplication, it might be better to @@ -3767,10 +3767,10 @@ static uint32_t two_over_pi[] = { static uint32_t pi_over_two[] = { 0x1, 0x2487ed51, 0x42d1846, 0x26263314, 0x1701b839, 0x28948127 }; -typedef union { +union d_ui64_t { uint64_t u; double d; -} d_ui64_t; +}; // radix or base of representation #define RADIX (30) @@ -3786,13 +3786,13 @@ d_ui64_t two_pow_two_mradix = { (uint64_t)(1023 - 2 * RADIX) << 52 }; // extended fixed point representation of double precision // floating point number. // x = sign * [ sum_{i = 0 to 2} ( X[i] * 2^(index - i)*RADIX ) ] -typedef struct +struct eprep_t { uint32_t X[3]; // three 32 bit integers are sufficient to represnt double in // base_30 int index; // exponent bias int sign; // sign of double -} eprep_t; +}; static eprep_t double_to_eprep(double x) { diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h index ac4db9c8..b4a59edb 100644 --- a/test_conformance/math_brute_force/utility.h +++ b/test_conformance/math_brute_force/utility.h @@ -90,8 +90,7 @@ int MakeKernels(const char **c, cl_uint count, const char *name, bool relaxedMode); // used to convert a bucket of bits into a search pattern through double -static inline double DoubleFromUInt32(uint32_t bits); -static inline double DoubleFromUInt32(uint32_t bits) +inline double DoubleFromUInt32(uint32_t bits) { union { uint64_t u; @@ -117,25 +116,25 @@ void _LogBuildError(cl_program p, int line, const char *file); // premature flushing to zero. // However, to avoid conflict for 1.0, we are letting results at TYPE_MIN + // ulp_limit to be flushed to zero. 
-static inline int IsFloatResultSubnormal(double x, float ulps) +inline int IsFloatResultSubnormal(double x, float ulps) { x = fabs(x) - MAKE_HEX_DOUBLE(0x1.0p-149, 0x1, -149) * (double)ulps; return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126); } -static inline int IsFloatResultSubnormalAbsError(double x, float abs_err) +inline int IsFloatResultSubnormalAbsError(double x, float abs_err) { x = x - abs_err; return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126); } -static inline int IsDoubleResultSubnormal(long double x, float ulps) +inline int IsDoubleResultSubnormal(long double x, float ulps) { x = fabsl(x) - MAKE_HEX_LONG(0x1.0p-1074, 0x1, -1074) * (long double)ulps; return x < MAKE_HEX_LONG(0x1.0p-1022, 0x1, -1022); } -static inline int IsFloatInfinity(double x) +inline int IsFloatInfinity(double x) { union { cl_float d; @@ -145,7 +144,7 @@ static inline int IsFloatInfinity(double x) return ((u.u & 0x7fffffffU) == 0x7F800000U); } -static inline int IsFloatMaxFloat(double x) +inline int IsFloatMaxFloat(double x) { union { cl_float d; @@ -155,7 +154,7 @@ static inline int IsFloatMaxFloat(double x) return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU); } -static inline int IsFloatNaN(double x) +inline int IsFloatNaN(double x) { union { cl_float d; @@ -165,13 +164,13 @@ static inline int IsFloatNaN(double x) return ((u.u & 0x7fffffffU) > 0x7F800000U); } -extern cl_uint RoundUpToNextPowerOfTwo(cl_uint x); +cl_uint RoundUpToNextPowerOfTwo(cl_uint x); // Windows (since long double got deprecated) sets the x87 to 53-bit precision // (that's x87 default state). This causes problems with the tests that // convert long and ulong to float and double or otherwise deal with values // that need more precision than 53-bit. So, set the x87 to 64-bit precision. 
-static inline void Force64BitFPUPrecision(void) +inline void Force64BitFPUPrecision(void) { #if __MINGW32__ // The usual method is to use _controlfp as follows: @@ -202,17 +201,17 @@ static inline void Force64BitFPUPrecision(void) #endif } -extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes); +void memset_pattern4(void *dest, const void *src_pattern, size_t bytes); -typedef union { +union int32f_t { int32_t i; float f; -} int32f_t; +}; -typedef union { +union int64d_t { int64_t l; double d; -} int64d_t; +}; void MulD(double *rhi, double *rlo, double u, double v); void AddD(double *rhi, double *rlo, double a, double b); @@ -229,7 +228,7 @@ void logFunctionInfo(const char *fname, unsigned int float_size, float getAllowedUlpError(const Func *f, const bool relaxed); -static inline cl_uint getTestScale(size_t typeSize) +inline cl_uint getTestScale(size_t typeSize) { if (gWimpyMode) { @@ -245,7 +244,7 @@ static inline cl_uint getTestScale(size_t typeSize) } } -static inline uint64_t getTestStep(size_t typeSize, size_t bufferSize) +inline uint64_t getTestStep(size_t typeSize, size_t bufferSize) { if (gWimpyMode) { -- cgit v1.2.3 From 6c8045911ab193143eae48eef68fc966d0d96b1f Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 18 May 2021 11:10:24 -0600 Subject: gles: Fix compile warnings. (#1070) * gles: Fix compile warnings. For 32 and 64-bit Visual Studio and the Android Q NDK. 
* Fix formatting violations Co-authored-by: spauls --- CMakeLists.txt | 4 - test_common/CMakeLists.txt | 5 +- test_common/gles/helpers.cpp | 6 +- test_common/gles/helpers.h | 5 +- test_common/harness/ThreadPool.cpp | 11 ++- test_common/harness/compat.h | 4 +- test_common/harness/conversions.cpp | 4 +- test_common/harness/errorHelpers.cpp | 6 +- test_common/harness/errorHelpers.h | 5 -- test_common/harness/fpcontrol.h | 8 +- test_common/harness/imageHelpers.cpp | 128 +++++++++++++++------------- test_common/harness/kernelHelpers.cpp | 4 +- test_common/harness/os_helpers.cpp | 3 +- test_common/harness/propertyHelpers.cpp | 11 +-- test_common/harness/rounding_mode.cpp | 10 +-- test_common/harness/rounding_mode.h | 2 - test_common/harness/threadTesting.cpp | 98 --------------------- test_common/harness/threadTesting.h | 5 +- test_conformance/gles/CMakeLists.txt | 8 ++ test_conformance/gles/setup_egl.cpp | 5 +- test_conformance/gles/test_fence_sync.cpp | 10 ++- test_conformance/gles/test_images_2D.cpp | 2 + test_conformance/gles/test_renderbuffer.cpp | 2 + 23 files changed, 136 insertions(+), 210 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 083ea96d..5b1f48fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -152,10 +152,6 @@ if(LINK_PTHREAD) list(APPEND CLConform_LIBRARIES pthread) endif() -if(DEFINED USE_GLES3) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGLES3") -endif() - if(APPLE) find_library(corefoundation CoreFoundation) find_library(iokit IOKit) diff --git a/test_common/CMakeLists.txt b/test_common/CMakeLists.txt index 2d4bc190..61580300 100644 --- a/test_common/CMakeLists.txt +++ b/test_common/CMakeLists.txt @@ -1,6 +1,5 @@ set(HARNESS_SOURCES - harness/threadTesting.cpp harness/typeWrappers.cpp harness/mt19937.cpp harness/conversions.cpp @@ -23,3 +22,7 @@ set(HARNESS_SOURCES add_library(harness STATIC ${HARNESS_SOURCES}) +if(MSVC) + # Don't warn about using the portable "strdup" function. 
+ target_compile_definitions(harness PRIVATE _CRT_NONSTDC_NO_DEPRECATE) +endif() \ No newline at end of file diff --git a/test_common/gles/helpers.cpp b/test_common/gles/helpers.cpp index 34f40b4c..57a4ddc1 100644 --- a/test_common/gles/helpers.cpp +++ b/test_common/gles/helpers.cpp @@ -22,7 +22,7 @@ {GLint __error = glGetError(); if(__error) {log_error( "GL ERROR: %s!\n", gluErrorString( err ));}} #if defined(__linux__) || defined(GL_ES_VERSION_2_0) -// On linux we dont link to GLU library to avoid comaptibility issues with +// On linux we don't link to GLU library to avoid compatibility issues with // libstdc++ // FIXME: Implement this const GLubyte* gluErrorString (GLenum error) @@ -271,8 +271,6 @@ void * ReadGLTexture( GLenum glTarget, GLuint glTexture, // Read results from the GL texture glBindTexture(get_base_gl_target(glTarget), glTexture); - GLint realWidth, realHeight; - GLint realInternalFormat; GLenum readBackFormat = GL_RGBA; GLenum readBackType = glType; glFramebufferWrapper glFramebuffer; @@ -301,7 +299,7 @@ void * ReadGLTexture( GLenum glTarget, GLuint glTexture, GetGLFormatName(readBackFormat), GetGLTypeName(readBackType)); - DumpGLBuffer(readBackType, realWidth, realHeight, (void*)outBuffer); + DumpGLBuffer(readBackType, outWidth, outHeight, (void *)outBuffer); #endif diff --git a/test_common/gles/helpers.h b/test_common/gles/helpers.h index 5bd0fdf1..20768787 100644 --- a/test_common/gles/helpers.h +++ b/test_common/gles/helpers.h @@ -30,11 +30,10 @@ #if !defined (__APPLE__) #include -#include "gl_headers.h" #include -#else -#include "gl_headers.h" +#include #endif +#include "gl_headers.h" #include "harness/errorHelpers.h" #include "harness/kernelHelpers.h" diff --git a/test_common/harness/ThreadPool.cpp b/test_common/harness/ThreadPool.cpp index 31985aa0..5dae1b4a 100644 --- a/test_common/harness/ThreadPool.cpp +++ b/test_common/harness/ThreadPool.cpp @@ -523,7 +523,7 @@ void ThreadPool_Init(void) { // Count the number of bits in ProcessorMask 
(number of // logical cores) - ULONG mask = ptr->ProcessorMask; + ULONG_PTR mask = ptr->ProcessorMask; while (mask) { ++gThreadCount; @@ -688,7 +688,10 @@ static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, void ThreadPool_Exit(void) { - int err, count; +#ifndef _WIN32 + int err; +#endif + int count; gRunCount = CL_INT_MAX; #if defined(__GNUC__) @@ -738,7 +741,9 @@ void ThreadPool_Exit(void) // all available then it would make more sense to use those features. cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo) { +#ifndef _WIN32 cl_int newErr; +#endif cl_int err = 0; // Lazily set up our threads #if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600) @@ -913,7 +918,9 @@ cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo) err = jobError; +#ifndef _WIN32 exit: +#endif // exit critical region #if defined(_WIN32) LeaveCriticalSection(gThreadPoolLock); diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h index 7aad15a0..3b557852 100644 --- a/test_common/harness/compat.h +++ b/test_common/harness/compat.h @@ -18,13 +18,13 @@ #if defined(_WIN32) && defined(_MSC_VER) #include -#endif - +#else #ifdef __cplusplus #define EXTERN_C extern "C" #else #define EXTERN_C #endif +#endif // diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp index fc3317c7..c7731269 100644 --- a/test_common/harness/conversions.cpp +++ b/test_common/harness/conversions.cpp @@ -181,8 +181,8 @@ static ULong sUpperLimits[kNumExplicitTypes] = { 0xffffffffLL, 0xffffffffLL, 0x7fffffffffffffffLL, - 0xffffffffffffffffLL, - 0xffffffffffffffffLL, + 0xffffffffffffffffULL, + 0xffffffffffffffffULL, 0, 0 }; // Last two values aren't stored here diff --git a/test_common/harness/errorHelpers.cpp b/test_common/harness/errorHelpers.cpp index 22a2677d..3ddbc37b 100644 --- a/test_common/harness/errorHelpers.cpp +++ b/test_common/harness/errorHelpers.cpp @@ -564,7 +564,7 @@ cl_int OutputBuildLogs(cl_program 
program, cl_uint num_devices, error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret); test_error(error, "Unable to query context's device size"); - num_devices = size_ret / sizeof(cl_device_id); + num_devices = static_cast(size_ret / sizeof(cl_device_id)); device_list = (cl_device_id *)malloc(size_ret); if (device_list == NULL) { @@ -695,7 +695,7 @@ int check_functions_for_offline_compiler(const char *subtestname, { if (gCompilationMode != kOnline) { - int nNotRequiredWithOfflineCompiler = + size_t nNotRequiredWithOfflineCompiler = sizeof(subtests_to_skip_with_offline_compiler) / sizeof(char *); size_t i; for (i = 0; i < nNotRequiredWithOfflineCompiler; ++i) @@ -707,4 +707,4 @@ int check_functions_for_offline_compiler(const char *subtestname, } } return 0; -} +} \ No newline at end of file diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h index 19446014..c7f49e3d 100644 --- a/test_common/harness/errorHelpers.h +++ b/test_common/harness/errorHelpers.h @@ -56,11 +56,6 @@ static int vlog_win32(const char *format, ...); #define vlog printf #endif -#define ct_assert(b) ct_assert_i(b, __LINE__) -#define ct_assert_i(b, line) ct_assert_ii(b, line) -#define ct_assert_ii(b, line) \ - int _compile_time_assertion_on_line_##line[b ? 1 : -1]; - #define test_fail(msg, ...) \ { \ log_error(msg, ##__VA_ARGS__); \ diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h index 9f065044..2add9baf 100644 --- a/test_common/harness/fpcontrol.h +++ b/test_common/harness/fpcontrol.h @@ -30,7 +30,11 @@ // that rounding mode. 
#if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__) \ || defined(__MINGW32__) +#ifdef _MSC_VER typedef int FPU_mode_type; +#else +typedef int64_t FPU_mode_type; +#endif #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ || defined(__MINGW32__) #include @@ -55,7 +59,7 @@ inline void ForceFTZ(FPU_mode_type *mode) __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24))); // Add 64 bit support #elif defined(__aarch64__) - unsigned fpscr; + uint64_t fpscr; __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr)); *mode = fpscr; __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24))); @@ -81,7 +85,7 @@ inline void DisableFTZ(FPU_mode_type *mode) __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24))); // Add 64 bit support #elif defined(__aarch64__) - unsigned fpscr; + uint64_t fpscr; __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr)); *mode = fpscr; __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24))); diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index 72a2f0c0..d1754653 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -554,8 +554,8 @@ struct AddressingTable { AddressingTable() { - ct_assert((CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6)); - ct_assert(CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2); + static_assert(CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6, ""); + static_assert(CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2, ""); mTable[CL_ADDRESS_NONE - CL_ADDRESS_NONE] [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = NoAddressFn; @@ -719,7 +719,7 @@ void get_max_sizes( if (usingMaxPixelSizeBuffer || raw_pixel_size == 12) raw_pixel_size = 16; size_t max_pixels = (size_t)maxAllocSize / raw_pixel_size; - log_info("Maximums: [%ld x %ld x %ld], raw pixel size %lu bytes, " + log_info("Maximums: [%zu x %zu x %zu], raw pixel size %zu bytes, " "per-allocation limit %gMB.\n", maxWidth, maxHeight, isArray ? 
maxArraySize : maxDepth, raw_pixel_size, (maxAllocSize / (1024.0 * 1024.0))); @@ -760,10 +760,10 @@ void get_max_sizes( if (image_type == CL_MEM_OBJECT_IMAGE1D) { - double M = maximum_sizes[0]; + size_t M = maximum_sizes[0]; // Store the size - sizes[(*numberOfSizes)][0] = (size_t)M; + sizes[(*numberOfSizes)][0] = M; sizes[(*numberOfSizes)][1] = 1; sizes[(*numberOfSizes)][2] = 1; ++(*numberOfSizes); @@ -777,17 +777,17 @@ void get_max_sizes( { // Determine the size of the fixed dimension - double M = maximum_sizes[fixed_dim]; - double A = max_pixels; + size_t M = maximum_sizes[fixed_dim]; + size_t A = max_pixels; int x0_dim = !fixed_dim; - double x0 = + size_t x0 = static_cast( fmin(fmin(other_sizes[(other_size++) % num_other_sizes], A / M), - maximum_sizes[x0_dim]); + maximum_sizes[x0_dim])); // Store the size - sizes[(*numberOfSizes)][fixed_dim] = (size_t)M; - sizes[(*numberOfSizes)][x0_dim] = (size_t)x0; + sizes[(*numberOfSizes)][fixed_dim] = M; + sizes[(*numberOfSizes)][x0_dim] = x0; sizes[(*numberOfSizes)][2] = 1; ++(*numberOfSizes); } @@ -802,16 +802,17 @@ void get_max_sizes( { // Determine the size of the fixed dimension - double M = maximum_sizes[fixed_dim]; - double A = max_pixels; + size_t M = maximum_sizes[fixed_dim]; + size_t A = max_pixels; // Find two other dimensions, x0 and x1 int x0_dim = (fixed_dim == 0) ? 1 : 0; int x1_dim = (fixed_dim == 2) ? 1 : 2; // Choose two other sizes for these dimensions - double x0 = fmin(fmin(A / M, maximum_sizes[x0_dim]), - other_sizes[(other_size++) % num_other_sizes]); + size_t x0 = static_cast( + fmin(fmin(A / M, maximum_sizes[x0_dim]), + other_sizes[(other_size++) % num_other_sizes])); // GPUs have certain restrictions on minimum width (row alignment) // of images which has given us issues testing small widths in this // test (say we set width to 3 for testing, and compute size based @@ -820,8 +821,9 @@ void get_max_sizes( // width of 16 which doesnt fit in vram). 
For this purpose we are // not testing width < 16 for this test. if (x0_dim == 0 && x0 < 16) x0 = 16; - double x1 = fmin(fmin(A / M / x0, maximum_sizes[x1_dim]), - other_sizes[(other_size++) % num_other_sizes]); + size_t x1 = static_cast( + fmin(fmin(A / M / x0, maximum_sizes[x1_dim]), + other_sizes[(other_size++) % num_other_sizes])); // Valid image sizes cannot be below 1. Due to the workaround for // the xo_dim where x0 is overidden to 16 there might not be enough @@ -834,9 +836,9 @@ void get_max_sizes( assert(x0 > 0 && M > 0); // Store the size - sizes[(*numberOfSizes)][fixed_dim] = (size_t)M; - sizes[(*numberOfSizes)][x0_dim] = (size_t)x0; - sizes[(*numberOfSizes)][x1_dim] = (size_t)x1; + sizes[(*numberOfSizes)][fixed_dim] = M; + sizes[(*numberOfSizes)][x0_dim] = x0; + sizes[(*numberOfSizes)][x1_dim] = x1; ++(*numberOfSizes); } } @@ -847,20 +849,20 @@ void get_max_sizes( switch (image_type) { case CL_MEM_OBJECT_IMAGE1D: - log_info(" size[%d] = [%ld] (%g MB image)\n", j, sizes[j][0], + log_info(" size[%d] = [%zu] (%g MB image)\n", j, sizes[j][0], raw_pixel_size * sizes[j][0] * sizes[j][1] * sizes[j][2] / (1024.0 * 1024.0)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE2D: - log_info(" size[%d] = [%ld %ld] (%g MB image)\n", j, + log_info(" size[%d] = [%zu %zu] (%g MB image)\n", j, sizes[j][0], sizes[j][1], raw_pixel_size * sizes[j][0] * sizes[j][1] * sizes[j][2] / (1024.0 * 1024.0)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE3D: - log_info(" size[%d] = [%ld %ld %ld] (%g MB image)\n", j, + log_info(" size[%d] = [%zu %zu %zu] (%g MB image)\n", j, sizes[j][0], sizes[j][1], sizes[j][2], raw_pixel_size * sizes[j][0] * sizes[j][1] * sizes[j][2] / (1024.0 * 1024.0)); @@ -1124,12 +1126,13 @@ void escape_inf_nan_values(char *data, size_t allocSize) char *generate_random_image_data(image_descriptor *imageInfo, BufferOwningPtr &P, MTdata d) { - size_t allocSize = get_image_size(imageInfo); + size_t allocSize = 
static_cast(get_image_size(imageInfo)); size_t pixelRowBytes = imageInfo->width * get_pixel_size(imageInfo->format); size_t i; if (imageInfo->num_mip_levels > 1) - allocSize = compute_mipmapped_image_size(*imageInfo); + allocSize = + static_cast(compute_mipmapped_image_size(*imageInfo)); #if defined(__APPLE__) char *data = NULL; @@ -1161,7 +1164,7 @@ char *generate_random_image_data(image_descriptor *imageInfo, if (data == NULL) { - log_error("ERROR: Unable to malloc %lu bytes for " + log_error("ERROR: Unable to malloc %zu bytes for " "generate_random_image_data\n", allocSize); return 0; @@ -1678,24 +1681,26 @@ bool get_integer_coords_offset(float x, float y, float z, float xAddressOffset, // At this point, we're dealing with non-normalized coordinates. - outX = adFn(floorf(x), width); + outX = adFn(static_cast(floorf(x)), width); // 1D and 2D arrays require special care for the index coordinate: switch (imageInfo->type) { case CL_MEM_OBJECT_IMAGE1D_ARRAY: - outY = calculate_array_index(y, (float)imageInfo->arraySize - 1.0f); - outZ = 0.0f; /* don't care! */ + outY = static_cast( + calculate_array_index(y, (float)imageInfo->arraySize - 1.0f)); + outZ = 0; /* don't care! 
*/ break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: - outY = adFn(floorf(y), height); - outZ = calculate_array_index(z, (float)imageInfo->arraySize - 1.0f); + outY = adFn(static_cast(floorf(y)), height); + outZ = static_cast( + calculate_array_index(z, (float)imageInfo->arraySize - 1.0f)); break; default: // legacy path: - if (height != 0) outY = adFn(floorf(y), height); - if (depth != 0) outZ = adFn(floorf(z), depth); + if (height != 0) outY = adFn(static_cast(floorf(y)), height); + if (depth != 0) outZ = adFn(static_cast(floorf(z)), depth); } return !((int)refX == outX && (int)refY == outY && (int)refZ == outZ); @@ -1766,7 +1771,7 @@ static float unnormalize_coordinate(const char *name, float coord, float offset, switch (addressing_mode) { case CL_ADDRESS_REPEAT: - ret = RepeatNormalizedAddressFn(coord, extent); + ret = RepeatNormalizedAddressFn(coord, static_cast(extent)); if (verbose) { @@ -1790,7 +1795,8 @@ static float unnormalize_coordinate(const char *name, float coord, float offset, break; case CL_ADDRESS_MIRRORED_REPEAT: - ret = MirroredRepeatNormalizedAddressFn(coord, extent); + ret = MirroredRepeatNormalizedAddressFn( + coord, static_cast(extent)); if (verbose) { @@ -1968,13 +1974,13 @@ FloatPixel sample_image_pixel_float_offset( // coordinates. Note that the array cases again require special // care, per section 8.4 in the OpenCL 1.2 Specification. 
- ix = adFn(floorf(x), width_lod); + ix = adFn(static_cast(floorf(x)), width_lod); switch (imageInfo->type) { case CL_MEM_OBJECT_IMAGE1D_ARRAY: - iy = - calculate_array_index(y, (float)(imageInfo->arraySize - 1)); + iy = static_cast(calculate_array_index( + y, (float)(imageInfo->arraySize - 1))); iz = 0; if (verbose) { @@ -1982,18 +1988,18 @@ FloatPixel sample_image_pixel_float_offset( } break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: - iy = adFn(floorf(y), height_lod); - iz = - calculate_array_index(z, (float)(imageInfo->arraySize - 1)); + iy = adFn(static_cast(floorf(y)), height_lod); + iz = static_cast(calculate_array_index( + z, (float)(imageInfo->arraySize - 1))); if (verbose) { log_info("\tArray index %f evaluates to %d\n", z, iz); } break; default: - iy = adFn(floorf(y), height_lod); + iy = adFn(static_cast(floorf(y)), height_lod); if (depth_lod != 0) - iz = adFn(floorf(z), depth_lod); + iz = adFn(static_cast(floorf(z)), depth_lod); else iz = 0; } @@ -2047,16 +2053,16 @@ FloatPixel sample_image_pixel_float_offset( height = 1; } - int x1 = adFn(floorf(x - 0.5f), width); + int x1 = adFn(static_cast(floorf(x - 0.5f)), width); int y1 = 0; - int x2 = adFn(floorf(x - 0.5f) + 1, width); + int x2 = adFn(static_cast(floorf(x - 0.5f) + 1), width); int y2 = 0; if ((imageInfo->type != CL_MEM_OBJECT_IMAGE1D) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_BUFFER)) { - y1 = adFn(floorf(y - 0.5f), height); - y2 = adFn(floorf(y - 0.5f) + 1, height); + y1 = adFn(static_cast(floorf(y - 0.5f)), height); + y2 = adFn(static_cast(floorf(y - 0.5f) + 1), height); } else { @@ -2147,12 +2153,12 @@ FloatPixel sample_image_pixel_float_offset( else { // 3D linear filtering - int x1 = adFn(floorf(x - 0.5f), width_lod); - int y1 = adFn(floorf(y - 0.5f), height_lod); - int z1 = adFn(floorf(z - 0.5f), depth_lod); - int x2 = adFn(floorf(x - 0.5f) + 1, width_lod); - int y2 = adFn(floorf(y - 0.5f) + 1, height_lod); - int z2 = adFn(floorf(z - 0.5f) + 1, 
depth_lod); + int x1 = adFn(static_cast(floorf(x - 0.5f)), width_lod); + int y1 = adFn(static_cast(floorf(y - 0.5f)), height_lod); + int z1 = adFn(static_cast(floorf(z - 0.5f)), depth_lod); + int x2 = adFn(static_cast(floorf(x - 0.5f) + 1), width_lod); + int y2 = adFn(static_cast(floorf(y - 0.5f) + 1), height_lod); + int z2 = adFn(static_cast(floorf(z - 0.5f) + 1), depth_lod); if (verbose) log_info("\tActual integer coords used (i = floor(x-.5)): " @@ -2899,15 +2905,18 @@ void pack_image_pixel_error(const float *srcVector, case CL_UNSIGNED_INT8: { const cl_uchar *ptr = (const cl_uchar *)results; for (unsigned int i = 0; i < channelCount; i++) - errors[i] = (cl_int)ptr[i] - - (cl_int)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX); + errors[i] = static_cast( + (cl_int)ptr[i] + - (cl_int)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX)); break; } case CL_UNSIGNED_INT16: { const cl_ushort *ptr = (const cl_ushort *)results; for (unsigned int i = 0; i < channelCount; i++) - errors[i] = (cl_int)ptr[i] - - (cl_int)CONVERT_UINT(srcVector[i], 32767.f, CL_USHRT_MAX); + errors[i] = static_cast( + (cl_int)ptr[i] + - (cl_int)CONVERT_UINT(srcVector[i], 32767.f, + CL_USHRT_MAX)); break; } case CL_UNSIGNED_INT32: { @@ -3228,7 +3237,7 @@ char *create_random_image_data(ExplicitType dataType, if (data == NULL) { log_error( - "ERROR: Unable to malloc %lu bytes for create_random_image_data\n", + "ERROR: Unable to malloc %zu bytes for create_random_image_data\n", allocSize); return NULL; } @@ -3988,7 +3997,8 @@ bool is_image_format_required(cl_image_format format, cl_mem_flags flags, cl_uint compute_max_mip_levels(size_t width, size_t height, size_t depth) { - cl_uint retMaxMipLevels = 0, max_dim = 0; + cl_uint retMaxMipLevels = 0; + size_t max_dim = 0; max_dim = width; max_dim = height > max_dim ? 
height : max_dim; diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp index 95b9555e..aaf0d689 100644 --- a/test_common/harness/kernelHelpers.cpp +++ b/test_common/harness/kernelHelpers.cpp @@ -579,7 +579,7 @@ static int create_single_kernel_helper_create_program_offline( if (error != CL_SUCCESS) return error; ifs.seekg(0, ifs.end); - int length = ifs.tellg(); + size_t length = static_cast(ifs.tellg()); ifs.seekg(0, ifs.beg); // treat modifiedProgram as input for clCreateProgramWithBinary @@ -1226,7 +1226,7 @@ int is_image_format_supported(cl_context context, cl_mem_flags flags, list = (cl_image_format *)malloc(count * sizeof(cl_image_format)); if (NULL == list) { - log_error("Error: unable to allocate %ld byte buffer for image format " + log_error("Error: unable to allocate %zu byte buffer for image format " "list at %s:%d (err = %d)\n", count * sizeof(cl_image_format), __FILE__, __LINE__, err); return 0; diff --git a/test_common/harness/os_helpers.cpp b/test_common/harness/os_helpers.cpp index cd350cf8..daf21958 100644 --- a/test_common/harness/os_helpers.cpp +++ b/test_common/harness/os_helpers.cpp @@ -404,7 +404,8 @@ std::string exe_path() for (;;) { - DWORD len = GetModuleFileNameA(NULL, &path.front(), path.size()); + DWORD len = GetModuleFileNameA(NULL, &path.front(), + static_cast(path.size())); if (len == 0) { diff --git a/test_common/harness/propertyHelpers.cpp b/test_common/harness/propertyHelpers.cpp index 3157ca80..e368f9b6 100644 --- a/test_common/harness/propertyHelpers.cpp +++ b/test_common/harness/propertyHelpers.cpp @@ -97,15 +97,16 @@ int compareProperties(const std::vector& queried, if (!found) { - log_error("ERROR: expected property 0x%x not found!\n", + log_error("ERROR: expected property 0x%llx not found!\n", check_prop); return TEST_FAIL; } else if (check_value != queried_value) { - log_error("ERROR: mis-matched value for property 0x%x: wanted " - "0x%x, got 0x%x\n", - check_prop, check_value, 
queried_value); + log_error( + "ERROR: mis-matched value for property 0x%llx: wanted " + "0x%llx, got 0x%llx\n", + check_prop, check_value, queried_value); return TEST_FAIL; } } @@ -113,7 +114,7 @@ int compareProperties(const std::vector& queried, if (queried.size() > check.size()) { log_error("ERROR: all properties found but there are extra " - "properties: expected %d, got %d.\n", + "properties: expected %zu, got %zu.\n", check.size(), queried.size()); return TEST_FAIL; } diff --git a/test_common/harness/rounding_mode.cpp b/test_common/harness/rounding_mode.cpp index 681ccdd8..1f531478 100644 --- a/test_common/harness/rounding_mode.cpp +++ b/test_common/harness/rounding_mode.cpp @@ -48,7 +48,7 @@ RoundingMode set_round(RoundingMode r, Type outType) const int *p = int_rounds; if (outType == kfloat || outType == kdouble) p = flt_rounds; - int fpscr = 0; + int64_t fpscr = 0; RoundingMode oldRound = get_round(); _FPU_GETCW(fpscr); @@ -59,7 +59,7 @@ RoundingMode set_round(RoundingMode r, Type outType) RoundingMode get_round(void) { - int fpscr; + int64_t fpscr; int oldRound; _FPU_GETCW(fpscr); @@ -203,13 +203,13 @@ void *FlushToZero(void) #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) union { - int i; + unsigned int i; void *p; } u = { _mm_getcsr() }; _mm_setcsr(u.i | 0x8040); return u.p; #elif defined(__arm__) || defined(__aarch64__) - int fpscr; + int64_t fpscr; _FPU_GETCW(fpscr); _FPU_SETCW(fpscr | FPSCR_FZ); return NULL; @@ -239,7 +239,7 @@ void UnFlushToZero(void *p) } u = { p }; _mm_setcsr(u.i); #elif defined(__arm__) || defined(__aarch64__) - int fpscr; + int64_t fpscr; _FPU_GETCW(fpscr); _FPU_SETCW(fpscr & ~FPSCR_FZ); #elif defined(__PPC__) diff --git a/test_common/harness/rounding_mode.h b/test_common/harness/rounding_mode.h index 064a3a63..6f52f0a0 100644 --- a/test_common/harness/rounding_mode.h +++ b/test_common/harness/rounding_mode.h @@ -16,8 +16,6 @@ #ifndef 
__ROUNDING_MODE_H__ #define __ROUNDING_MODE_H__ -#pragma STDC FENV_ACCESS ON - #include "compat.h" #if (defined(_WIN32) && defined(_MSC_VER)) diff --git a/test_common/harness/threadTesting.cpp b/test_common/harness/threadTesting.cpp index 875ee59b..e69de29b 100644 --- a/test_common/harness/threadTesting.cpp +++ b/test_common/harness/threadTesting.cpp @@ -1,98 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "compat.h" -#include "threadTesting.h" -#include "errorHelpers.h" -#include -#include - -#if !defined(_WIN32) -#include -#endif - -#if 0 // Disabed for now - -typedef struct -{ - basefn mFunction; - cl_device_id mDevice; - cl_context mContext; - int mNumElements; -} TestFnArgs; - -//////////////////////////////////////////////////////////////////////////////// -// Thread-based testing. Spawns a new thread to run the given test function, -// then waits for it to complete. 
The entire idea is that, if the thread crashes, -// we can catch it and report it as a failure instead of crashing the entire suite -//////////////////////////////////////////////////////////////////////////////// - -void *test_thread_wrapper( void *data ) -{ - TestFnArgs *args; - int retVal; - cl_context context; - - args = (TestFnArgs *)data; - - /* Create a new context to use (contexts can't cross threads) */ - context = clCreateContext(NULL, args->mDeviceGroup); - if( context == NULL ) - { - log_error("clCreateContext failed for new thread\n"); - return (void *)(-1); - } - - /* Call function */ - retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements ); - - clReleaseContext( context ); - - return (void *)retVal; -} - -int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) -{ - int error; - pthread_t threadHdl; - void *retVal; - TestFnArgs args; - - - args.mFunction = fnToTest; - args.mDeviceGroup = deviceGroup; - args.mDevice = device; - args.mContext = context; - args.mNumElements = numElements; - - - error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args ); - if( error != 0 ) - { - log_error( "ERROR: Unable to create thread for testing!\n" ); - return -1; - } - - /* Thread has been started, now just wait for it to complete (or crash) */ - error = pthread_join( threadHdl, &retVal ); - if( error != 0 ) - { - log_error( "ERROR: Unable to join testing thread!\n" ); - return -1; - } - - return (int)((intptr_t)retVal); -} -#endif diff --git a/test_common/harness/threadTesting.h b/test_common/harness/threadTesting.h index 765eabcc..2f3c1873 100644 --- a/test_common/harness/threadTesting.h +++ b/test_common/harness/threadTesting.h @@ -24,8 +24,5 @@ typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_threaded_function(basefn fnToTest, cl_device_id device, - cl_context 
context, cl_command_queue queue, - int numElements); -#endif // _threadTesting_h +#endif // _threadTesting_h \ No newline at end of file diff --git a/test_conformance/gles/CMakeLists.txt b/test_conformance/gles/CMakeLists.txt index c76fe512..4f4ba532 100644 --- a/test_conformance/gles/CMakeLists.txt +++ b/test_conformance/gles/CMakeLists.txt @@ -18,3 +18,11 @@ set (${MODULE_NAME}_SOURCES list(APPEND CLConform_LIBRARIES EGL GLESv2) include(../CMakeCommon.txt) + +if(DEFINED USE_GLES3) + target_compile_definitions(${${MODULE_NAME}_OUT} PRIVATE GLES3) +endif() +if(MSVC) + # Don't warn about using the portable "strdup" function. + target_compile_definitions(${${MODULE_NAME}_OUT} PRIVATE _CRT_NONSTDC_NO_DEPRECATE) +endif() \ No newline at end of file diff --git a/test_conformance/gles/setup_egl.cpp b/test_conformance/gles/setup_egl.cpp index fe0f8ca3..95a12a66 100644 --- a/test_conformance/gles/setup_egl.cpp +++ b/test_conformance/gles/setup_egl.cpp @@ -117,7 +117,8 @@ public: _platform, "clGetGLContextInfoKHR"); if (GetGLContextInfo == NULL) { - print_error(status, "clGetGLContextInfoKHR failed"); + log_error("ERROR: clGetGLContextInfoKHR failed! 
(%s:%d)\n", + __FILE__, __LINE__); return NULL; } @@ -128,7 +129,7 @@ public: return NULL; } dev_size /= sizeof(cl_device_id); - log_info("GL _context supports %d compute devices\n", dev_size); + log_info("GL _context supports %zu compute devices\n", dev_size); status = GetGLContextInfo(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, diff --git a/test_conformance/gles/test_fence_sync.cpp b/test_conformance/gles/test_fence_sync.cpp index 0af91a46..968d9695 100644 --- a/test_conformance/gles/test_fence_sync.cpp +++ b/test_conformance/gles/test_fence_sync.cpp @@ -570,10 +570,12 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ { if( p[ t ] == 0 ) { - log_error( "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", a, t - 1, - (int)( a % framebufferSize ), (int)( a / framebufferSize ), - (int)( ( t - 1 ) % framebufferSize ), (int)( ( t - 1 ) / framebufferSize ), - p[ a ] ); + log_error( + "RUN: %zu to %zu (%d,%d to %d,%d) 0x%08x\n", + a, t - 1, (int)(a % framebufferSize), + (int)(a / framebufferSize), + (int)((t - 1) % framebufferSize), + (int)((t - 1) / framebufferSize), p[a]); a = t; } } diff --git a/test_conformance/gles/test_images_2D.cpp b/test_conformance/gles/test_images_2D.cpp index c1a17fc8..f6554023 100644 --- a/test_conformance/gles/test_images_2D.cpp +++ b/test_conformance/gles/test_images_2D.cpp @@ -369,7 +369,9 @@ int test_images_read_cube( cl_device_id device, cl_context context, cl_command_q } +#ifdef __APPLE__ #pragma mark -------------------- Write tests ------------------------- +#endif int test_cl_image_write( cl_context context, cl_command_queue queue, cl_mem clImage, diff --git a/test_conformance/gles/test_renderbuffer.cpp b/test_conformance/gles/test_renderbuffer.cpp index 20127aca..0f6d289b 100644 --- a/test_conformance/gles/test_renderbuffer.cpp +++ b/test_conformance/gles/test_renderbuffer.cpp @@ -197,7 +197,9 @@ int test_renderbuffer_read( cl_device_id device, cl_context context, cl_command_ } +#ifdef __APPLE__ 
#pragma mark -------------------- Write tests ------------------------- +#endif int test_attach_renderbuffer_write_to_image( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glRenderbuffer, size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, ExplicitType *outType, MTdata d, void **outSourceBuffer ) -- cgit v1.2.3 From de49d59c8dfad1171d7dd7c0df929ae3a68aea1a Mon Sep 17 00:00:00 2001 From: Pierre Moreau Date: Tue, 18 May 2021 19:12:55 +0200 Subject: Allocations fixes (#1245) * allocations: Run buffer non-blocking even without images Testing buffer non-blocking should not be dependent on whether images are supported by a device or not. * allocations: Fix typos --- test_conformance/allocations/allocation_fill.cpp | 6 ++++-- test_conformance/allocations/allocation_functions.cpp | 4 ++-- test_conformance/allocations/main.cpp | 6 ++++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/test_conformance/allocations/allocation_fill.cpp b/test_conformance/allocations/allocation_fill.cpp index a7558942..b4ea3798 100644 --- a/test_conformance/allocations/allocation_fill.cpp +++ b/test_conformance/allocations/allocation_fill.cpp @@ -200,8 +200,10 @@ int fill_image_with_data(cl_context context, cl_device_id device_id, cl_command_ result = clFinish(*queue); if (result != SUCCEEDED) { - print_error(error, "clFinish failed after successful enquing filling buffer with data."); - return result; + print_error(error, + "clFinish failed after successful enqueuing filling " + "buffer with data."); + return result; } } else { error = clEnqueueWriteImage(*queue, mem, CL_FALSE, origin, region, 0, 0, data, 0, NULL, &event); diff --git a/test_conformance/allocations/allocation_functions.cpp b/test_conformance/allocations/allocation_functions.cpp index 7182c727..827ee104 100644 --- a/test_conformance/allocations/allocation_functions.cpp +++ b/test_conformance/allocations/allocation_functions.cpp @@ -37,8 +37,8 @@ int 
find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t } if (size_to_allocate == 0) { - log_error("Trying to allcoate a zero sized image.\n"); - return FAILED_ABORT; + log_error("Trying to allocate a zero sized image.\n"); + return FAILED_ABORT; } error = clGetDeviceInfo( device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( max_width ), &max_width, NULL ); diff --git a/test_conformance/allocations/main.cpp b/test_conformance/allocations/main.cpp index 0dec4c6d..43e81277 100644 --- a/test_conformance/allocations/main.cpp +++ b/test_conformance/allocations/main.cpp @@ -112,6 +112,8 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All int number_of_mems_used; cl_ulong max_individual_allocation_size = g_max_individual_allocation_size; cl_ulong global_mem_size = g_global_mem_size ; + const bool allocate_image = + (alloc_type != BUFFER) && (alloc_type != BUFFER_NON_BLOCKING); static const char* alloc_description[] = { "buffer(s)", @@ -123,7 +125,7 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All }; // Skip image tests if we don't support images on the device - if( alloc_type > BUFFER && checkForImageSupport( device ) ) + if (allocate_image && checkForImageSupport(device)) { log_info( "Can not test image allocation because device does not support images.\n" ); return 0; @@ -132,7 +134,7 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All // This section was added in order to fix a bug in the test // If CL_DEVICE_MAX_MEM_ALLOC_SIZE is much grater than CL_DEVICE_IMAGE2D_MAX_WIDTH * CL_DEVICE_IMAGE2D_MAX_HEIGHT // The test will fail in image allocations as the size requested for the allocation will be much grater than the maximum size allowed for image - if( ( alloc_type != BUFFER ) && ( alloc_type != BUFFER_NON_BLOCKING ) ) + if (allocate_image) { size_t max_width, max_height; -- cgit v1.2.3 From 01aa55029d49a7c788e9edd97ff686816ff84267 Mon Sep 17 00:00:00 2001 
From: Marco Antognini Date: Fri, 21 May 2021 10:06:13 +0100 Subject: Update warning options (#1252) Remove workaround for #783, this was fixed by #1237. Remove workaround for overflow, #699 has been merged. Disable errors from -Wimplicit-const-int-float-conversion, the issue is covered by #1250. Signed-off-by: Marco Antognini --- CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b1f48fd..8d947ed1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,10 +106,9 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang" add_cxx_flag_if_supported(-Wno-format) add_cxx_flag_if_supported(-Werror) add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive - add_cxx_flag_if_supported(-Wno-error=absolute-value) # Issue 783 add_cxx_flag_if_supported(-Wno-error=unknown-pragmas) # Issue #785 add_cxx_flag_if_supported(-Wno-error=asm-operand-widths) # Issue #784 - add_cxx_flag_if_supported(-Wno-error=overflow) # Fixed by #699 + add_cxx_flag_if_supported(-Wno-error=implicit-const-int-float-conversion) # Issue #1250 # -msse -mfpmath=sse to force gcc to use sse for float math, # avoiding excess precision problems that cause tests like int2float -- cgit v1.2.3 From ce1687a408686d38e2629a4426ef7c38e10f0e23 Mon Sep 17 00:00:00 2001 From: James Price Date: Fri, 21 May 2021 05:07:12 -0400 Subject: Add missing cstdint include (#1259) --- test_common/harness/fpcontrol.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h index 2add9baf..222aa2c4 100644 --- a/test_common/harness/fpcontrol.h +++ b/test_common/harness/fpcontrol.h @@ -16,6 +16,8 @@ #ifndef _fpcontrol_h #define _fpcontrol_h +#include + // In order to get tests for correctly rounded operations (e.g. multiply) to // work properly we need to be able to set the reference hardware to FTZ mode if // the device hardware is running in that mode. 
We have explored all other -- cgit v1.2.3 From ba9312e4a2e2431a716150a3df3491834076d046 Mon Sep 17 00:00:00 2001 From: Marco Antognini Date: Fri, 21 May 2021 10:07:54 +0100 Subject: Fix ODR violations in math_brute_force (#1255) A program having a type (such as ThreadInfo) defined differently in multiple translation units exhibits undefined behaviour. This commit fixes such issues in the math_brute_force component by ensuring most types are local to their translation unit with the help of anonymous namespaces. Later refactoring will be able to extract common definitions to a single place. This patch also removes unnecessary static and typedef keywords. Otherwise, code is only moved around with no change. Signed-off-by: Marco Antognini --- .../math_brute_force/binary_double.cpp | 396 ++++++++++---------- test_conformance/math_brute_force/binary_float.cpp | 398 ++++++++++---------- .../math_brute_force/binary_i_double.cpp | 399 ++++++++++---------- .../math_brute_force/binary_i_float.cpp | 401 ++++++++++---------- .../math_brute_force/binary_operator_double.cpp | 392 ++++++++++---------- .../math_brute_force/binary_operator_float.cpp | 396 ++++++++++---------- .../binary_two_results_i_double.cpp | 20 +- .../binary_two_results_i_float.cpp | 20 +- .../math_brute_force/function_list.cpp | 1 + .../math_brute_force/i_unary_double.cpp | 14 +- .../math_brute_force/i_unary_float.cpp | 14 +- .../math_brute_force/macro_binary_double.cpp | 355 +++++++++--------- .../math_brute_force/macro_binary_float.cpp | 357 +++++++++--------- .../math_brute_force/macro_unary_double.cpp | 323 ++++++++-------- .../math_brute_force/macro_unary_float.cpp | 325 ++++++++--------- test_conformance/math_brute_force/mad_double.cpp | 14 +- test_conformance/math_brute_force/mad_float.cpp | 14 +- .../math_brute_force/ternary_double.cpp | 18 +- .../math_brute_force/ternary_float.cpp | 18 +- test_conformance/math_brute_force/unary_double.cpp | 352 +++++++++--------- 
test_conformance/math_brute_force/unary_float.cpp | 404 +++++++++++---------- .../math_brute_force/unary_two_results_double.cpp | 14 +- .../math_brute_force/unary_two_results_float.cpp | 14 +- .../unary_two_results_i_double.cpp | 16 +- .../math_brute_force/unary_two_results_i_float.cpp | 16 +- .../math_brute_force/unary_u_double.cpp | 16 +- .../math_brute_force/unary_u_float.cpp | 14 +- 27 files changed, 2400 insertions(+), 2321 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index 4baa4991..9c6b59b4 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -20,10 +20,12 @@ #include +namespace { + const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022); -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -109,7 +111,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -117,9 +119,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -128,7 +130,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -140,9 +142,9 @@ typedef struct ThreadInfo // to 0. MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -164,10 +166,10 @@ typedef struct TestInfo int isNextafter; bool relaxedMode; // True if test is running in relaxed mode, false // otherwise. 
-} TestInfo; +}; // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -277,195 +279,10 @@ static const double specialValues[] = { +0.0, }; -static size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - - test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); - test_info.skipNanInf = 0; - test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = 
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -825,3 +642,188 @@ static cl_int 
Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + + test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); + test_info.skipNanInf = 0; + test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for 
(cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index 32caafa3..9c7081dc 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ 
-20,10 +20,12 @@ #include +namespace { + const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126); -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -107,7 +109,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -115,9 +117,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -126,7 +128,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -138,9 +140,9 @@ typedef struct ThreadInfo // to 0. MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -162,10 +164,10 @@ typedef struct TestInfo int isNextafter; bool relaxedMode; // True if test is running in relaxed mode, false // otherwise. 
-} TestInfo; +}; // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -267,196 +269,10 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - test_info.relaxedMode = relaxedMode; - test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); - test_info.skipNanInf = test_info.isFDim && !gInfNanSupport; - test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - 
} - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - 
free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -986,3 +802,189 @@ exit: if (overflow) free(overflow); return error; } + +} // anonymous namespace + +int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; + test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); + test_info.skipNanInf = test_info.isFDim && !gInfNanSupport; + test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + 
} + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + 
free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 69e620aa..2fcc8c10 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -108,7 +110,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -116,9 +118,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -127,7 +129,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -139,9 +141,9 @@ typedef struct ThreadInfo // to 0. MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -159,10 +161,10 @@ typedef struct TestInfo int ftz; // non-zero if running in flush to zero mode // no special values -} TestInfo; +}; // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -272,201 +274,18 @@ static const double specialValues[] = { +0.0, }; -static size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static const int specialValuesInt[] = { +const int specialValuesInt[] = { 0, 1, 2, 3, 1022, 1023, 1024, INT_MIN, INT_MAX, -1, -2, -3, -1022, -1023, -11024, -INT_MAX, }; -static constexpr size_t specialValuesIntCount = - sizeof(specialValuesInt) / sizeof(specialValuesInt[0]); - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - cl_int maxErrorVal2 = 0; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, 
sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - cl_buffer_region region2 = { i * test_info.subBufferSize - * 
sizeof(cl_int), - test_info.subBufferSize * sizeof(cl_int) }; - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // 
Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} +constexpr size_t specialValuesIntCount = + sizeof(specialValuesInt) / sizeof(specialValuesInt[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -744,3 +563,187 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + cl_int maxErrorVal2 = 0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + 
test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + cl_buffer_region region2 = { i * test_info.subBufferSize + * sizeof(cl_int), + test_info.subBufferSize * sizeof(cl_int) }; + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + 
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index e65a9aaf..e1538e3c 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -106,7 +108,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -114,9 +116,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -125,7 +127,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -137,9 +139,9 @@ typedef struct ThreadInfo // to 0. 
MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -157,10 +159,10 @@ typedef struct TestInfo int ftz; // non-zero if running in flush to zero mode // no special values -} TestInfo; +}; // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -262,204 +264,20 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static const int specialValuesInt[] = { +const int specialValuesInt[] = { 0, 1, 2, 3, 126, 127, 128, 0x02000001, 0x04000001, 1465264071, 1488522147, -1, -2, -3, -126, -127, -128, -0x02000001, -0x04000001, -1465264071, -1488522147, }; -static size_t specialValuesIntCount = - sizeof(specialValuesInt) / sizeof(specialValuesInt[0]); - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - cl_int maxErrorVal2 = 0; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = 
gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - cl_buffer_region region2 = { i * test_info.subBufferSize - * sizeof(cl_int), - test_info.subBufferSize * sizeof(cl_int) }; - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - 
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - 
clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} +constexpr size_t specialValuesIntCount = + sizeof(specialValuesInt) / sizeof(specialValuesInt[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -738,3 +556,188 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + cl_int maxErrorVal2 = 0; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + cl_buffer_region region2 = { i * test_info.subBufferSize + * sizeof(cl_int), + test_info.subBufferSize * sizeof(cl_int) }; + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + 
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + 
clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 21e76c85..605a3144 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -20,9 +20,11 @@ #include -static int BuildKernel(const char *operator_symbol, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode) +namespace { + +int BuildKernel(const char *operator_symbol, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -108,7 +110,7 @@ static int BuildKernel(const char *operator_symbol, int vectorSize, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -116,9 +118,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *operator_symbol; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -127,7 +129,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -139,9 +141,9 @@ typedef struct ThreadInfo // to 0. 
MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -161,10 +163,10 @@ typedef struct TestInfo // otherwise. // no special fields -} TestInfo; +}; // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -274,192 +276,10 @@ static const double specialValues[] = { +0.0, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, - bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL 
== test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -793,3 +613,185 @@ static cl_int 
Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, + bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + 
test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index ccaef604..8448af54 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ 
b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -20,9 +20,11 @@ #include -static int BuildKernel(const char *operator_symbol, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode) +namespace { + +int BuildKernel(const char *operator_symbol, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -106,7 +108,7 @@ static int BuildKernel(const char *operator_symbol, int vectorSize, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -114,9 +116,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *operator_symbol; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -125,7 +127,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -137,9 +139,9 @@ typedef struct ThreadInfo // to 0. MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -159,10 +161,10 @@ typedef struct TestInfo // otherwise. 
// no special fields -} TestInfo; +}; // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -264,194 +266,10 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, - bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - test_info.relaxedMode = relaxedMode; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_READ_WRITE, 
CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < 
gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -923,3 +741,187 @@ exit: if (overflow) free(overflow); return error; } + +} // anonymous namespace + +int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, + bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_READ_WRITE, 
CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < 
gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp index 14f41092..43dc1d30 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -115,16 +117,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -132,7 +134,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -typedef struct ComputeReferenceInfoD_ +struct ComputeReferenceInfoD { const double *x; const double *y; @@ -141,9 +143,9 @@ typedef struct ComputeReferenceInfoD_ long double (*f_ffpI)(long double, long double, int *); cl_uint lim; cl_uint count; -} ComputeReferenceInfoD; +}; -static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo) +cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo) { ComputeReferenceInfoD *cri = (ComputeReferenceInfoD *)userInfo; cl_uint lim = cri->lim; @@ -165,6 +167,8 @@ static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo) return CL_SUCCESS; } +} // anonymous namespace + int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp index 5ef44b6e..83ceeaab 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -113,16 +115,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo 
{ cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -130,7 +132,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -typedef struct ComputeReferenceInfoF_ +struct ComputeReferenceInfoF { const float *x; const float *y; @@ -139,9 +141,9 @@ typedef struct ComputeReferenceInfoF_ double (*f_ffpI)(double, double, int *); cl_uint lim; cl_uint count; -} ComputeReferenceInfoF; +}; -static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo) +cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo) { ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo; cl_uint lim = cri->lim; @@ -161,6 +163,8 @@ static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo) return CL_SUCCESS; } +} // anonymous namespace + int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp index 3edbb485..91736285 100644 --- a/test_conformance/math_brute_force/function_list.cpp +++ b/test_conformance/math_brute_force/function_list.cpp @@ -53,6 +53,7 @@ STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \ _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \ } + #define unaryF NULL #define i_unaryF NULL #define unaryF_u NULL diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index 4383fa8b..d09e14c1 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ 
b/test_conformance/math_brute_force/i_unary_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -100,16 +102,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -117,6 +119,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index c803aa32..89b566d9 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -98,16 +100,16 @@ static int BuildKernel(const char *name, int vectorSize, 
cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -115,6 +117,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index d09915f6..11281261 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -107,7 +109,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -115,9 +117,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -126,16 +128,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -150,11 +152,10 @@ typedef struct TestInfo cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode - -} TestInfo; +}; // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -264,174 +265,10 @@ static const double specialValues[] = { +0.0, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step 
= (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (size_t i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint 
thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -735,3 +572,167 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (size_t i = 0; i < test_info.threadCount; i++) + { + 
cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index c530cdaf..6475e4bb 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", 
sizeNames[vectorSize], @@ -105,7 +107,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -113,9 +115,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -124,16 +126,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -148,11 +150,10 @@ typedef struct TestInfo cl_uint step; // step between each chunk and the next. 
cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode - -} TestInfo; +}; // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -254,175 +255,10 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - 
vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -724,3 +560,168 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = 
GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + 
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 00e65a2c..860e4596 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -101,7 +103,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -109,9 +111,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -120,14 +122,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -142,160 +144,9 @@ typedef struct TestInfo cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode +}; -} TestInfo; - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - 
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); - goto exit; - } - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -506,3 +357,153 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) return CL_SUCCESS; } + +} // anonymous namespace + +int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = 
getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to 
create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index 3c1717ac..58a2a954 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int 
BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -100,7 +102,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -108,9 +110,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -119,14 +121,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -141,161 +143,9 @@ typedef struct TestInfo cl_uint step; // step between each chunk and the next. 
cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode +}; -} TestInfo; - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - 
test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j 
< gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -521,3 +371,154 @@ exit: return ret; } + +} // anonymous namespace + +int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific 
data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp index a32cd5a8..8e88f9f6 100644 --- a/test_conformance/math_brute_force/mad_double.cpp +++ b/test_conformance/math_brute_force/mad_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -113,16 +115,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } 
-typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -130,6 +132,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp index 095a22ff..0552ba4b 100644 --- a/test_conformance/math_brute_force/mad_float.cpp +++ b/test_conformance/math_brute_force/mad_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -111,16 +113,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -128,6 +130,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index 606fdc5a..8af136ac 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -23,8 +23,10 @@ #define CORRECTLY_ROUNDED 0 #define FLUSHED 1 -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -116,16 +118,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -134,7 +136,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -202,9 +204,11 @@ static const double specialValues[] = { +0.0, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); +} // anonymous namespace + int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) { diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index e52c0a0f..c69083ad 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -23,8 +23,10 @@ #define CORRECTLY_ROUNDED 0 #define FLUSHED 1 -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -114,16 +116,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -132,7 +134,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -210,9 +212,11 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); +} // anonymous namespace + int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index f6fa3264..dcd21884 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -101,7 +103,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -109,9 +111,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -120,16 +122,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value. Init to 0. cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -151,174 +153,9 @@ typedef struct TestInfo float half_sin_cos_tan_limit; bool relaxedMode; // True if test is running in relaxed mode, false // otherwise. 
-} TestInfo; - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - test_info.relaxedMode = relaxedMode; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * 
sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ %a", maxError, maxErrorVal); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < 
test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} +}; -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -547,3 +384,168 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) return CL_SUCCESS; } + +} // anonymous namespace + +int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + test_info.relaxedMode = relaxedMode; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel 
*)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ %a", maxError, maxErrorVal); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index 17edc58d..f176fb95 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, 
cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -99,7 +101,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -107,9 +109,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -118,16 +120,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value. Init to 0. cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -149,200 +151,9 @@ typedef struct TestInfo float half_sin_cos_tan_limit; bool relaxedMode; // True if test is running in relaxed mode, false // otherwise. 
-} TestInfo; - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0); - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < 
test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); - goto exit; - } - } - - // Check for special cases for unary float - test_info.isRangeLimited = 0; - test_info.half_sin_cos_tan_limit = 0; - if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos")) - { - test_info.isRangeLimited = 1; - test_info.half_sin_cos_tan_limit = 1.0f - + test_info.ulps - * (FLT_EPSILON / 2.0f); // out of range results from finite - // inputs must be in [-1,1] - } - else if (0 == strcmp(f->name, "half_tan")) - { - test_info.isRangeLimited = 1; - test_info.half_sin_cos_tan_limit = - INFINITY; // out of range resut from finite inputs must be numeric - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting || skipTestingRelaxed) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - if (skipTestingRelaxed) - { - vlog(" (rlx skip correctness testing)\n"); - goto exit; - } - - vlog("\t%8.2f @ %a", maxError, maxErrorVal); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} +}; -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -725,3 +536,194 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) return CL_SUCCESS; } + +} // anonymous namespace + +int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0); + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + 
if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + } + + // Check for special cases for unary float + test_info.isRangeLimited = 0; + test_info.half_sin_cos_tan_limit = 0; + if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos")) + { + test_info.isRangeLimited = 1; + test_info.half_sin_cos_tan_limit = 1.0f + + test_info.ulps + * (FLT_EPSILON / 2.0f); // out of range results from finite + // inputs must be in [-1,1] + } + else if (0 == strcmp(f->name, "half_tan")) + { + test_info.isRangeLimited = 1; + test_info.half_sin_cos_tan_limit = + INFINITY; // out of range resut from finite inputs must be numeric + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting || skipTestingRelaxed) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + if (skipTestingRelaxed) + { + vlog(" (rlx skip correctness testing)\n"); + goto exit; + } + + vlog("\t%8.2f @ %a", maxError, maxErrorVal); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < 
test_info.threadCount; i++) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp index 71dd4f44..8757fbc4 100644 --- a/test_conformance/math_brute_force/unary_two_results_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -107,16 +109,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -124,6 +126,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index 4a375ce3..a54bd024 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -105,16 +107,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -122,6 +124,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp index 14d1fb99..9ed77dce 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -108,16 +110,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -125,12 +127,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -static cl_ulong abs_cl_long(cl_long i) +cl_ulong abs_cl_long(cl_long i) { cl_long mask = i >> 63; return (i ^ mask) - mask; } +} // anonymous namespace + int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp index 23b0d707..d048220b 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -106,16 +108,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -123,12 +125,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -static cl_ulong abs_cl_long(cl_long i) +cl_ulong abs_cl_long(cl_long i) { cl_long mask = i >> 63; return (i ^ mask) - mask; } +} // anonymous namespace + int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp index 3c5f99da..9478d0bc 100644 --- a/test_conformance/math_brute_force/unary_u_double.cpp +++ b/test_conformance/math_brute_force/unary_u_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -102,16 +104,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -119,11 +121,13 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -static cl_ulong random64(MTdata d) +cl_ulong random64(MTdata d) { return (cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32); } +} // anonymous namespace + int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp index 44c5af47..848a9bac 100644 --- a/test_conformance/math_brute_force/unary_u_float.cpp +++ b/test_conformance/math_brute_force/unary_u_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -99,16 +101,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -116,6 +118,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) { int error; -- cgit v1.2.3 From a08cacc67334788e8135964ca8edce373017ac55 Mon Sep 17 00:00:00 2001 From: ouakheli <53617630+ouakheli@users.noreply.github.com> Date: Mon, 24 May 2021 11:31:37 +0100 Subject: Fix clang-format-9 install (#1261) --- .github/workflows/presubmit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 0c1778eb..8ef7e663 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -33,7 +33,7 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Install packages - run: sudo apt install -y clang-format + run: sudo apt install -y clang-format clang-format-9 - uses: actions/checkout@v2 with: fetch-depth: 0 -- cgit v1.2.3 From ed839ebf10c5b7334ac16b0fe13e324f3b47799a Mon Sep 17 00:00:00 2001 From: Marco Antognini Date: Mon, 24 May 2021 16:34:54 +0100 Subject: Avoid manual memory management (#1260) * Avoid manual memory management Prefer std::vector over malloc and free. This will allow removing goto statements by leveraging RAII. Use appropriate type (bool) to store overflow predicates and allocate std::vector of appropriate sizes: before this change the allocation was unnecessary bigger than required. No longer attempt to catch "out of host memory" issues, given that in such situation it is generally not possible to cleanly report an error. Rely on std::bad_alloc exception to report such issues. 
Introduce a new header for common code in the math_brute_force component. It is currently complementary to utility.h and is expected to hold cleaned up content extracted from future refactoring operations. List all headers as source in CMake for better compatibility with IDEs. Signed-off-by: Marco Antognini * Remove manual or unnecessary memset In order to use non-POD types as fields of TestInfo, memset must be replaced with a compatible zero-initialisation. Remove an unnecessary memset in MakeKernels. Signed-off-by: Marco Antognini --- test_conformance/math_brute_force/CMakeLists.txt | 6 ++ .../math_brute_force/binary_double.cpp | 76 ++++++++------------ test_conformance/math_brute_force/binary_float.cpp | 80 ++++++++-------------- .../math_brute_force/binary_i_double.cpp | 76 ++++++++------------ .../math_brute_force/binary_i_float.cpp | 76 ++++++++------------ .../math_brute_force/binary_operator_double.cpp | 76 ++++++++------------ .../math_brute_force/binary_operator_float.cpp | 80 ++++++++-------------- test_conformance/math_brute_force/common.h | 27 ++++++++ .../math_brute_force/macro_binary_double.cpp | 78 ++++++++------------- .../math_brute_force/macro_binary_float.cpp | 76 ++++++++------------ .../math_brute_force/macro_unary_double.cpp | 72 +++++++------------ .../math_brute_force/macro_unary_float.cpp | 72 +++++++------------ test_conformance/math_brute_force/main.cpp | 8 +-- test_conformance/math_brute_force/unary_double.cpp | 72 +++++++------------ test_conformance/math_brute_force/unary_float.cpp | 72 +++++++------------ 15 files changed, 366 insertions(+), 581 deletions(-) create mode 100644 test_conformance/math_brute_force/common.h diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index d8dfc403..28d2716f 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -9,7 +9,9 @@ set(${MODULE_NAME}_SOURCES 
binary_operator_float.cpp binary_two_results_i_double.cpp binary_two_results_i_float.cpp + common.h function_list.cpp + function_list.h i_unary_double.cpp i_unary_float.cpp macro_binary_double.cpp @@ -20,9 +22,12 @@ set(${MODULE_NAME}_SOURCES mad_float.cpp main.cpp reference_math.cpp + reference_math.h sleep.cpp + sleep.h ternary_double.cpp ternary_float.cpp + test_functions.h unary_double.cpp unary_float.cpp unary_two_results_double.cpp @@ -32,6 +37,7 @@ set(${MODULE_NAME}_SOURCES unary_u_double.cpp unary_u_float.cpp utility.cpp + utility.h ) include(../CMakeCommon.txt) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index 9c6b59b4..a2b7d28b 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -115,7 +116,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -126,7 +127,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -149,11 +151,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -284,11 +289,11 @@ constexpr size_t specialValuesCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_double); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); float ulps = job->ulps; dptr func = job->f->dfunc; int ftz = job->ftz; @@ -647,7 +652,7 @@ exit: int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; @@ -656,7 +661,6 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -685,27 +689,10 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + 
test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -802,27 +789,20 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index 9c7081dc..97712ee8 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -113,7 +114,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -124,7 +125,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -147,11 +149,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -274,18 +279,18 @@ constexpr size_t specialValuesCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_float); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; bool relaxedMode = job->relaxedMode; float ulps = getAllowedUlpError(job->f, relaxedMode); MTdata d = tinfo->d; cl_int error; - cl_uchar *overflow = (cl_uchar *)malloc(buffer_size); + std::vector overflow(buffer_elements, false); const char *name = job->f->name; int isFDim = job->isFDim; int skipNanInf = job->skipNanInf; @@ -447,7 +452,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) vlog_error("Error: clFinish failed! err: %d\n", error); goto exit; } - free(overflow); return CL_SUCCESS; } @@ -799,7 +803,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } exit: - if (overflow) free(overflow); return error; } @@ -807,7 +810,7 @@ exit: int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; @@ -816,7 +819,6 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -846,27 +848,10 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel 
*)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -963,27 +948,20 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 
2fcc8c10..f15c21ed 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -114,7 +115,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. @@ -125,7 +126,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -148,11 +150,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -287,11 +292,11 @@ constexpr size_t specialValuesIntCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_double); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); float ulps = job->ulps; dptr func = job->f->dfunc; int ftz = job->ftz; @@ -568,7 +573,7 @@ exit: int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; @@ -577,7 +582,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -602,27 +606,10 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + 
test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -722,27 +709,20 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index e1538e3c..9e27b007 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -112,7 +113,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -123,7 +124,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -146,11 +148,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -279,11 +284,11 @@ constexpr size_t specialValuesIntCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_float); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; float ulps = job->ulps; @@ -561,7 +566,7 @@ exit: int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; @@ -570,7 +575,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -596,27 +600,10 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + 
test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -716,27 +703,20 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 605a3144..c407fdaa 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -114,7 +115,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *operator_symbol; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -125,7 +126,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->operator_symbol, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -148,11 +150,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -281,11 +286,11 @@ constexpr size_t specialValuesCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_double); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); float ulps = job->ulps; dptr func = job->f->dfunc; int ftz = job->ftz; @@ -619,7 +624,7 @@ exit: int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; @@ -628,7 +633,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -653,27 +657,10 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + 
test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -770,27 +757,20 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index 8448af54..7fbb07c2 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -112,7 +113,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *operator_symbol; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -123,7 +124,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->operator_symbol, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -146,11 +148,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -271,18 +276,18 @@ constexpr size_t specialValuesCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_float); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; bool relaxedMode = job->relaxedMode; float ulps = getAllowedUlpError(job->f, relaxedMode); MTdata d = tinfo->d; cl_int error; - cl_uchar *overflow = (cl_uchar *)malloc(buffer_size); + std::vector overflow(buffer_elements, false); const char *name = job->f->name; cl_uint *t = 0; cl_float *r = 0; @@ -445,7 +450,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (gSkipCorrectnessTesting) { - free(overflow); return CL_SUCCESS; } @@ -738,7 +742,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } exit: - if (overflow) free(overflow); return error; } @@ -747,7 +750,7 @@ exit: int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; @@ -756,7 +759,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -783,27 +785,10 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to 
allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -900,27 +885,20 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h new file mode 100644 index 00000000..3eafb6de --- /dev/null +++ b/test_conformance/math_brute_force/common.h @@ -0,0 +1,27 @@ +// 
+// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef COMMON_H +#define COMMON_H + +#include "utility.h" + +#include +#include + +// Array of thread-specific kernels for each vector size. +using KernelMatrix = std::array, VECTOR_SIZE_COUNT>; + +#endif /* COMMON_H */ diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index 11281261..6db6aa56 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -113,7 +114,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -124,7 +125,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -142,11 +144,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -270,11 +275,11 @@ constexpr size_t specialValuesCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_double); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); dptr dfunc = job->f->dfunc; int ftz = job->ftz; MTdata d = tinfo->d; @@ -577,13 +582,12 @@ exit: int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -607,28 +611,11 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (size_t i = 0; i < test_info.threadCount; i++) + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + for (cl_uint i = 0; i < 
test_info.threadCount; i++) { cl_buffer_region region = { i * test_info.subBufferSize * sizeof(cl_double), @@ -711,27 +698,20 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 6475e4bb..d6d5c8eb 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -111,7 +112,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -122,7 +123,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -140,11 +142,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -260,11 +265,11 @@ constexpr size_t specialValuesCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_float); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; MTdata d = tinfo->d; @@ -565,13 +570,12 @@ exit: int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -596,27 +600,10 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -700,27 +687,20 
@@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 860e4596..1978c185 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -107,7 +108,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -118,7 +119,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -134,11 +136,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -148,12 +153,12 @@ struct TestInfo cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_double); cl_uint scale = job->scale; cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); dptr dfunc = job->f->dfunc; int ftz = job->ftz; cl_int error; @@ -362,13 +367,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -392,27 +396,10 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { 
cl_buffer_region region = { @@ -484,25 +471,18 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + clReleaseMemObject(threadInfo.inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index 58a2a954..ece5e9b6 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -106,7 +107,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -117,7 +118,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -133,11 +135,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -147,12 +152,12 @@ struct TestInfo cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_float); cl_uint scale = job->scale; cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; cl_int error = CL_SUCCESS; @@ -376,13 +381,12 @@ exit: int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -407,27 +411,10 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -499,25 +486,18 
@@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + clReleaseMemObject(threadInfo.inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index e52f2f0a..6691f462 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -1055,8 +1055,6 @@ int MakeKernels(const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p, bool relaxedMode) { - int error = 0; - cl_uint i; char options[200] = ""; if (gForceFTZ) @@ -1074,7 +1072,7 @@ int MakeKernels(const char **c, cl_uint count, const char *name, strcat(options, " -cl-fast-relaxed-math"); } - error = + int error = create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options); if (error != CL_SUCCESS) { @@ -1082,9 +1080,7 @@ int MakeKernels(const char **c, cl_uint count, const char *name, return error; } - - memset(k, 0, kernel_count * sizeof(*k)); - for (i = 0; i < kernel_count; i++) + for (cl_uint i = 0; i < kernel_count; i++) { k[i] = clCreateKernel(*p, name, &error); if (NULL == k[i] || error) diff --git a/test_conformance/math_brute_force/unary_double.cpp 
b/test_conformance/math_brute_force/unary_double.cpp index dcd21884..2d455047 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -107,7 +108,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. @@ -118,7 +119,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -136,11 +138,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -157,12 +162,12 @@ struct TestInfo cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_double); cl_uint scale = job->scale; cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); float ulps = job->ulps; dptr func = job->f->dfunc; cl_int error; @@ -389,14 +394,13 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -422,27 +426,10 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + test_info.tinfo.resize(test_info.threadCount, 
ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -526,25 +513,18 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + clReleaseMemObject(threadInfo.inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index f176fb95..83d27b0b 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -105,7 +106,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -116,7 +117,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -134,11 +136,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -155,12 +160,12 @@ struct TestInfo cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_float); cl_uint scale = job->scale; cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; const char *fname = job->f->name; bool relaxedMode = job->relaxedMode; @@ -541,7 +546,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; @@ -550,7 +555,6 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -576,27 +580,10 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - 
test_info.threadCount * sizeof(*test_info.tinfo)); + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -704,25 +691,18 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + clReleaseMemObject(threadInfo.inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; -- cgit v1.2.3 From 0876ea10be4783340683c9970c5899ac8ed1d6ab Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Mon, 24 May 2021 16:59:03 +0100 Subject: Ignore padding bits in clCopyImage/clFillImage testing (#1184) The CL_UNORM_SHORT_555 and CL_UNORM_INT_101010 formats contain padding bits which need to be ignored in clCopyImage and clFillImage testing. For clFillImage tests, padding was not ignored for the CL_UNORM_SHORT_555 format, and was ignored for CL_UNORM_INT_101010 by modifying actual and reference data. For clCopyImage tests, padding was not ignored, both for CL_UNORM_SHORT_555 and for CL_UNORM_INT_101010. Fix this by adding a new compare_scanlines() function, which is used for both of these formats, and does not modify the actual or reference data. 
Signed-off-by: Stuart Brady --- test_common/harness/imageHelpers.cpp | 41 ++++++++++++++++++++++ test_common/harness/imageHelpers.h | 3 ++ .../images/clCopyImage/test_copy_generic.cpp | 23 ++++++------ .../images/clFillImage/test_fill_generic.cpp | 28 ++++++--------- 4 files changed, 66 insertions(+), 29 deletions(-) diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index d1754653..314709f8 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -479,6 +479,47 @@ void print_first_pixel_difference_error(size_t where, const char *sourcePixel, } } +size_t compare_scanlines(const image_descriptor *imageInfo, const char *aPtr, + const char *bPtr) +{ + size_t pixel_size = get_pixel_size(imageInfo->format); + size_t column; + + for (column = 0; column < imageInfo->width; column++) + { + switch (imageInfo->format->image_channel_data_type) + { + // If the data type is 101010, then ignore bits 31 and 32 when + // comparing the row + case CL_UNORM_INT_101010: { + cl_uint aPixel = *(cl_uint *)aPtr; + cl_uint bPixel = *(cl_uint *)bPtr; + if ((aPixel & 0x3fffffff) != (bPixel & 0x3fffffff)) + return column; + } + break; + + // If the data type is 555, ignore bit 15 when comparing the row + case CL_UNORM_SHORT_555: { + cl_ushort aPixel = *(cl_ushort *)aPtr; + cl_ushort bPixel = *(cl_ushort *)bPtr; + if ((aPixel & 0x7fff) != (bPixel & 0x7fff)) return column; + } + break; + + default: + if (memcmp(aPtr, bPtr, pixel_size) != 0) return column; + break; + } + + aPtr += pixel_size; + bPtr += pixel_size; + } + + // If we didn't find a difference, return the width of the image + return column; +} + int random_log_in_range(int minV, int maxV, MTdata d) { double v = log2(((double)genrand_int32(d) / (double)0xffffffff) + 1); diff --git a/test_common/harness/imageHelpers.h b/test_common/harness/imageHelpers.h index 848ec655..e728a939 100644 --- a/test_common/harness/imageHelpers.h +++ 
b/test_common/harness/imageHelpers.h @@ -139,6 +139,9 @@ void print_first_pixel_difference_error(size_t where, const char *sourcePixel, image_descriptor *imageInfo, size_t y, size_t thirdDim); +size_t compare_scanlines(const image_descriptor *imageInfo, const char *aPtr, + const char *bPtr); + void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes, size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize, diff --git a/test_conformance/images/clCopyImage/test_copy_generic.cpp b/test_conformance/images/clCopyImage/test_copy_generic.cpp index 026916e8..bd935e7f 100644 --- a/test_conformance/images/clCopyImage/test_copy_generic.cpp +++ b/test_conformance/images/clCopyImage/test_copy_generic.cpp @@ -547,18 +547,19 @@ int test_copy_image_generic( cl_context context, cl_command_queue queue, image_d { if( memcmp( sourcePtr, destPtr, scanlineSize ) != 0 ) { - // Find the first missing pixel + // Find the first differing pixel size_t pixel_size = get_pixel_size( dstImageInfo->format ); - size_t where = 0; - for( where = 0; where < dstImageInfo->width; where++ ) - if( memcmp( sourcePtr + pixel_size * where, destPtr + pixel_size * where, pixel_size) ) - break; - - print_first_pixel_difference_error( - where, sourcePtr + pixel_size * where, - destPtr + pixel_size * where, dstImageInfo, y, - dstImageInfo->depth); - return -1; + size_t where = + compare_scanlines(dstImageInfo, sourcePtr, destPtr); + + if (where < dstImageInfo->width) + { + print_first_pixel_difference_error( + where, sourcePtr + pixel_size * where, + destPtr + pixel_size * where, dstImageInfo, y, + dstImageInfo->depth); + return -1; + } } sourcePtr += rowPitch; if((dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY || dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D)) diff --git a/test_conformance/images/clFillImage/test_fill_generic.cpp b/test_conformance/images/clFillImage/test_fill_generic.cpp index 59bf24ad..6cd6beb0 100644 --- 
a/test_conformance/images/clFillImage/test_fill_generic.cpp +++ b/test_conformance/images/clFillImage/test_fill_generic.cpp @@ -468,27 +468,19 @@ int test_fill_image_generic( cl_context context, cl_command_queue queue, image_d { for ( size_t y = 0; y < secondDim; y++ ) { - // If the data type is 101010 ignore bits 31 and 32 when comparing the row - if (imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010) { - for (size_t w=0;w!=scanlineSize/4;++w) { - ((cl_uint*)sourcePtr)[w] &= 0x3FFFFFFF; - ((cl_uint*)destPtr)[w] &= 0x3FFFFFFF; - } - } - if (memcmp( sourcePtr, destPtr, scanlineSize ) != 0) { - // Find the first missing pixel + // Find the first differing pixel size_t pixel_size = get_pixel_size( imageInfo->format ); - size_t where = 0; - for ( where = 0; where < imageInfo->width; where++ ) - if ( memcmp( sourcePtr + pixel_size * where, destPtr + pixel_size * where, pixel_size) ) - break; - - print_first_pixel_difference_error( - where, sourcePtr + pixel_size * where, - destPtr + pixel_size * where, imageInfo, y, thirdDim); - return -1; + size_t where = compare_scanlines(imageInfo, sourcePtr, destPtr); + + if (where < imageInfo->width) + { + print_first_pixel_difference_error( + where, sourcePtr + pixel_size * where, + destPtr + pixel_size * where, imageInfo, y, thirdDim); + return -1; + } } total_matched += scanlineSize; -- cgit v1.2.3 From bd3135dd016aae7ae6454725ef3761d132a38926 Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Thu, 27 May 2021 10:05:27 +0200 Subject: Extend list of known extensions (#1262) --- .../compiler/test_compiler_defines_for_extensions.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 483adac9..a1d8d8bd 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ 
b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -49,7 +49,7 @@ const char *known_extensions[] = { "cl_khr_subgroup_shuffle", "cl_khr_subgroup_shuffle_relative", "cl_khr_subgroup_clustered_reduce", - + "cl_khr_extended_bit_ops", // API-only extensions after this point. If you add above here, modify // first_API_extension below. "cl_khr_icd", @@ -71,10 +71,13 @@ const char *known_extensions[] = { "cl_khr_spirv_no_integer_wrap_decoration", "cl_khr_extended_versioning", "cl_khr_device_uuid", + "cl_khr_pci_bus_info", + "cl_khr_suggested_local_work_size", + "cl_khr_spirv_linkonce_odr", }; -size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*); -size_t first_API_extension = 27; +size_t num_known_extensions = sizeof(known_extensions) / sizeof(char *); +size_t first_API_extension = 28; const char *known_embedded_extensions[] = { "cles_khr_int64", -- cgit v1.2.3 From 315998511abe3959be21962a696911b43d4d5f59 Mon Sep 17 00:00:00 2001 From: Marco Antognini Date: Thu, 27 May 2021 09:06:13 +0100 Subject: Address data race in ThreadPool (#1265) ThreadSanitizer detects some data race in ThreadPool. They stem from inappropriate usage of volatile which are replaced with std::atomic variables in this patch. This patch focuses on data races identified while running the math_brute_force component. For example, it doesn't fully remove usage of ThreadPool_AtomicAdd from other components of the CTS. Furthermore, thread leaks, most likely because threads are not joined, are not addressed. 
Signed-off-by: Marco Antognini --- test_common/harness/ThreadPool.cpp | 44 ++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/test_common/harness/ThreadPool.cpp b/test_common/harness/ThreadPool.cpp index 5dae1b4a..62798045 100644 --- a/test_common/harness/ThreadPool.cpp +++ b/test_common/harness/ThreadPool.cpp @@ -22,6 +22,8 @@ #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) // or any other POSIX system +#include + #if defined(_WIN32) #include #if defined(_MSC_VER) @@ -241,7 +243,7 @@ pthread_cond_t cond_var; // Condition variable state. How many iterations on the function left to run, // set to CL_INT_MAX to cause worker threads to exit. Note: this value might // go negative. -volatile cl_int gRunCount = 0; +std::atomic gRunCount{ 0 }; // State that only changes when the threadpool is not working. volatile TPFuncPtr gFunc_ptr = NULL; @@ -261,19 +263,20 @@ pthread_cond_t caller_cond_var; // # of threads intended to be running. Running threads will decrement this // as they discover they've run out of work to do. -volatile cl_int gRunning = 0; +std::atomic gRunning{ 0 }; // The total number of threads launched. -volatile cl_int gThreadCount = 0; +std::atomic gThreadCount{ 0 }; + #ifdef _WIN32 void ThreadPool_WorkerFunc(void *p) #else void *ThreadPool_WorkerFunc(void *p) #endif { - cl_uint threadID = ThreadPool_AtomicAdd((volatile cl_int *)p, 1); - cl_int item = ThreadPool_AtomicAdd(&gRunCount, -1); - // log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning ); + auto &tid = *static_cast *>(p); + cl_uint threadID = tid++; + cl_int item = gRunCount--; while (MAX_COUNT > item) { @@ -282,8 +285,6 @@ void *ThreadPool_WorkerFunc(void *p) // check for more work to do if (0 >= item) { - // log_info("Thread %d has run out of work.\n", threadID); - // No work to do. 
Attempt to block waiting for work #if defined(_WIN32) EnterCriticalSection(cond_lock); @@ -298,9 +299,7 @@ void *ThreadPool_WorkerFunc(void *p) } #endif // !_WIN32 - cl_int remaining = ThreadPool_AtomicAdd(&gRunning, -1); - // log_info("ThreadPool_WorkerFunc: gRunning = %d\n", - // remaining - 1); + cl_int remaining = gRunning--; if (1 == remaining) { // last thread out signal the main thread to wake up #if defined(_WIN32) @@ -350,7 +349,7 @@ void *ThreadPool_WorkerFunc(void *p) #endif // !_WIN32 // try again to get a valid item id - item = ThreadPool_AtomicAdd(&gRunCount, -1); + item = gRunCount--; if (MAX_COUNT <= item) // exit if we are done { #if defined(_WIN32) @@ -362,8 +361,7 @@ void *ThreadPool_WorkerFunc(void *p) } } - ThreadPool_AtomicAdd(&gRunning, 1); - // log_info("Thread %d has found work.\n", threadID); + gRunning++; #if defined(_WIN32) LeaveCriticalSection(cond_lock); @@ -447,12 +445,12 @@ void *ThreadPool_WorkerFunc(void *p) } // get the next item - item = ThreadPool_AtomicAdd(&gRunCount, -1); + item = gRunCount--; } exit: log_info("ThreadPool: thread %d exiting.\n", threadID); - ThreadPool_AtomicAdd(&gThreadCount, -1); + gThreadCount--; #if !defined(_WIN32) return NULL; #endif @@ -487,7 +485,7 @@ void ThreadPool_Init(void) { cl_int i; int err; - volatile cl_uint threadID = 0; + std::atomic threadID{ 0 }; // Check for manual override of multithreading code. We add this for better // debuggability. 
@@ -624,7 +622,7 @@ void ThreadPool_Init(void) } #endif // !_WIN32 - gRunning = gThreadCount; + gRunning = gThreadCount.load(); // init threads for (i = 0; i < gThreadCount; i++) { @@ -688,10 +686,6 @@ static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, void ThreadPool_Exit(void) { -#ifndef _WIN32 - int err; -#endif - int count; gRunCount = CL_INT_MAX; #if defined(__GNUC__) @@ -705,13 +699,13 @@ void ThreadPool_Exit(void) #endif // spin waiting for threads to die - for (count = 0; 0 != gThreadCount && count < 1000; count++) + for (int count = 0; 0 != gThreadCount && count < 1000; count++) { #if defined(_WIN32) _WakeAllConditionVariable(cond_var); Sleep(1); #else // !_WIN32 - if ((err = pthread_cond_broadcast(&cond_var))) + if (int err = pthread_cond_broadcast(&cond_var)) { log_error("Error %d from pthread_cond_broadcast. Unable to wake up " "work threads. ThreadPool_Exit failed.\n", @@ -725,7 +719,7 @@ void ThreadPool_Exit(void) if (gThreadCount) log_error("Error: Thread pool timed out after 1 second with %d threads " "still active.\n", - gThreadCount); + gThreadCount.load()); else log_info("Thread pool exited in a orderly fashion.\n"); } -- cgit v1.2.3 From 76ace61314e061fbf0f8a058dab19fa7e04df937 Mon Sep 17 00:00:00 2001 From: Marco Antognini Date: Wed, 9 Jun 2021 11:08:08 +0100 Subject: Fix leaks in callSingleTestFunction (#1224) The context and queue were not released when the test is not supported in offline mode or the queue couldn't be created. Inline test_missing_support_offline_cmpiler_ret macro, remove dead parameter of check_functions_for_offline_compiler and slightly refactor callSingleTestFunction to address leaks. 
Signed-off-by: Marco Antognini --- test_common/harness/errorHelpers.cpp | 15 +++++++-------- test_common/harness/errorHelpers.h | 18 +----------------- test_common/harness/testHarness.cpp | 12 +++++++++--- 3 files changed, 17 insertions(+), 28 deletions(-) diff --git a/test_common/harness/errorHelpers.cpp b/test_common/harness/errorHelpers.cpp index 3ddbc37b..ea928bc3 100644 --- a/test_common/harness/errorHelpers.cpp +++ b/test_common/harness/errorHelpers.cpp @@ -21,6 +21,7 @@ #include "errorHelpers.h" #include "parseParameters.h" +#include "testHarness.h" #include @@ -690,21 +691,19 @@ const char *subtests_to_skip_with_offline_compiler[] = { "library_function" }; -int check_functions_for_offline_compiler(const char *subtestname, - cl_device_id device) +bool check_functions_for_offline_compiler(const char *subtestname) { if (gCompilationMode != kOnline) { size_t nNotRequiredWithOfflineCompiler = - sizeof(subtests_to_skip_with_offline_compiler) / sizeof(char *); - size_t i; - for (i = 0; i < nNotRequiredWithOfflineCompiler; ++i) + ARRAY_SIZE(subtests_to_skip_with_offline_compiler); + for (size_t i = 0; i < nNotRequiredWithOfflineCompiler; ++i) { if (!strcmp(subtestname, subtests_to_skip_with_offline_compiler[i])) { - return 1; + return false; } } } - return 0; -} \ No newline at end of file + return true; +} diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h index c7f49e3d..d59bc78d 100644 --- a/test_common/harness/errorHelpers.h +++ b/test_common/harness/errorHelpers.h @@ -92,21 +92,6 @@ static int vlog_win32(const char *format, ...); "the device version! 
(from %s:%d)\n", \ msg, __FILE__, __LINE__); -#define test_missing_support_offline_cmpiler(errCode, msg) \ - test_missing_support_offline_cmpiler_ret(errCode, msg, errCode) -// this macro should always return CL_SUCCESS, but print the skip message on -// test not supported with offline compiler -#define test_missing_support_offline_cmpiler_ret(errCode, msg, retValue) \ - { \ - if (errCode != CL_SUCCESS) \ - { \ - log_info("INFO: Subtest %s tests is not supported in offline " \ - "compiler execution path! (from %s:%d)\n", \ - msg, __FILE__, __LINE__); \ - return TEST_SKIP; \ - } \ - } - // expected error code vs. what we got #define test_failure_error(errCode, expectedErrCode, msg) \ test_failure_error_ret(errCode, expectedErrCode, msg, \ @@ -181,8 +166,7 @@ extern const char *GetAddressModeName(cl_addressing_mode mode); extern const char *GetQueuePropertyName(cl_command_queue_properties properties); extern const char *GetDeviceTypeName(cl_device_type type); -int check_functions_for_offline_compiler(const char *subtestname, - cl_device_id device); +bool check_functions_for_offline_compiler(const char *subtestname); cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list); diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index 1aec3d07..b3863918 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -783,6 +783,14 @@ test_status callSingleTestFunction(test_definition test, return TEST_SKIP; } + if (!check_functions_for_offline_compiler(test.name)) + { + log_info("Subtest %s tests is not supported in offline compiler " + "execution path!\n", + test.name); + return TEST_SKIP; + } + /* Create a context to work with, unless we're told not to */ if (!forceNoContextCreation) { @@ -812,14 +820,12 @@ test_status callSingleTestFunction(test_definition test, if (queue == NULL) { print_error(error, "Unable to create testing command queue"); + clReleaseContext(context); 
return TEST_FAIL; } } /* Run the test and print the result */ - error = check_functions_for_offline_compiler(test.name, deviceToUse); - test_missing_support_offline_cmpiler(error, test.name); - if (test.func == NULL) { // Skip unimplemented test, can happen when all of the tests are -- cgit v1.2.3 From 277d029608ed0f7fdb0823f010d653dd0169c82c Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Fri, 11 Jun 2021 09:42:20 +0100 Subject: Run spirv-val for SPIR-V offline compilation (#1108) The common --disable-spirv-validation option has been added to disable this functionality. Signed-off-by: Stuart Brady --- test_common/harness/kernelHelpers.cpp | 22 +++++++++++++++++++++- test_common/harness/parseParameters.cpp | 32 +++++++++++++++++++++++++++++++- test_common/harness/parseParameters.h | 2 ++ 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp index aaf0d689..18f51cbe 100644 --- a/test_common/harness/kernelHelpers.cpp +++ b/test_common/harness/kernelHelpers.cpp @@ -530,7 +530,7 @@ static int get_offline_compiler_output( sourceFilename, outputFilename); if (error != CL_SUCCESS) return error; - // read output file + // open output file for reading ifs.open(outputFilename.c_str(), std::ios::binary); if (!ifs.good()) { @@ -540,6 +540,26 @@ static int get_offline_compiler_output( } } } + + if (compilationMode == kSpir_v && !gDisableSPIRVValidation) + { + std::string runString = gSPIRVValidator + " " + outputFilename; + + int returnCode = system(runString.c_str()); + if (returnCode == -1) + { + log_error("Error: failed to invoke SPIR-V validator\n"); + return CL_COMPILE_PROGRAM_FAILURE; + } + else if (returnCode != 0) + { + log_error( + "Failed to validate SPIR-V file %s: system() returned 0x%x\n", + outputFilename.c_str(), returnCode); + return CL_COMPILE_PROGRAM_FAILURE; + } + } + return CL_SUCCESS; } diff --git a/test_common/harness/parseParameters.cpp 
b/test_common/harness/parseParameters.cpp index b2ab5b02..e946d744 100644 --- a/test_common/harness/parseParameters.cpp +++ b/test_common/harness/parseParameters.cpp @@ -28,11 +28,14 @@ using namespace std; #define DEFAULT_COMPILATION_PROGRAM "cl_offline_compiler" +#define DEFAULT_SPIRV_VALIDATOR "spirv-val" CompilationMode gCompilationMode = kOnline; CompilationCacheMode gCompilationCacheMode = kCacheModeCompileIfAbsent; std::string gCompilationCachePath = "."; std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM; +bool gDisableSPIRVValidation = false; +std::string gSPIRVValidator = DEFAULT_SPIRV_VALIDATOR; void helpInfo() { @@ -62,7 +65,14 @@ For offline compilation (binary and spir-v modes) only: Path for offline compiler output and CL source --compilation-program Program to use for offline compilation, defaults to: - )" DEFAULT_COMPILATION_PROGRAM "\n\n"); + )" DEFAULT_COMPILATION_PROGRAM R"( + +For spir-v mode only: + --disable-spirv-validation + Disable validation of SPIR-V using the SPIR-V validator + --spirv-validator + Path for SPIR-V validator, defaults to )" DEFAULT_SPIRV_VALIDATOR "\n" + "\n"); } int parseCustomParam(int argc, const char *argv[], const char *ignore) @@ -198,6 +208,26 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore) return -1; } } + else if (!strcmp(argv[i], "--disable-spirv-validation")) + { + delArg++; + gDisableSPIRVValidation = true; + } + else if (!strcmp(argv[i], "--spirv-validator")) + { + delArg++; + if ((i + 1) < argc) + { + delArg++; + gSPIRVValidator = argv[i + 1]; + } + else + { + log_error("Program argument for --spirv-validator was not " + "specified.\n"); + return -1; + } + } // cleaning parameters from argv tab for (int j = i; j < argc - delArg; j++) argv[j] = argv[j + delArg]; diff --git a/test_common/harness/parseParameters.h b/test_common/harness/parseParameters.h index b0f8328a..437e12f9 100644 --- a/test_common/harness/parseParameters.h +++ b/test_common/harness/parseParameters.h @@ 
-38,6 +38,8 @@ extern CompilationMode gCompilationMode; extern CompilationCacheMode gCompilationCacheMode; extern std::string gCompilationCachePath; extern std::string gCompilationProgram; +extern bool gDisableSPIRVValidation; +extern std::string gSPIRVValidator; extern int parseCustomParam(int argc, const char *argv[], const char *ignore = 0); -- cgit v1.2.3 From 80a4a833be9bc390574801dc5a47b02a579bf47b Mon Sep 17 00:00:00 2001 From: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Date: Fri, 11 Jun 2021 09:44:16 +0100 Subject: Minor fixes for CL_UNORM_SHORT_565, CL_UNORM_SHORT_555 (#1129) * Minor fixes for CL_UNORM_SHORT_565, CL_UNORM_SHORT_555 * Fix verification for undefined bit * Relax current infinitely precision requirement for these formats and move check in common function. * Add proper debug output. Signed-off-by: John Kesapides * Minor Formating fix. Signed-off-by: John Kesapides --- .../images/kernel_read_write/test_common.cpp | 37 +++++++++- .../images/kernel_read_write/test_common.h | 5 ++ .../images/kernel_read_write/test_write_1D.cpp | 70 +++++++++++++++---- .../kernel_read_write/test_write_1D_array.cpp | 71 +++++++++++++++---- .../kernel_read_write/test_write_2D_array.cpp | 80 ++++++++++++++++++---- .../images/kernel_read_write/test_write_3D.cpp | 80 ++++++++++++++++++---- .../images/kernel_read_write/test_write_image.cpp | 72 +++++++++++++++---- 7 files changed, 339 insertions(+), 76 deletions(-) diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp index e76710b5..375ee587 100644 --- a/test_conformance/images/kernel_read_write/test_common.cpp +++ b/test_conformance/images/kernel_read_write/test_common.cpp @@ -1543,4 +1543,39 @@ int test_read_image(cl_context context, cl_command_queue queue, } return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; -} \ No newline at end of file +} + +void filter_undefined_bits(image_descriptor *imageInfo, char 
*resultPtr) +{ + // mask off the top bit (bit 15) if the image format is (CL_UNORM_SHORT_555, + // CL_RGB). (Note: OpenCL says: the top bit is undefined meaning it can be + // either 0 or 1.) + if (imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555) + { + cl_ushort *temp = (cl_ushort *)resultPtr; + temp[0] &= 0x7fff; + } +} + +int filter_rounding_errors(int forceCorrectlyRoundedWrites, + image_descriptor *imageInfo, float *errors) +{ + // We are allowed 0.6 absolute error vs. infinitely precise for some + // normalized formats + if (0 == forceCorrectlyRoundedWrites + && (imageInfo->format->image_channel_data_type == CL_UNORM_INT8 + || imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 + || imageInfo->format->image_channel_data_type == CL_UNORM_INT16 + || imageInfo->format->image_channel_data_type == CL_SNORM_INT8 + || imageInfo->format->image_channel_data_type == CL_SNORM_INT16 + || imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555 + || imageInfo->format->image_channel_data_type + == CL_UNORM_SHORT_565)) + { + if (!(fabsf(errors[0]) > 0.6f) && !(fabsf(errors[1]) > 0.6f) + && !(fabsf(errors[2]) > 0.6f) && !(fabsf(errors[3]) > 0.6f)) + return 0; + } + + return 1; +} diff --git a/test_conformance/images/kernel_read_write/test_common.h b/test_conformance/images/kernel_read_write/test_common.h index e7ecbe0b..656c41f4 100644 --- a/test_conformance/images/kernel_read_write/test_common.h +++ b/test_conformance/images/kernel_read_write/test_common.h @@ -229,3 +229,8 @@ int determine_validation_error_offset( } return 0; } + + +extern int filter_rounding_errors(int forceCorrectlyRoundedWrites, + image_descriptor *imageInfo, float *errors); +extern void filter_undefined_bits(image_descriptor *imageInfo, char *resultPtr); diff --git a/test_conformance/images/kernel_read_write/test_write_1D.cpp b/test_conformance/images/kernel_read_write/test_write_1D.cpp index 41983edf..1556a76a 100644 --- 
a/test_conformance/images/kernel_read_write/test_write_1D.cpp +++ b/test_conformance/images/kernel_read_write/test_write_1D.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -395,6 +396,8 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que } else { + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes every time if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) { @@ -403,21 +406,8 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! 
(fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, errors); if( failure ) { @@ -458,6 +448,56 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } case CL_UNORM_INT16: case CL_SNORM_INT16: case CL_UNSIGNED_INT16: diff --git a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp index c771704c..e9aa8d2a 100644 --- 
a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -415,6 +416,9 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma } else { + + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes every time if( memcmp( resultBuffer, resultPtr, pixelSize ) != 0 ) { @@ -423,21 +427,8 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! 
(fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, errors); if( failure ) { @@ -478,6 +469,56 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } case CL_UNORM_INT16: case CL_SNORM_INT16: case CL_UNSIGNED_INT16: diff --git a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp index 08a7a803..5bca7124 100644 --- 
a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -438,6 +439,9 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma } else { + + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes every time if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) { @@ -446,21 +450,9 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! 
(fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, + errors); if( failure ) { @@ -501,6 +493,64 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x " + "Actual: 0x%2.2x \n", + ref_value[0], + test_value[0]); + + log_error( + " Expected: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error( + " Actual: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error( + " Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x " + "Actual: 0x%2.2x \n", + ref_value[0], + test_value[0]); + + log_error( + " Expected: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error( + " Actual: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error( + " Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } case CL_UNORM_INT16: case CL_SNORM_INT16: case CL_UNSIGNED_INT16: diff --git a/test_conformance/images/kernel_read_write/test_write_3D.cpp b/test_conformance/images/kernel_read_write/test_write_3D.cpp index 5cc96bb4..d9a69627 100644 --- 
a/test_conformance/images/kernel_read_write/test_write_3D.cpp +++ b/test_conformance/images/kernel_read_write/test_write_3D.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -445,6 +446,9 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que } else { + + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes every time if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) { @@ -453,21 +457,9 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! 
(fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, + errors); if( failure ) { @@ -508,6 +500,64 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x " + "Actual: 0x%2.2x \n", + ref_value[0], + test_value[0]); + + log_error( + " Expected: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error( + " Actual: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error( + " Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x " + "Actual: 0x%2.2x \n", + ref_value[0], + test_value[0]); + + log_error( + " Expected: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error( + " Actual: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error( + " Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } case CL_UNORM_INT16: case CL_SNORM_INT16: case CL_UNSIGNED_INT16: diff --git a/test_conformance/images/kernel_read_write/test_write_image.cpp b/test_conformance/images/kernel_read_write/test_write_image.cpp index e40e80d6..9cc9698c 100644 --- 
a/test_conformance/images/kernel_read_write/test_write_image.cpp +++ b/test_conformance/images/kernel_read_write/test_write_image.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -477,6 +478,9 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue } else { + + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes every time if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) { @@ -485,21 +489,8 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! 
(fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, errors); if( failure ) { @@ -577,6 +568,57 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] ); log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + + case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } } float *v = (float *)(char *)imagePtr; -- cgit v1.2.3 From 69f0054001438078c11478546b855c06e07e1817 Mon Sep 17 00:00:00 2001 From: Marco Antognini Date: Thu, 17 Jun 2021 14:05:05 +0100 Subject: Fix copy and move semantics of wrapper classes (#1268) * Remove unnecessary code These custom equality operators are not 
necessary because of the conversion operators which already allow using the standard equality operators between two pointers. Signed-off-by: Marco Antognini * Fix copy and move semantics of wrapper classes Related to #465. The Wrapper classes are rewritten to properly handle copy and move semantics, while preserving the existing API and removing code duplication. Add error handling around clRelase* and clRetain*. Signed-off-by: Marco Antognini * Address build issue on 32-bit Windows Include linkage in RetainReleaseType function type. Signed-off-by: Marco Antognini --- test_common/harness/typeWrappers.h | 246 ++++++++++---------------- test_conformance/buffers/test_sub_buffers.cpp | 3 +- 2 files changed, 91 insertions(+), 158 deletions(-) diff --git a/test_common/harness/typeWrappers.h b/test_common/harness/typeWrappers.h index 9a58a9d2..50c7c938 100644 --- a/test_common/harness/typeWrappers.h +++ b/test_common/harness/typeWrappers.h @@ -16,122 +16,134 @@ #ifndef _typeWrappers_h #define _typeWrappers_h -#include -#include - #if !defined(_WIN32) #include #endif #include "compat.h" -#include #include "mt19937.h" #include "errorHelpers.h" #include "kernelHelpers.h" -/* cl_context wrapper */ +#include +#include -class clContextWrapper { -public: - clContextWrapper() { mContext = NULL; } - clContextWrapper(cl_context program) { mContext = program; } - ~clContextWrapper() - { - if (mContext != NULL) clReleaseContext(mContext); - } +namespace wrapper_details { + +// clRetain*() and clRelease*() functions share the same type. +template // T should be cl_context, cl_program, ... +using RetainReleaseType = cl_int CL_API_CALL(T); - clContextWrapper &operator=(const cl_context &rhs) +// A generic wrapper class that follows OpenCL retain/release semantics. +// +// This Wrapper class implement copy and move semantics, which makes it +// compatible with standard containers for example. +// +// Template parameters: +// - T is the cl_* type (e.g. cl_context, cl_program, ...) 
+// - Retain is the clRetain* function (e.g. clRetainContext, ...) +// - Release is the clRelease* function (e.g. clReleaseContext, ...) +template Retain, RetainReleaseType Release> +class Wrapper { + static_assert(std::is_pointer::value, "T should be a pointer type."); + T object = nullptr; + + void retain() { - mContext = rhs; - return *this; + if (!object) return; + + auto err = Retain(object); + if (err != CL_SUCCESS) + { + print_error(err, "clRetain*() failed"); + std::abort(); + } } - operator cl_context() const { return mContext; } - cl_context *operator&() { return &mContext; } + void release() + { + if (!object) return; - bool operator==(const cl_context &rhs) { return mContext == rhs; } + auto err = Release(object); + if (err != CL_SUCCESS) + { + print_error(err, "clRelease*() failed"); + std::abort(); + } + } -protected: - cl_context mContext; -}; +public: + Wrapper() = default; -/* cl_program wrapper */ + // On initialisation, assume the object has a refcount of one. + Wrapper(T object): object(object) {} -class clProgramWrapper { -public: - clProgramWrapper() { mProgram = NULL; } - clProgramWrapper(cl_program program) { mProgram = program; } - ~clProgramWrapper() + // On assignment, assume the object has a refcount of one. + Wrapper &operator=(T rhs) { - if (mProgram != NULL) clReleaseProgram(mProgram); + reset(rhs); + return *this; } - clProgramWrapper &operator=(const cl_program &rhs) + // Copy semantics, increase retain count. 
+ Wrapper(Wrapper const &w) { *this = w; } + Wrapper &operator=(Wrapper const &w) { - mProgram = rhs; + reset(w.object); + retain(); return *this; } - operator cl_program() const { return mProgram; } - - cl_program *operator&() { return &mProgram; } - bool operator==(const cl_program &rhs) { return mProgram == rhs; } - -protected: - cl_program mProgram; -}; - -/* cl_kernel wrapper */ - -class clKernelWrapper { -public: - clKernelWrapper() { mKernel = NULL; } - clKernelWrapper(cl_kernel kernel) { mKernel = kernel; } - ~clKernelWrapper() + // Move semantics, directly take ownership. + Wrapper(Wrapper &&w) { *this = std::move(w); } + Wrapper &operator=(Wrapper &&w) { - if (mKernel != NULL) clReleaseKernel(mKernel); + reset(w.object); + w.object = nullptr; + return *this; } - clKernelWrapper &operator=(const cl_kernel &rhs) + ~Wrapper() { reset(); } + + // Release the existing object, if any, and own the new one, if any. + void reset(T new_object = nullptr) { - mKernel = rhs; - return *this; + release(); + object = new_object; } - operator cl_kernel() const { return mKernel; } - cl_kernel *operator&() { return &mKernel; } + operator T() const { return object; } - bool operator==(const cl_kernel &rhs) { return mKernel == rhs; } - -protected: - cl_kernel mKernel; + // Ideally this function should not exist as it breaks encapsulation by + // allowing external mutation of the Wrapper internal state. However, too + // much code currently relies on this. For example, instead of using T* as + // output parameters, existing code can be updated to use Wrapper& instead. 
+ T *operator&() { return &object; } }; -/* cl_mem (stream) wrapper */ +} // namespace wrapper_details -class clMemWrapper { -public: - clMemWrapper() { mMem = NULL; } - clMemWrapper(cl_mem mem) { mMem = mem; } - ~clMemWrapper() - { - if (mMem != NULL) clReleaseMemObject(mMem); - } +using clContextWrapper = + wrapper_details::Wrapper; - clMemWrapper &operator=(const cl_mem &rhs) - { - mMem = rhs; - return *this; - } - operator cl_mem() const { return mMem; } +using clProgramWrapper = + wrapper_details::Wrapper; - cl_mem *operator&() { return &mMem; } +using clKernelWrapper = + wrapper_details::Wrapper; - bool operator==(const cl_mem &rhs) { return mMem == rhs; } +using clMemWrapper = + wrapper_details::Wrapper; -protected: - cl_mem mMem; -}; +using clCommandQueueWrapper = + wrapper_details::Wrapper; + +using clSamplerWrapper = + wrapper_details::Wrapper; + +using clEventWrapper = + wrapper_details::Wrapper; class clProtectedImage { public: @@ -183,92 +195,12 @@ public: cl_mem *operator&() { return ℑ } - bool operator==(const cl_mem &rhs) { return image == rhs; } - protected: void *backingStore; size_t backingStoreSize; cl_mem image; }; -/* cl_command_queue wrapper */ -class clCommandQueueWrapper { -public: - clCommandQueueWrapper() { mMem = NULL; } - clCommandQueueWrapper(cl_command_queue mem) { mMem = mem; } - ~clCommandQueueWrapper() - { - if (mMem != NULL) - { - clReleaseCommandQueue(mMem); - } - } - - clCommandQueueWrapper &operator=(const cl_command_queue &rhs) - { - mMem = rhs; - return *this; - } - operator cl_command_queue() const { return mMem; } - - cl_command_queue *operator&() { return &mMem; } - - bool operator==(const cl_command_queue &rhs) { return mMem == rhs; } - -protected: - cl_command_queue mMem; -}; - -/* cl_sampler wrapper */ -class clSamplerWrapper { -public: - clSamplerWrapper() { mMem = NULL; } - clSamplerWrapper(cl_sampler mem) { mMem = mem; } - ~clSamplerWrapper() - { - if (mMem != NULL) clReleaseSampler(mMem); - } - - clSamplerWrapper 
&operator=(const cl_sampler &rhs) - { - mMem = rhs; - return *this; - } - operator cl_sampler() const { return mMem; } - - cl_sampler *operator&() { return &mMem; } - - bool operator==(const cl_sampler &rhs) { return mMem == rhs; } - -protected: - cl_sampler mMem; -}; - -/* cl_event wrapper */ -class clEventWrapper { -public: - clEventWrapper() { mMem = NULL; } - clEventWrapper(cl_event mem) { mMem = mem; } - ~clEventWrapper() - { - if (mMem != NULL) clReleaseEvent(mMem); - } - - clEventWrapper &operator=(const cl_event &rhs) - { - mMem = rhs; - return *this; - } - operator cl_event() const { return mMem; } - - cl_event *operator&() { return &mMem; } - - bool operator==(const cl_event &rhs) { return mMem == rhs; } - -protected: - cl_event mMem; -}; - /* Generic protected memory buffer, for verifying access within bounds */ class clProtectedArray { public: diff --git a/test_conformance/buffers/test_sub_buffers.cpp b/test_conformance/buffers/test_sub_buffers.cpp index 3e50121a..691509fd 100644 --- a/test_conformance/buffers/test_sub_buffers.cpp +++ b/test_conformance/buffers/test_sub_buffers.cpp @@ -39,7 +39,8 @@ public: region.size = mSize; cl_int error; - mMem = clCreateSubBuffer( mParentBuffer, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error ); + reset(clCreateSubBuffer(mParentBuffer, flags, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error)); return error; } }; -- cgit v1.2.3 From 236cd73fa17ed0c280b7aa6cd8a3dd116c4e5d2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Fri, 2 Jul 2021 10:34:13 +0100 Subject: Use macOS 10 in CI (#1282) macOS jobs frequently fail. Since macos-11.0 support is considered experimental, move to macos-10, using macos-latest so we automatically move to 11 when stable. 
See https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners Signed-off-by: Kevin Petit --- .github/workflows/presubmit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 8ef7e663..2aedc199 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -11,7 +11,7 @@ jobs: strategy: matrix: mainmatrix: [true] - os: [ubuntu-20.04, macos-11.0] + os: [ubuntu-20.04, macos-latest] include: - os: ubuntu-20.04 mainmatrix: true -- cgit v1.2.3 From 4a03bb79cb8fbd6012b02783e59565cce0b1f376 Mon Sep 17 00:00:00 2001 From: James Price Date: Mon, 5 Jul 2021 10:35:39 -0400 Subject: Fix double-release of memory objects (#1277) A recent update to the object wrapper classes (#1268) changed the behavior of assigning to a wrapper, whereby the wrapped object is now released upon assignment. A couple of tests were manually calling clReleaseMemObject and then assigning `nullptr` to the wrapper, resulting in the wrapper calling clReleaseMemObject on an object that had already been destroyed. 
--- test_conformance/api/test_mem_object_info.cpp | 7 ------- test_conformance/api/test_mem_objects.cpp | 7 +------ 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/test_conformance/api/test_mem_object_info.cpp b/test_conformance/api/test_mem_object_info.cpp index ccfeaafa..2afe0437 100644 --- a/test_conformance/api/test_mem_object_info.cpp +++ b/test_conformance/api/test_mem_object_info.cpp @@ -348,14 +348,7 @@ int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_ TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (cl_mem)bufferObject, "associated mem object", "%p", void * ) TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_OFFSET, offset, (size_t)( addressAlign ), "offset", "%ld", size_t ) - - clReleaseMemObject( subBufferObject ); - subBufferObject = NULL; - } - - clReleaseMemObject( bufferObject ); - bufferObject = NULL; } return CL_SUCCESS; diff --git a/test_conformance/api/test_mem_objects.cpp b/test_conformance/api/test_mem_objects.cpp index c29613f9..f1a4e993 100644 --- a/test_conformance/api/test_mem_objects.cpp +++ b/test_conformance/api/test_mem_objects.cpp @@ -48,12 +48,7 @@ int test_mem_object_destructor_callback_single(clMemWrapper &memObject) test_error(error, "Unable to set destructor callback"); // Now release the buffer, which SHOULD call the callbacks - error = clReleaseMemObject(memObject); - test_error(error, "Unable to release test buffer"); - - // Note: since we manually released the mem wrapper, we need to set it to - // NULL to prevent a double-release - memObject = NULL; + memObject.reset(); // At this point, all three callbacks should have already been called int numErrors = 0; -- cgit v1.2.3 From 433974fd2810f91b093f10121adca64e1eefd789 Mon Sep 17 00:00:00 2001 From: BKoscielak Date: Tue, 13 Jul 2021 18:15:33 +0200 Subject: Fix check for image support in test_basic sizeof (#1269) --- test_conformance/basic/test_sizeof.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 
deletions(-) diff --git a/test_conformance/basic/test_sizeof.cpp b/test_conformance/basic/test_sizeof.cpp index 66a6c563..6b1ddb56 100644 --- a/test_conformance/basic/test_sizeof.cpp +++ b/test_conformance/basic/test_sizeof.cpp @@ -292,11 +292,11 @@ int test_sizeof(cl_device_id device, cl_context context, cl_command_queue queue, continue; } - if( gIsEmbedded && - 0 == strcmp(other_types[i], "image3d_t") && - checkFor3DImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + if (0 == strcmp(other_types[i], "image3d_t") + && checkFor3DImageSupport(device) == CL_IMAGE_FORMAT_NOT_SUPPORTED) { - log_info("\n3D images are not supported by this device. Skipping test.\t"); + log_info("\n3D images are not supported by this device. " + "Skipping test.\t"); continue; } -- cgit v1.2.3 From b500da5fbc97a2fc73ee39e30c00e7d759a11215 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 21 Jul 2021 00:48:48 -0700 Subject: add basic test for cl_khr_pci_bus_info (#1227) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add basic test for cl_khr_pci_bus_info * correctly use TEST_SKIPPED_ITSELF Co-authored-by: Kévin Petit * fix related usage of TEST_SKIPPED_ITSELF Co-authored-by: Kévin Petit --- test_conformance/computeinfo/CMakeLists.txt | 1 + test_conformance/computeinfo/device_uuid.cpp | 2 +- test_conformance/computeinfo/main.cpp | 3 +- test_conformance/computeinfo/pci_bus_info.cpp | 53 +++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 test_conformance/computeinfo/pci_bus_info.cpp diff --git a/test_conformance/computeinfo/CMakeLists.txt b/test_conformance/computeinfo/CMakeLists.txt index 207223a3..06f0599c 100644 --- a/test_conformance/computeinfo/CMakeLists.txt +++ b/test_conformance/computeinfo/CMakeLists.txt @@ -5,6 +5,7 @@ set(${MODULE_NAME}_SOURCES device_uuid.cpp extended_versioning.cpp conforming_version.cpp + pci_bus_info.cpp ) include(../CMakeCommon.txt) diff --git 
a/test_conformance/computeinfo/device_uuid.cpp b/test_conformance/computeinfo/device_uuid.cpp index 1ef9dad2..7f29d0b6 100644 --- a/test_conformance/computeinfo/device_uuid.cpp +++ b/test_conformance/computeinfo/device_uuid.cpp @@ -105,7 +105,7 @@ int test_device_uuid(cl_device_id deviceID, cl_context context, if (!is_extension_available(deviceID, "cl_khr_device_uuid")) { log_info("cl_khr_device_uuid not supported. Skipping test...\n"); - return 0; + return TEST_SKIPPED_ITSELF; } int total_errors = 0; diff --git a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp index 4860b445..d993655b 100644 --- a/test_conformance/computeinfo/main.cpp +++ b/test_conformance/computeinfo/main.cpp @@ -1421,15 +1421,16 @@ int test_computeinfo(cl_device_id deviceID, cl_context context, extern int test_extended_versioning(cl_device_id, cl_context, cl_command_queue, int); extern int test_device_uuid(cl_device_id, cl_context, cl_command_queue, int); - extern int test_conformance_version(cl_device_id, cl_context, cl_command_queue, int); +extern int test_pci_bus_info(cl_device_id, cl_context, cl_command_queue, int); test_definition test_list[] = { ADD_TEST(computeinfo), ADD_TEST(extended_versioning), ADD_TEST(device_uuid), ADD_TEST_VERSION(conformance_version, Version(3, 0)), + ADD_TEST(pci_bus_info), }; const int test_num = ARRAY_SIZE(test_list); diff --git a/test_conformance/computeinfo/pci_bus_info.cpp b/test_conformance/computeinfo/pci_bus_info.cpp new file mode 100644 index 00000000..cd62ca05 --- /dev/null +++ b/test_conformance/computeinfo/pci_bus_info.cpp @@ -0,0 +1,53 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "harness/compat.h" + +#include +#include + +#include "harness/testHarness.h" +#include "harness/deviceInfo.h" + +int test_pci_bus_info(cl_device_id deviceID, cl_context context, + cl_command_queue ignoreQueue, int num_elements) +{ + if (!is_extension_available(deviceID, "cl_khr_pci_bus_info")) + { + log_info("cl_khr_pci_bus_info not supported. Skipping test...\n"); + return TEST_SKIPPED_ITSELF; + } + + cl_int error; + + cl_device_pci_bus_info_khr info; + + size_t size_ret; + error = clGetDeviceInfo(deviceID, CL_DEVICE_PCI_BUS_INFO_KHR, 0, NULL, + &size_ret); + test_error(error, "Unable to query CL_DEVICE_PCI_BUS_INFO_KHR size"); + test_assert_error( + size_ret == sizeof(info), + "Query for CL_DEVICE_PCI_BUS_INFO_KHR returned an unexpected size"); + + error = clGetDeviceInfo(deviceID, CL_DEVICE_PCI_BUS_INFO_KHR, sizeof(info), + &info, NULL); + test_error(error, "Unable to query CL_DEVICE_PCI_BUS_INFO_KHR"); + + log_info("\tPCI Bus Info: %04x:%02x:%02x.%x\n", info.pci_domain, + info.pci_bus, info.pci_device, info.pci_function); + + return TEST_PASS; +} -- cgit v1.2.3 From 12637114ac81d292861daf4bff2397a36581f712 Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Wed, 21 Jul 2021 09:50:22 +0200 Subject: Fix double release of object in test_api and test_gl (#1287) * Fix clang format only * Fix double release of objects --- .../api/test_context_destructor_callback.cpp | 7 +- test_conformance/gl/test_buffers.cpp | 415 ++++++++------ test_conformance/gl/test_fence_sync.cpp | 624 ++++++++++++--------- 3 files changed, 586 insertions(+), 
460 deletions(-) diff --git a/test_conformance/api/test_context_destructor_callback.cpp b/test_conformance/api/test_context_destructor_callback.cpp index 1d73a3c4..d29d9039 100644 --- a/test_conformance/api/test_context_destructor_callback.cpp +++ b/test_conformance/api/test_context_destructor_callback.cpp @@ -52,12 +52,7 @@ int test_context_destructor_callback(cl_device_id deviceID, cl_context context, test_error(error, "Unable to set destructor callback"); // Now release the context, which SHOULD call the callbacks - error = clReleaseContext(localContext); - test_error(error, "Unable to release local context"); - - // Note: since we manually released the context, we need to set it to NULL - // to prevent a double-release - localContext = NULL; + localContext.reset(); // At this point, all three callbacks should have already been called int numErrors = 0; diff --git a/test_conformance/gl/test_buffers.cpp b/test_conformance/gl/test_buffers.cpp index 35f01ee6..c61610d0 100644 --- a/test_conformance/gl/test_buffers.cpp +++ b/test_conformance/gl/test_buffers.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -17,126 +17,126 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" -#if !defined (__APPLE__) - #include +#if !defined(__APPLE__) +#include #endif static const char *bufferKernelPattern = -"__kernel void sample_test( __global %s%s *source, __global %s%s *clDest, __global %s%s *glDest )\n" -"{\n" -" int tid = get_global_id(0);\n" -" clDest[ tid ] = source[ tid ] + (%s%s)(1);\n" -" glDest[ tid ] = source[ tid ] + (%s%s)(2);\n" -"}\n"; - -#define TYPE_CASE( enum, type, range, offset ) \ - case enum: \ - { \ - cl_##type *ptr = (cl_##type *)outData; \ - for( i = 0; i < count; i++ ) \ - ptr[ i ] = (cl_##type)( ( genrand_int32(d) & range ) - offset ); \ - break; \ + "__kernel void sample_test( __global %s%s *source, __global %s%s *clDest, " + "__global %s%s *glDest )\n" + "{\n" + " int tid = get_global_id(0);\n" + " clDest[ tid ] = source[ tid ] + (%s%s)(1);\n" + " glDest[ tid ] = source[ tid ] + (%s%s)(2);\n" + "}\n"; + +#define TYPE_CASE(enum, type, range, offset) \ + case enum: { \ + cl_##type *ptr = (cl_##type *)outData; \ + for (i = 0; i < count; i++) \ + ptr[i] = (cl_##type)((genrand_int32(d) & range) - offset); \ + break; \ } -void gen_input_data( ExplicitType type, size_t count, MTdata d, void *outData ) +void gen_input_data(ExplicitType type, size_t count, MTdata d, void *outData) { size_t i; - switch( type ) + switch (type) { - case kBool: - { + case kBool: { bool *boolPtr = (bool *)outData; - for( i = 0; i < count; i++ ) + for (i = 0; i < count; i++) { - boolPtr[i] = ( genrand_int32(d) & 1 ) ? true : false; + boolPtr[i] = (genrand_int32(d) & 1) ? 
true : false; } break; } - TYPE_CASE( kChar, char, 250, 127 ) - TYPE_CASE( kUChar, uchar, 250, 0 ) - TYPE_CASE( kShort, short, 65530, 32767 ) - TYPE_CASE( kUShort, ushort, 65530, 0 ) - TYPE_CASE( kInt, int, 0x0fffffff, 0x70000000 ) - TYPE_CASE( kUInt, uint, 0x0fffffff, 0 ) + TYPE_CASE(kChar, char, 250, 127) + TYPE_CASE(kUChar, uchar, 250, 0) + TYPE_CASE(kShort, short, 65530, 32767) + TYPE_CASE(kUShort, ushort, 65530, 0) + TYPE_CASE(kInt, int, 0x0fffffff, 0x70000000) + TYPE_CASE(kUInt, uint, 0x0fffffff, 0) - case kLong: - { + case kLong: { cl_long *longPtr = (cl_long *)outData; - for( i = 0; i < count; i++ ) + for (i = 0; i < count; i++) { - longPtr[i] = (cl_long)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 ); + longPtr[i] = (cl_long)genrand_int32(d) + | ((cl_ulong)genrand_int32(d) << 32); } break; } - case kULong: - { + case kULong: { cl_ulong *ulongPtr = (cl_ulong *)outData; - for( i = 0; i < count; i++ ) + for (i = 0; i < count; i++) { - ulongPtr[i] = (cl_ulong)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 ); + ulongPtr[i] = (cl_ulong)genrand_int32(d) + | ((cl_ulong)genrand_int32(d) << 32); } break; } - case kFloat: - { + case kFloat: { cl_float *floatPtr = (float *)outData; - for( i = 0; i < count; i++ ) - floatPtr[i] = get_random_float( -100000.f, 100000.f, d ); + for (i = 0; i < count; i++) + floatPtr[i] = get_random_float(-100000.f, 100000.f, d); break; } default: - log_error( "ERROR: Invalid type passed in to generate_random_data!\n" ); + log_error( + "ERROR: Invalid type passed in to generate_random_data!\n"); break; } } -#define INC_CASE( enum, type ) \ - case enum: \ - { \ - cl_##type *src = (cl_##type *)inData; \ - cl_##type *dst = (cl_##type *)outData; \ - *dst = *src + 1; \ - break; \ +#define INC_CASE(enum, type) \ + case enum: { \ + cl_##type *src = (cl_##type *)inData; \ + cl_##type *dst = (cl_##type *)outData; \ + *dst = *src + 1; \ + break; \ } -void get_incremented_value( void *inData, void *outData, ExplicitType type ) +void 
get_incremented_value(void *inData, void *outData, ExplicitType type) { - switch( type ) + switch (type) { - INC_CASE( kChar, char ) - INC_CASE( kUChar, uchar ) - INC_CASE( kShort, short ) - INC_CASE( kUShort, ushort ) - INC_CASE( kInt, int ) - INC_CASE( kUInt, uint ) - INC_CASE( kLong, long ) - INC_CASE( kULong, ulong ) - INC_CASE( kFloat, float ) - default: - break; + INC_CASE(kChar, char) + INC_CASE(kUChar, uchar) + INC_CASE(kShort, short) + INC_CASE(kUShort, ushort) + INC_CASE(kInt, int) + INC_CASE(kUInt, uint) + INC_CASE(kLong, long) + INC_CASE(kULong, ulong) + INC_CASE(kFloat, float) + default: break; } } -int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType vecType, size_t vecSize, int numElements, int validate_only, MTdata d) +int test_buffer_kernel(cl_context context, cl_command_queue queue, + ExplicitType vecType, size_t vecSize, int numElements, + int validate_only, MTdata d) { clProgramWrapper program; clKernelWrapper kernel; - clMemWrapper streams[ 3 ]; + clMemWrapper streams[3]; size_t dataSize = numElements * 16 * sizeof(cl_long); #if !(defined(_WIN32) && defined(_MSC_VER)) - cl_long inData[numElements * 16], outDataCL[numElements * 16], outDataGL[ numElements * 16 ]; + cl_long inData[numElements * 16], outDataCL[numElements * 16], + outDataGL[numElements * 16]; #else - cl_long* inData = (cl_long*)_malloca(dataSize); - cl_long* outDataCL = (cl_long*)_malloca(dataSize); - cl_long* outDataGL = (cl_long*)_malloca(dataSize); + cl_long *inData = (cl_long *)_malloca(dataSize); + cl_long *outDataCL = (cl_long *)_malloca(dataSize); + cl_long *outDataGL = (cl_long *)_malloca(dataSize); #endif glBufferWrapper inGLBuffer, outGLBuffer; - int i; + int i; size_t bufferSize; int error; @@ -146,210 +146,259 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType char sizeName[4]; /* Create the source */ - if( vecSize == 1 ) - sizeName[ 0 ] = 0; + if (vecSize == 1) + sizeName[0] = 0; else - sprintf( sizeName, 
"%d", (int)vecSize ); + sprintf(sizeName, "%d", (int)vecSize); - sprintf( kernelSource, bufferKernelPattern, get_explicit_type_name( vecType ), sizeName, - get_explicit_type_name( vecType ), sizeName, - get_explicit_type_name( vecType ), sizeName, - get_explicit_type_name( vecType ), sizeName, - get_explicit_type_name( vecType ), sizeName ); + sprintf(kernelSource, bufferKernelPattern, get_explicit_type_name(vecType), + sizeName, get_explicit_type_name(vecType), sizeName, + get_explicit_type_name(vecType), sizeName, + get_explicit_type_name(vecType), sizeName, + get_explicit_type_name(vecType), sizeName); /* Create kernels */ programPtr = kernelSource; - if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programPtr, "sample_test")) { return -1; } - bufferSize = numElements * vecSize * get_explicit_type_size( vecType ); + bufferSize = numElements * vecSize * get_explicit_type_size(vecType); /* Generate some almost-random input data */ - gen_input_data( vecType, vecSize * numElements, d, inData ); - memset( outDataCL, 0, dataSize ); - memset( outDataGL, 0, dataSize ); + gen_input_data(vecType, vecSize * numElements, d, inData); + memset(outDataCL, 0, dataSize); + memset(outDataGL, 0, dataSize); /* Generate some GL buffers to go against */ - glGenBuffers( 1, &inGLBuffer ); - glGenBuffers( 1, &outGLBuffer ); + glGenBuffers(1, &inGLBuffer); + glGenBuffers(1, &outGLBuffer); - glBindBuffer( GL_ARRAY_BUFFER, inGLBuffer ); - glBufferData( GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW ); + glBindBuffer(GL_ARRAY_BUFFER, inGLBuffer); + glBufferData(GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW); - // Note: we need to bind the output buffer, even though we don't care about its values yet, - // because CL needs it to get the buffer size - glBindBuffer( GL_ARRAY_BUFFER, outGLBuffer ); - glBufferData( GL_ARRAY_BUFFER, bufferSize, 
outDataGL, GL_STATIC_DRAW ); + // Note: we need to bind the output buffer, even though we don't care about + // its values yet, because CL needs it to get the buffer size + glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer); + glBufferData(GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW); - glBindBuffer( GL_ARRAY_BUFFER, 0 ); + glBindBuffer(GL_ARRAY_BUFFER, 0); glFinish(); - /* Generate some streams. The first and last ones are GL, middle one just vanilla CL */ - streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_ONLY, inGLBuffer, &error ); - test_error( error, "Unable to create input GL buffer" ); + /* Generate some streams. The first and last ones are GL, middle one just + * vanilla CL */ + streams[0] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_ONLY, + inGLBuffer, &error); + test_error(error, "Unable to create input GL buffer"); - streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, bufferSize, NULL, &error ); - test_error( error, "Unable to create output CL buffer" ); + streams[1] = + clCreateBuffer(context, CL_MEM_READ_WRITE, bufferSize, NULL, &error); + test_error(error, "Unable to create output CL buffer"); - streams[ 2 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_WRITE_ONLY, outGLBuffer, &error ); - test_error( error, "Unable to create output GL buffer" ); + streams[2] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_WRITE_ONLY, + outGLBuffer, &error); + test_error(error, "Unable to create output GL buffer"); - /* Validate the info */ - if (validate_only) { - int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, (GLuint)inGLBuffer, (GLenum)0, 0) | - CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER, (GLuint)outGLBuffer, (GLenum)0, 0) ); - for(i=0;i<3;i++) + /* Validate the info */ + if (validate_only) { - clReleaseMemObject(streams[i]); - streams[i] = NULL; - } + int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, + (GLuint)inGLBuffer, (GLenum)0, 0) + | CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER, + 
(GLuint)outGLBuffer, (GLenum)0, 0)); + for (i = 0; i < 3; i++) + { + streams[i].reset(); + } - glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; - glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0; + glDeleteBuffers(1, &inGLBuffer); + inGLBuffer = 0; + glDeleteBuffers(1, &outGLBuffer); + outGLBuffer = 0; - return result; - } + return result; + } /* Assign streams and execute */ - for( int i = 0; i < 3; i++ ) + for (int i = 0; i < 3; i++) { - error = clSetKernelArg( kernel, i, sizeof( streams[ i ] ), &streams[ i ] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); + test_error(error, "Unable to set kernel arguments"); } - error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL); - test_error( error, "Unable to acquire GL obejcts"); - error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL); - test_error( error, "Unable to acquire GL obejcts"); + error = + (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &streams[0], 0, NULL, NULL); + test_error(error, "Unable to acquire GL obejcts"); + error = + (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &streams[2], 0, NULL, NULL); + test_error(error, "Unable to acquire GL obejcts"); /* Run the kernel */ threads[0] = numElements; - error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); - test_error( error, "Unable to get work group size to use" ); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Unable to execute test kernel" ); - - error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL ); - test_error(error, "clEnqueueReleaseGLObjects failed"); - error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL ); - test_error(error, "clEnqueueReleaseGLObjects failed"); - - // Get the results from both CL and GL and make sure everything looks correct - error = 
clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, bufferSize, outDataCL, 0, NULL, NULL ); - test_error( error, "Unable to read output CL array!" ); - - glBindBuffer( GL_ARRAY_BUFFER, outGLBuffer ); - void *glMem = glMapBuffer( GL_ARRAY_BUFFER, GL_READ_ONLY ); - memcpy( outDataGL, glMem, bufferSize ); - glUnmapBuffer( GL_ARRAY_BUFFER ); - - char *inP = (char *)inData, *glP = (char *)outDataGL, *clP = (char *)outDataCL; + error = get_max_common_work_group_size(context, kernel, threads[0], + &localThreads[0]); + test_error(error, "Unable to get work group size to use"); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); + + error = + (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &streams[0], 0, NULL, NULL); + test_error(error, "clEnqueueReleaseGLObjects failed"); + error = + (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &streams[2], 0, NULL, NULL); + test_error(error, "clEnqueueReleaseGLObjects failed"); + + // Get the results from both CL and GL and make sure everything looks + // correct + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, bufferSize, + outDataCL, 0, NULL, NULL); + test_error(error, "Unable to read output CL array!"); + + glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer); + void *glMem = glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY); + memcpy(outDataGL, glMem, bufferSize); + glUnmapBuffer(GL_ARRAY_BUFFER); + + char *inP = (char *)inData, *glP = (char *)outDataGL, + *clP = (char *)outDataCL; error = 0; - for( size_t i = 0; i < numElements * vecSize; i++ ) + for (size_t i = 0; i < numElements * vecSize; i++) { cl_long expectedCLValue, expectedGLValue; - get_incremented_value( inP, &expectedCLValue, vecType ); - get_incremented_value( &expectedCLValue, &expectedGLValue, vecType ); + get_incremented_value(inP, &expectedCLValue, vecType); + get_incremented_value(&expectedCLValue, &expectedGLValue, vecType); - if( memcmp( clP, &expectedCLValue, 
get_explicit_type_size( vecType ) ) != 0 ) + if (memcmp(clP, &expectedCLValue, get_explicit_type_size(vecType)) != 0) { - char scratch[ 64 ]; - log_error( "ERROR: Data sample %d from the CL output did not validate!\n", (int)i ); - log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) ); - log_error( "\tExpected: %s\n", GetDataVectorString( &expectedCLValue, get_explicit_type_size( vecType ), 1, scratch ) ); - log_error( "\t Actual: %s\n", GetDataVectorString( clP, get_explicit_type_size( vecType ), 1, scratch ) ); + char scratch[64]; + log_error( + "ERROR: Data sample %d from the CL output did not validate!\n", + (int)i); + log_error("\t Input: %s\n", + GetDataVectorString(inP, get_explicit_type_size(vecType), + 1, scratch)); + log_error("\tExpected: %s\n", + GetDataVectorString(&expectedCLValue, + get_explicit_type_size(vecType), 1, + scratch)); + log_error("\t Actual: %s\n", + GetDataVectorString(clP, get_explicit_type_size(vecType), + 1, scratch)); error = -1; } - if( memcmp( glP, &expectedGLValue, get_explicit_type_size( vecType ) ) != 0 ) + if (memcmp(glP, &expectedGLValue, get_explicit_type_size(vecType)) != 0) { - char scratch[ 64 ]; - log_error( "ERROR: Data sample %d from the GL output did not validate!\n", (int)i ); - log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) ); - log_error( "\tExpected: %s\n", GetDataVectorString( &expectedGLValue, get_explicit_type_size( vecType ), 1, scratch ) ); - log_error( "\t Actual: %s\n", GetDataVectorString( glP, get_explicit_type_size( vecType ), 1, scratch ) ); + char scratch[64]; + log_error( + "ERROR: Data sample %d from the GL output did not validate!\n", + (int)i); + log_error("\t Input: %s\n", + GetDataVectorString(inP, get_explicit_type_size(vecType), + 1, scratch)); + log_error("\tExpected: %s\n", + GetDataVectorString(&expectedGLValue, + get_explicit_type_size(vecType), 1, + scratch)); + log_error("\t Actual: 
%s\n", + GetDataVectorString(glP, get_explicit_type_size(vecType), + 1, scratch)); error = -1; } - if( error ) - return error; + if (error) return error; - inP += get_explicit_type_size( vecType ); - glP += get_explicit_type_size( vecType ); - clP += get_explicit_type_size( vecType ); + inP += get_explicit_type_size(vecType); + glP += get_explicit_type_size(vecType); + clP += get_explicit_type_size(vecType); } - for(i=0;i<3;i++) + for (i = 0; i < 3; i++) { - clReleaseMemObject(streams[i]); - streams[i] = NULL; + streams[i].reset(); } - glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; - glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0; + glDeleteBuffers(1, &inGLBuffer); + inGLBuffer = 0; + glDeleteBuffers(1, &outGLBuffer); + outGLBuffer = 0; return 0; } -int test_buffers( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +int test_buffers(cl_device_id device, cl_context context, + cl_command_queue queue, int numElements) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes }; + ExplicitType vecType[] = { + kChar, kUChar, kShort, kUShort, kInt, + kUInt, kLong, kULong, kFloat, kNumExplicitTypes + }; unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; unsigned int index, typeIndex; int retVal = 0; RandomSeed seed(gRandomSeed); - for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++) { - for( index = 0; vecSizes[ index ] != 0; index++ ) + for (index = 0; vecSizes[index] != 0; index++) { // Test! 
- if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 0, seed) != 0 ) + if (test_buffer_kernel(context, queue, vecType[typeIndex], + vecSizes[index], numElements, 0, seed) + != 0) { - char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; - log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] ); + char sizeNames[][4] = { "", "", "2", "", "4", "", "", "", "8", + "", "", "", "", "", "", "", "16" }; + log_error(" Buffer test %s%s FAILED\n", + get_explicit_type_name(vecType[typeIndex]), + sizeNames[vecSizes[index]]); retVal++; } } } return retVal; - } -int test_buffers_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +int test_buffers_getinfo(cl_device_id device, cl_context context, + cl_command_queue queue, int numElements) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes }; + ExplicitType vecType[] = { + kChar, kUChar, kShort, kUShort, kInt, + kUInt, kLong, kULong, kFloat, kNumExplicitTypes + }; unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; unsigned int index, typeIndex; int retVal = 0; - RandomSeed seed( gRandomSeed ); + RandomSeed seed(gRandomSeed); - for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++) { - for( index = 0; vecSizes[ index ] != 0; index++ ) + for (index = 0; vecSizes[index] != 0; index++) { // Test! 
- if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 1, seed ) != 0 ) + if (test_buffer_kernel(context, queue, vecType[typeIndex], + vecSizes[index], numElements, 1, seed) + != 0) { - char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; - log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] ); + char sizeNames[][4] = { "", "", "2", "", "4", "", "", "", "8", + "", "", "", "", "", "", "", "16" }; + log_error(" Buffer test %s%s FAILED\n", + get_explicit_type_name(vecType[typeIndex]), + sizeNames[vecSizes[index]]); retVal++; } } } return retVal; - } - - - diff --git a/test_conformance/gl/test_fence_sync.cpp b/test_conformance/gl/test_fence_sync.cpp index 00bf2cc9..35cc62de 100644 --- a/test_conformance/gl/test_fence_sync.cpp +++ b/test_conformance/gl/test_fence_sync.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -17,7 +17,7 @@ #include "gl/setup.h" #include "harness/genericThread.h" -#if defined( __APPLE__ ) +#if defined(__APPLE__) #include #else #include @@ -40,112 +40,121 @@ typedef struct __GLsync *GLsync; #define APIENTRY #endif -typedef GLsync (APIENTRY *glFenceSyncPtr)(GLenum condition,GLbitfield flags); +typedef GLsync(APIENTRY *glFenceSyncPtr)(GLenum condition, GLbitfield flags); glFenceSyncPtr glFenceSyncFunc; -typedef bool (APIENTRY *glIsSyncPtr)(GLsync sync); +typedef bool(APIENTRY *glIsSyncPtr)(GLsync sync); glIsSyncPtr glIsSyncFunc; -typedef void (APIENTRY *glDeleteSyncPtr)(GLsync sync); +typedef void(APIENTRY *glDeleteSyncPtr)(GLsync sync); glDeleteSyncPtr glDeleteSyncFunc; -typedef GLenum (APIENTRY *glClientWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout); +typedef GLenum(APIENTRY *glClientWaitSyncPtr)(GLsync sync, GLbitfield flags, + GLuint64 timeout); glClientWaitSyncPtr glClientWaitSyncFunc; -typedef void (APIENTRY *glWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout); +typedef void(APIENTRY *glWaitSyncPtr)(GLsync sync, GLbitfield flags, + GLuint64 timeout); glWaitSyncPtr glWaitSyncFunc; -typedef void (APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params); +typedef void(APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params); glGetInteger64vPtr glGetInteger64vFunc; -typedef void (APIENTRY *glGetSyncivPtr)(GLsync sync,GLenum pname,GLsizei bufSize,GLsizei *length, - GLint *values); +typedef void(APIENTRY *glGetSyncivPtr)(GLsync sync, GLenum pname, + GLsizei bufSize, GLsizei *length, + GLint *values); glGetSyncivPtr glGetSyncivFunc; #define CHK_GL_ERR() printf("%s\n", gluErrorString(glGetError())) -static void InitSyncFns( void ) +static void InitSyncFns(void) { - glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress( "glFenceSync" ); - glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress( "glIsSync" ); - glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress( "glDeleteSync" ); - 
glClientWaitSyncFunc = (glClientWaitSyncPtr)glutGetProcAddress( "glClientWaitSync" ); - glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress( "glWaitSync" ); - glGetInteger64vFunc = (glGetInteger64vPtr)glutGetProcAddress( "glGetInteger64v" ); - glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress( "glGetSynciv" ); + glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress("glFenceSync"); + glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress("glIsSync"); + glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress("glDeleteSync"); + glClientWaitSyncFunc = + (glClientWaitSyncPtr)glutGetProcAddress("glClientWaitSync"); + glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress("glWaitSync"); + glGetInteger64vFunc = + (glGetInteger64vPtr)glutGetProcAddress("glGetInteger64v"); + glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress("glGetSynciv"); } #ifndef GL_ARB_sync -#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 +#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 -#define GL_OBJECT_TYPE 0x9112 -#define GL_SYNC_CONDITION 0x9113 -#define GL_SYNC_STATUS 0x9114 -#define GL_SYNC_FLAGS 0x9115 +#define GL_OBJECT_TYPE 0x9112 +#define GL_SYNC_CONDITION 0x9113 +#define GL_SYNC_STATUS 0x9114 +#define GL_SYNC_FLAGS 0x9115 -#define GL_SYNC_FENCE 0x9116 +#define GL_SYNC_FENCE 0x9116 -#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 +#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 -#define GL_UNSIGNALED 0x9118 -#define GL_SIGNALED 0x9119 +#define GL_UNSIGNALED 0x9118 +#define GL_SIGNALED 0x9119 -#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 +#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 -#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull +#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull -#define GL_ALREADY_SIGNALED 0x911A -#define GL_TIMEOUT_EXPIRED 0x911B -#define GL_CONDITION_SATISFIED 0x911C -#define GL_WAIT_FAILED 0x911D +#define GL_ALREADY_SIGNALED 0x911A +#define GL_TIMEOUT_EXPIRED 0x911B +#define GL_CONDITION_SATISFIED 0x911C +#define GL_WAIT_FAILED 0x911D #endif #define USING_ARB_sync 1 #endif -typedef cl_event 
(CL_API_CALL *clCreateEventFromGLsyncKHR_fn)( cl_context context, GLsync sync, cl_int *errCode_ret) ; +typedef cl_event(CL_API_CALL *clCreateEventFromGLsyncKHR_fn)( + cl_context context, GLsync sync, cl_int *errCode_ret); clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr; static const char *updateBuffersKernel[] = { - "__kernel void update( __global float4 * vertices, __global float4 *colors, int horizWrap, int rowIdx )\n" + "__kernel void update( __global float4 * vertices, __global float4 " + "*colors, int horizWrap, int rowIdx )\n" "{\n" " size_t tid = get_global_id(0);\n" "\n" " size_t xVal = ( tid & ( horizWrap - 1 ) );\n" " vertices[ tid * 2 + 0 ] = (float4)( xVal, rowIdx*16.f, 0.0f, 1.f );\n" - " vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, 1.f );\n" + " vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, " + "1.f );\n" "\n" " int rowV = rowIdx + 1;\n" - " colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 ) >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n" - " //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, 1.0f, 1.0f, 1.0f );\n" + " colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 " + ") >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n" + " //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, " + "1.0f, 1.0f, 1.0f );\n" " colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n" - "}\n" }; - -//Passthrough VertexShader -static const char *vertexshader = -"#version 150\n" -"uniform mat4 projMatrix;\n" -"in vec4 inPosition;\n" -"in vec4 inColor;\n" -"out vec4 vertColor;\n" -"void main (void) {\n" -" gl_Position = projMatrix*inPosition;\n" -" vertColor = inColor;\n" -"}\n"; - -//Passthrough FragmentShader -static const char *fragmentshader = -"#version 150\n" -"in vec4 vertColor;\n" -"out vec4 outColor;\n" -"void main (void) {\n" -" outColor = vertColor;\n" -"}\n"; + "}\n" +}; + +// Passthrough VertexShader +static const char 
*vertexshader = "#version 150\n" + "uniform mat4 projMatrix;\n" + "in vec4 inPosition;\n" + "in vec4 inColor;\n" + "out vec4 vertColor;\n" + "void main (void) {\n" + " gl_Position = projMatrix*inPosition;\n" + " vertColor = inColor;\n" + "}\n"; + +// Passthrough FragmentShader +static const char *fragmentshader = "#version 150\n" + "in vec4 vertColor;\n" + "out vec4 outColor;\n" + "void main (void) {\n" + " outColor = vertColor;\n" + "}\n"; GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) { - GLint logLength, status; + GLint logLength, status; GLuint program = glCreateProgram(); GLuint vpShader; @@ -153,8 +162,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) glShaderSource(vpShader, 1, (const GLchar **)&vertexshader, NULL); glCompileShader(vpShader); glGetShaderiv(vpShader, GL_INFO_LOG_LENGTH, &logLength); - if (logLength > 0) { - GLchar *log = (GLchar*) malloc(logLength); + if (logLength > 0) + { + GLchar *log = (GLchar *)malloc(logLength); glGetShaderInfoLog(vpShader, logLength, &logLength, log); log_info("Vtx Shader compile log:\n%s", log); free(log); @@ -175,8 +185,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) glCompileShader(fpShader); glGetShaderiv(fpShader, GL_INFO_LOG_LENGTH, &logLength); - if (logLength > 0) { - GLchar *log = (GLchar*)malloc(logLength); + if (logLength > 0) + { + GLchar *log = (GLchar *)malloc(logLength); glGetShaderInfoLog(fpShader, logLength, &logLength, log); log_info("Frag Shader compile log:\n%s", log); free(log); @@ -192,8 +203,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) glLinkProgram(program); glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength); - if (logLength > 0) { - GLchar *log = (GLchar*)malloc(logLength); + if (logLength > 0) + { + GLchar *log = (GLchar *)malloc(logLength); glGetProgramInfoLog(program, logLength, &logLength, log); log_info("Program link log:\n%s", log); free(log); @@ -219,7 +231,7 @@ void destroyShaderProgram(GLuint program) glUseProgram(0); 
glGetAttachedShaders(program, 2, &count, shaders); int i; - for(i = 0; i < count; i++) + for (i = 0; i < count; i++) { glDetachShader(program, shaders[i]); glDeleteShader(shaders[i]); @@ -227,44 +239,49 @@ void destroyShaderProgram(GLuint program) glDeleteProgram(program); } -// This function queues up and runs the above CL kernel that writes the vertex data -cl_int run_cl_kernel( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, - cl_int rowIdx, cl_event fenceEvent, size_t numThreads ) +// This function queues up and runs the above CL kernel that writes the vertex +// data +cl_int run_cl_kernel(cl_kernel kernel, cl_command_queue queue, cl_mem stream0, + cl_mem stream1, cl_int rowIdx, cl_event fenceEvent, + size_t numThreads) { - cl_int error = clSetKernelArg( kernel, 3, sizeof( rowIdx ), &rowIdx ); - test_error( error, "Unable to set kernel arguments" ); + cl_int error = clSetKernelArg(kernel, 3, sizeof(rowIdx), &rowIdx); + test_error(error, "Unable to set kernel arguments"); clEventWrapper acqEvent1, acqEvent2, kernEvent, relEvent1, relEvent2; - int numEvents = ( fenceEvent != NULL ) ? 1 : 0; - cl_event *fence_evt = ( fenceEvent != NULL ) ? &fenceEvent : NULL; + int numEvents = (fenceEvent != NULL) ? 1 : 0; + cl_event *fence_evt = (fenceEvent != NULL) ? 
&fenceEvent : NULL; - error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream0, numEvents, fence_evt, &acqEvent1 ); - test_error( error, "Unable to acquire GL obejcts"); - error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream1, numEvents, fence_evt, &acqEvent2 ); - test_error( error, "Unable to acquire GL obejcts"); + error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream0, numEvents, + fence_evt, &acqEvent1); + test_error(error, "Unable to acquire GL obejcts"); + error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream1, numEvents, + fence_evt, &acqEvent2); + test_error(error, "Unable to acquire GL obejcts"); - cl_event evts[ 2 ] = { acqEvent1, acqEvent2 }; + cl_event evts[2] = { acqEvent1, acqEvent2 }; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numThreads, NULL, 2, evts, &kernEvent ); - test_error( error, "Unable to execute test kernel" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &numThreads, NULL, 2, + evts, &kernEvent); + test_error(error, "Unable to execute test kernel"); - error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream0, 1, &kernEvent, &relEvent1 ); + error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream0, 1, &kernEvent, + &relEvent1); test_error(error, "clEnqueueReleaseGLObjects failed"); - error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream1, 1, &kernEvent, &relEvent2 ); + error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream1, 1, &kernEvent, + &relEvent2); test_error(error, "clEnqueueReleaseGLObjects failed"); - evts[ 0 ] = relEvent1; - evts[ 1 ] = relEvent2; - error = clWaitForEvents( 2, evts ); - test_error( error, "Unable to wait for release events" ); + evts[0] = relEvent1; + evts[1] = relEvent2; + error = clWaitForEvents(2, evts); + test_error(error, "Unable to wait for release events"); return 0; } -class RunThread : public genericThread -{ +class RunThread : public genericThread { public: - cl_kernel mKernel; cl_command_queue mQueue; cl_mem mStream0, mStream1; @@ -272,34 
+289,40 @@ public: cl_event mFenceEvent; size_t mNumThreads; - RunThread( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, size_t numThreads ) - : mKernel( kernel ), mQueue( queue ), mStream0( stream0 ), mStream1( stream1 ), mNumThreads( numThreads ) - { - } + RunThread(cl_kernel kernel, cl_command_queue queue, cl_mem stream0, + cl_mem stream1, size_t numThreads) + : mKernel(kernel), mQueue(queue), mStream0(stream0), mStream1(stream1), + mNumThreads(numThreads) + {} - void SetRunData( cl_int rowIdx, cl_event fenceEvent ) + void SetRunData(cl_int rowIdx, cl_event fenceEvent) { mRowIdx = rowIdx; mFenceEvent = fenceEvent; } - virtual void * IRun( void ) + virtual void *IRun(void) { - cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads ); + cl_int error = run_cl_kernel(mKernel, mQueue, mStream0, mStream1, + mRowIdx, mFenceEvent, mNumThreads); return (void *)(uintptr_t)error; } }; -int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_queue queue, bool separateThreads, GLint rend_vs, GLint read_vs, cl_device_id rend_device ) +int test_fence_sync_single(cl_device_id device, cl_context context, + cl_command_queue queue, bool separateThreads, + GLint rend_vs, GLint read_vs, + cl_device_id rend_device) { int error; const int framebufferSize = 512; - if( !is_extension_available( device, "cl_khr_gl_event" ) ) + if (!is_extension_available(device, "cl_khr_gl_event")) { - log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" ); + log_info("NOTE: cl_khr_gl_event extension not present on this device; " + "skipping fence sync test\n"); return 0; } @@ -312,10 +335,11 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ clGetPlatformIDs(0, NULL, &nplatforms); clGetPlatformIDs(1, &platform, NULL); - if (nplatforms > 1) { + if (nplatforms > 1) + { log_info("clGetPlatformIDs returned multiple values. 
This is not " - "an error, but might result in obtaining incorrect function " - "pointers if you do not want the first returned platform.\n"); + "an error, but might result in obtaining incorrect function " + "pointers if you do not want the first returned platform.\n"); // Show them the platform name, in case it is a problem. @@ -323,28 +347,35 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ char *name; clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size); - name = (char*)malloc(size); + name = (char *)malloc(size); clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL); log_info("Using platform with name: %s \n", name); free(name); } - clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncKHR"); - if( clCreateEventFromGLsyncKHR_ptr == NULL ) + clCreateEventFromGLsyncKHR_ptr = + (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform( + platform, "clCreateEventFromGLsyncKHR"); + if (clCreateEventFromGLsyncKHR_ptr == NULL) { - log_error( "ERROR: Unable to run fence_sync test (clCreateEventFromGLsyncKHR function not discovered!)\n" ); - clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncAPPLE"); + log_error("ERROR: Unable to run fence_sync test " + "(clCreateEventFromGLsyncKHR function not discovered!)\n"); + clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn) + clGetExtensionFunctionAddressForPlatform( + platform, "clCreateEventFromGLsyncAPPLE"); return -1; } #ifdef USING_ARB_sync - char *gl_version_str = (char*)glGetString( GL_VERSION ); + char *gl_version_str = (char *)glGetString(GL_VERSION); float glCoreVersion; sscanf(gl_version_str, "%f", &glCoreVersion); - if( glCoreVersion < 3.0f ) + if (glCoreVersion < 3.0f) { - log_info( "OpenGL version %f does not support fence/sync! 
Skipping test.\n", glCoreVersion ); + log_info( + "OpenGL version %f does not support fence/sync! Skipping test.\n", + glCoreVersion); return 0; } @@ -354,10 +385,13 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ GLint val, screen; CGLGetVirtualScreen(currCtx, &screen); CGLDescribePixelFormat(pixFmt, screen, kCGLPFAOpenGLProfile, &val); - if(val != kCGLOGLPVersion_3_2_Core) + if (val != kCGLOGLPVersion_3_2_Core) { - log_error( "OpenGL context was not created with OpenGL version >= 3.0 profile even though platform supports it" - "OpenGL profile %f does not support fence/sync! Skipping test.\n", glCoreVersion ); + log_error( + "OpenGL context was not created with OpenGL version >= 3.0 profile " + "even though platform supports it" + "OpenGL profile %f does not support fence/sync! Skipping test.\n", + glCoreVersion); return -1; } #else @@ -365,7 +399,7 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ HDC hdc = wglGetCurrentDC(); HGLRC hglrc = wglGetCurrentContext(); #else - Display* dpy = glXGetCurrentDisplay(); + Display *dpy = glXGetCurrentDisplay(); GLXDrawable drawable = glXGetCurrentDrawable(); GLXContext ctx = glXGetCurrentContext(); #endif @@ -386,51 +420,66 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ GLint posLoc, colLoc; GLuint shaderprogram = createShaderProgram(&posLoc, &colLoc); - if(!shaderprogram) + if (!shaderprogram) { log_error("Failed to create shader program\n"); return -1; } - float l = 0.0f; float r = framebufferSize; - float b = 0.0f; float t = framebufferSize; - - float projMatrix[16] = { 2.0f/(r-l), 0.0f, 0.0f, 0.0f, - 0.0f, 2.0f/(t-b), 0.0f, 0.0f, - 0.0f, 0.0f, -1.0f, 0.0f, - -(r+l)/(r-l), -(t+b)/(t-b), 0.0f, 1.0f - }; + float l = 0.0f; + float r = framebufferSize; + float b = 0.0f; + float t = framebufferSize; + + float projMatrix[16] = { 2.0f / (r - l), + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 2.0f / (t - b), + 0.0f, + 0.0f, + 0.0f, + 
0.0f, + -1.0f, + 0.0f, + -(r + l) / (r - l), + -(t + b) / (t - b), + 0.0f, + 1.0f }; glUseProgram(shaderprogram); GLuint projMatLoc = glGetUniformLocation(shaderprogram, "projMatrix"); glUniformMatrix4fv(projMatLoc, 1, 0, projMatrix); glUseProgram(0); - // Note: the framebuffer is just the target to verify our results against, so we don't - // really care to go through all the possible formats in this case + // Note: the framebuffer is just the target to verify our results against, + // so we don't really care to go through all the possible formats in this + // case glFramebufferWrapper glFramebuffer; glRenderbufferWrapper glRenderbuffer; - error = CreateGLRenderbufferRaw( framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, - GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, - &glFramebuffer, &glRenderbuffer ); - if( error != 0 ) - return error; + error = CreateGLRenderbufferRaw( + framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA, GL_RGBA, + GL_UNSIGNED_INT_8_8_8_8_REV, &glFramebuffer, &glRenderbuffer); + if (error != 0) return error; GLuint vao; glGenVertexArrays(1, &vao); glBindVertexArray(vao); glBufferWrapper vtxBuffer, colorBuffer; - glGenBuffers( 1, &vtxBuffer ); - glGenBuffers( 1, &colorBuffer ); + glGenBuffers(1, &vtxBuffer); + glGenBuffers(1, &colorBuffer); - const int numHorizVertices = ( framebufferSize * 64 ) + 1; + const int numHorizVertices = (framebufferSize * 64) + 1; - glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer ); - glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW ); + glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer); + glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4, + NULL, GL_STATIC_DRAW); - glBindBuffer( GL_ARRAY_BUFFER, colorBuffer ); - glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW ); + glBindBuffer(GL_ARRAY_BUFFER, colorBuffer); + glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4, + NULL, GL_STATIC_DRAW); 
// Now that the requisite objects are bound, we can attempt program // validation: @@ -439,8 +488,9 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ GLint logLength, status; glGetProgramiv(shaderprogram, GL_INFO_LOG_LENGTH, &logLength); - if (logLength > 0) { - GLchar *log = (GLchar*)malloc(logLength); + if (logLength > 0) + { + GLchar *log = (GLchar *)malloc(logLength); glGetProgramInfoLog(shaderprogram, logLength, &logLength, log); log_info("Program validate log:\n%s", log); free(log); @@ -455,125 +505,131 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ clProgramWrapper program; clKernelWrapper kernel; - clMemWrapper streams[ 2 ]; + clMemWrapper streams[2]; - if( create_single_kernel_helper( context, &program, &kernel, 1, updateBuffersKernel, "update" ) ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + updateBuffersKernel, "update")) return -1; - streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, vtxBuffer, &error ); - test_error( error, "Unable to create CL buffer from GL vertex buffer" ); + streams[0] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE, + vtxBuffer, &error); + test_error(error, "Unable to create CL buffer from GL vertex buffer"); - streams[ 1 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, colorBuffer, &error ); - test_error( error, "Unable to create CL buffer from GL color buffer" ); + streams[1] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE, + colorBuffer, &error); + test_error(error, "Unable to create CL buffer from GL color buffer"); - error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set kernel arguments"); - error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); - test_error( error, "Unable to set 
kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set kernel arguments"); cl_int horizWrap = (cl_int)framebufferSize; - error = clSetKernelArg( kernel, 2, sizeof( horizWrap ), &horizWrap ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, 2, sizeof(horizWrap), &horizWrap); + test_error(error, "Unable to set kernel arguments"); - glViewport( 0, 0, framebufferSize, framebufferSize ); - glClearColor( 0, 0, 0, 0 ); - glClear( GL_COLOR_BUFFER_BIT ); - glClear( GL_DEPTH_BUFFER_BIT ); - glDisable( GL_DEPTH_TEST ); - glEnable( GL_BLEND ); - glBlendFunc( GL_ONE, GL_ONE ); + glViewport(0, 0, framebufferSize, framebufferSize); + glClearColor(0, 0, 0, 0); + glClear(GL_COLOR_BUFFER_BIT); + glClear(GL_DEPTH_BUFFER_BIT); + glDisable(GL_DEPTH_TEST); + glEnable(GL_BLEND); + glBlendFunc(GL_ONE, GL_ONE); clEventWrapper fenceEvent; GLsync glFence = 0; // Do a loop through 8 different horizontal stripes against the framebuffer - RunThread thread( kernel, queue, streams[ 0 ], streams[ 1 ], (size_t)numHorizVertices ); + RunThread thread(kernel, queue, streams[0], streams[1], + (size_t)numHorizVertices); - for( int i = 0; i < 8; i++ ) + for (int i = 0; i < 8; i++) { // if current rendering device is not the compute device and // separateThreads == false which means compute is going on same // thread and we are using implicit synchronization (no GLSync obj used) - // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we need - // to wait for rendering to finish on other device before CL can start - // writing to CL/GL shared mem objects. When separateThreads is true i.e. - // we are using GLSync obj to synchronize then we dont need to call glFinish - // here since CL should wait for rendering on other device before this - // GLSync object to finish before it starts writing to shared mem object. 
- // Also rend_device == compute_device no need to call glFinish - if(rend_device != device && !separateThreads) - glFinish(); - - if( separateThreads ) + // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we + // need to wait for rendering to finish on other device before CL can + // start writing to CL/GL shared mem objects. When separateThreads is + // true i.e. we are using GLSync obj to synchronize then we dont need to + // call glFinish here since CL should wait for rendering on other device + // before this GLSync object to finish before it starts writing to + // shared mem object. Also rend_device == compute_device no need to call + // glFinish + if (rend_device != device && !separateThreads) glFinish(); + + if (separateThreads) { - if (fenceEvent != NULL) - { - clReleaseEvent(fenceEvent); - glDeleteSyncFunc(glFence); - } + glDeleteSyncFunc(glFence); glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - fenceEvent = clCreateEventFromGLsyncKHR_ptr(context, glFence, &error); + fenceEvent = + clCreateEventFromGLsyncKHR_ptr(context, glFence, &error); test_error(error, "Unable to create CL event from GL fence"); - // in case of explicit synchronization, we just wait for the sync object to complete - // in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility - // to flush on the context on which glSync is created + // in case of explicit synchronization, we just wait for the sync + // object to complete in clEnqueueAcquireGLObject but we dont flush. 
+ // Its application's responsibility to flush on the context on which + // glSync is created glFlush(); - thread.SetRunData( (cl_int)i, fenceEvent ); + thread.SetRunData((cl_int)i, fenceEvent); thread.Start(); error = (cl_int)(size_t)thread.Join(); } else { - error = run_cl_kernel( kernel, queue, streams[ 0 ], streams[ 1 ], (cl_int)i, fenceEvent, (size_t)numHorizVertices ); + error = + run_cl_kernel(kernel, queue, streams[0], streams[1], (cl_int)i, + fenceEvent, (size_t)numHorizVertices); } - test_error( error, "Unable to run CL kernel" ); + test_error(error, "Unable to run CL kernel"); glUseProgram(shaderprogram); glEnableVertexAttribArray(posLoc); glEnableVertexAttribArray(colLoc); - glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer ); - glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0); - glBindBuffer( GL_ARRAY_BUFFER, colorBuffer ); - glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0); - glBindBuffer( GL_ARRAY_BUFFER, 0 ); + glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer); + glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE, + 4 * sizeof(GLfloat), 0); + glBindBuffer(GL_ARRAY_BUFFER, colorBuffer); + glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE, + 4 * sizeof(GLfloat), 0); + glBindBuffer(GL_ARRAY_BUFFER, 0); - glDrawArrays( GL_TRIANGLE_STRIP, 0, numHorizVertices * 2 ); + glDrawArrays(GL_TRIANGLE_STRIP, 0, numHorizVertices * 2); glDisableVertexAttribArray(posLoc); glDisableVertexAttribArray(colLoc); glUseProgram(0); - if( separateThreads ) + if (separateThreads) { - // If we're on the same thread, then we're testing implicit syncing, so we - // don't need the actual fence code - if( fenceEvent != NULL ) - { - clReleaseEvent( fenceEvent ); - glDeleteSyncFunc( glFence ); - } + // If we're on the same thread, then we're testing implicit syncing, + // so we don't need the actual fence code + glDeleteSyncFunc(glFence); + - glFence = glFenceSyncFunc( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 ); - fenceEvent = 
clCreateEventFromGLsyncKHR_ptr( context, glFence, &error ); - test_error( error, "Unable to create CL event from GL fence" ); + glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + fenceEvent = + clCreateEventFromGLsyncKHR_ptr(context, glFence, &error); + test_error(error, "Unable to create CL event from GL fence"); - // in case of explicit synchronization, we just wait for the sync object to complete - // in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility - // to flush on the context on which glSync is created + // in case of explicit synchronization, we just wait for the sync + // object to complete in clEnqueueAcquireGLObject but we dont flush. + // Its application's responsibility to flush on the context on which + // glSync is created glFlush(); } else glFinish(); } - if( glFence != 0 ) - // Don't need the final release for fenceEvent, because the wrapper will take care of that - glDeleteSyncFunc( glFence ); + if (glFence != 0) + // Don't need the final release for fenceEvent, because the wrapper will + // take care of that + glDeleteSyncFunc(glFence); #ifdef __APPLE__ CGLSetVirtualScreen(CGLGetCurrentContext(), read_vs); @@ -585,54 +641,62 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ #endif #endif // Grab the contents of the final framebuffer - BufferOwningPtr resultData( ReadGLRenderbuffer( glFramebuffer, glRenderbuffer, - GL_COLOR_ATTACHMENT0_EXT, - GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar, - framebufferSize, 128 ) ); - - // Check the contents now. We should end up with solid color bands 32 pixels high and the - // full width of the framebuffer, at values (128,128,128) due to the additive blending - for( int i = 0; i < 8; i++ ) + BufferOwningPtr resultData(ReadGLRenderbuffer( + glFramebuffer, glRenderbuffer, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA, + GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar, framebufferSize, 128)); + + // Check the contents now. 
We should end up with solid color bands 32 pixels + // high and the full width of the framebuffer, at values (128,128,128) due + // to the additive blending + for (int i = 0; i < 8; i++) { - for( int y = 0; y < 4; y++ ) + for (int y = 0; y < 4; y++) { - // Note: coverage will be double because the 63-0 triangle overwrites again at the end of the pass - cl_uchar valA = ( ( ( i + 1 ) & 1 ) ) * numHorizVertices * 2 / framebufferSize; - cl_uchar valB = ( ( ( i + 1 ) & 2 ) >> 1 ) * numHorizVertices * 2 / framebufferSize; - cl_uchar valC = ( ( ( i + 1 ) & 4 ) >> 2 ) * numHorizVertices * 2 / framebufferSize; - - cl_uchar *row = (cl_uchar *)&resultData[ ( i * 16 + y ) * framebufferSize * 4 ]; - for( int x = 0; x < ( framebufferSize - 1 ) - 1; x++ ) + // Note: coverage will be double because the 63-0 triangle + // overwrites again at the end of the pass + cl_uchar valA = + (((i + 1) & 1)) * numHorizVertices * 2 / framebufferSize; + cl_uchar valB = + (((i + 1) & 2) >> 1) * numHorizVertices * 2 / framebufferSize; + cl_uchar valC = + (((i + 1) & 4) >> 2) * numHorizVertices * 2 / framebufferSize; + + cl_uchar *row = + (cl_uchar *)&resultData[(i * 16 + y) * framebufferSize * 4]; + for (int x = 0; x < (framebufferSize - 1) - 1; x++) { - if( ( row[ x * 4 ] != valA ) || ( row[ x * 4 + 1 ] != valB ) || - ( row[ x * 4 + 2 ] != valC ) ) + if ((row[x * 4] != valA) || (row[x * 4 + 1] != valB) + || (row[x * 4 + 2] != valC)) { - log_error( "ERROR: Output framebuffer did not validate!\n" ); - DumpGLBuffer( GL_UNSIGNED_BYTE, framebufferSize, 128, resultData ); - log_error( "RUNS:\n" ); + log_error("ERROR: Output framebuffer did not validate!\n"); + DumpGLBuffer(GL_UNSIGNED_BYTE, framebufferSize, 128, + resultData); + log_error("RUNS:\n"); uint32_t *p = (uint32_t *)(char *)resultData; size_t a = 0; - for( size_t t = 1; t < framebufferSize * framebufferSize; t++ ) + for (size_t t = 1; t < framebufferSize * framebufferSize; + t++) { - if( p[ a ] != 0 ) + if (p[a] != 0) { - if( p[ t ] == 0 ) + 
if (p[t] == 0) { - log_error( "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", a, t - 1, - (int)( a % framebufferSize ), (int)( a / framebufferSize ), - (int)( ( t - 1 ) % framebufferSize ), (int)( ( t - 1 ) / framebufferSize ), - p[ a ] ); + log_error( + "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", + a, t - 1, (int)(a % framebufferSize), + (int)(a / framebufferSize), + (int)((t - 1) % framebufferSize), + (int)((t - 1) / framebufferSize), p[a]); a = t; } } else { - if( p[ t ] != 0 ) + if (p[t] != 0) { a = t; } } - } return -1; } @@ -645,46 +709,56 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ return 0; } -int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +int test_fence_sync(cl_device_id device, cl_context context, + cl_command_queue queue, int numElements) { GLint vs_count = 0; cl_device_id *device_list = NULL; - if( !is_extension_available( device, "cl_khr_gl_event" ) ) + if (!is_extension_available(device, "cl_khr_gl_event")) { - log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" ); + log_info("NOTE: cl_khr_gl_event extension not present on this device; " + "skipping fence sync test\n"); return 0; } #ifdef __APPLE__ CGLContextObj ctx = CGLGetCurrentContext(); CGLPixelFormatObj pix = CGLGetPixelFormat(ctx); - CGLError err = CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count); + CGLError err = + CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count); - device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*vs_count); - clGetGLContextInfoAPPLE(context, ctx, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, sizeof(cl_device_id)*vs_count, device_list, NULL); + device_list = (cl_device_id *)malloc(sizeof(cl_device_id) * vs_count); + clGetGLContextInfoAPPLE(context, ctx, + CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, + sizeof(cl_device_id) * vs_count, device_list, NULL); #else - // Need 
platform specific way of getting devices from CL context to which OpenGL can render - // If not available it can be replaced with clGetContextInfo with CL_CONTEXT_DEVICES + // Need platform specific way of getting devices from CL context to which + // OpenGL can render If not available it can be replaced with + // clGetContextInfo with CL_CONTEXT_DEVICES size_t device_cb; - cl_int err = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb); - if( err != CL_SUCCESS ) + cl_int err = + clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb); + if (err != CL_SUCCESS) { - print_error( err, "Unable to get device count from context" ); - return -1; + print_error(err, "Unable to get device count from context"); + return -1; } vs_count = (GLint)device_cb / sizeof(cl_device_id); - if (vs_count < 1) { - log_error("No devices found.\n"); - return -1; + if (vs_count < 1) + { + log_error("No devices found.\n"); + return -1; } - device_list = (cl_device_id *) malloc(device_cb); - err = clGetContextInfo( context, CL_CONTEXT_DEVICES, device_cb, device_list, NULL); - if( err != CL_SUCCESS ) { - free(device_list); - print_error( err, "Unable to get device list from context" ); - return -1; + device_list = (cl_device_id *)malloc(device_cb); + err = clGetContextInfo(context, CL_CONTEXT_DEVICES, device_cb, device_list, + NULL); + if (err != CL_SUCCESS) + { + free(device_list); + print_error(err, "Unable to get device list from context"); + return -1; } #endif @@ -695,30 +769,38 @@ int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue q // Loop through all the devices capable to OpenGL rendering // and set them as current rendering target - for(rend_vs = 0; rend_vs < vs_count; rend_vs++) + for (rend_vs = 0; rend_vs < vs_count; rend_vs++) { // Loop through all the devices and set them as current // compute target - for(read_vs = 0; read_vs < vs_count; read_vs++) + for (read_vs = 0; read_vs < vs_count; read_vs++) { - cl_device_id 
rend_device = device_list[rend_vs], read_device = device_list[read_vs]; + cl_device_id rend_device = device_list[rend_vs], + read_device = device_list[read_vs]; char rend_name[200], read_name[200]; - clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name), rend_name, NULL); - clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name), read_name, NULL); + clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name), + rend_name, NULL); + clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name), + read_name, NULL); - log_info("Rendering on: %s, read back on: %s\n", rend_name, read_name); - error = test_fence_sync_single( device, context, queue, false, rend_vs, read_vs, rend_device ); + log_info("Rendering on: %s, read back on: %s\n", rend_name, + read_name); + error = test_fence_sync_single(device, context, queue, false, + rend_vs, read_vs, rend_device); any_failed |= error; - if( error != 0 ) - log_error( "ERROR: Implicit syncing with GL sync events failed!\n\n" ); + if (error != 0) + log_error( + "ERROR: Implicit syncing with GL sync events failed!\n\n"); else log_info("Implicit syncing Passed\n"); - error = test_fence_sync_single( device, context, queue, true, rend_vs, read_vs, rend_device ); + error = test_fence_sync_single(device, context, queue, true, + rend_vs, read_vs, rend_device); any_failed |= error; - if( error != 0 ) - log_error( "ERROR: Explicit syncing with GL sync events failed!\n\n" ); + if (error != 0) + log_error( + "ERROR: Explicit syncing with GL sync events failed!\n\n"); else log_info("Explicit syncing Passed\n"); } -- cgit v1.2.3 From 79f692d8e59f37236c179ebbca086231d5f5c9bc Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Wed, 21 Jul 2021 01:51:29 -0600 Subject: subgroups: Fix setting cl_halfs and progress check. (#1278) * subgroups: Fix setting cl_halfs and progress check. cl_float testing uses set_value such that a generated cl_ulong of 1 is stored as 1.0F in a logical sense. 
However, cl_half values aren't intrinsic to C++ and generated cl_ulongs less than 1024 in particular are interpreted bitwise as subnormals. The test fails on compute devices lacking subnormal support. Perform the logical conversion to cl_half. Fix independent forward progress check. * subgroups_half: Address review comments * subgroups_half: Formatting fixes required by check-format * subgroups_half: Modified to query and use rounding mode supported by device Co-authored-by: spauls --- test_conformance/subgroups/main.cpp | 18 +++++++++++++++ .../subgroups/subgroup_common_templates.h | 2 +- test_conformance/subgroups/subhelpers.h | 3 ++- test_conformance/subgroups/test_ifp.cpp | 26 +++++++++++++--------- 4 files changed, 36 insertions(+), 13 deletions(-) diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp index 44416dd7..ebe94558 100644 --- a/test_conformance/subgroups/main.cpp +++ b/test_conformance/subgroups/main.cpp @@ -19,8 +19,10 @@ #include #include "procs.h" #include "harness/testHarness.h" +#include "CL/cl_half.h" MTdata gMTdata; +cl_half_rounding_mode g_rounding_mode; test_definition test_list[] = { ADD_TEST_VERSION(sub_group_info_ext, Version(2, 0)), @@ -66,6 +68,22 @@ static test_status InitCL(cl_device_id device) ret = TEST_SKIP; } } + // Determine the rounding mode to be used in float to half conversions in + // init and reference code + const cl_device_fp_config fpConfig = get_default_rounding_mode(device); + + if (fpConfig == CL_FP_ROUND_TO_NEAREST) + { + g_rounding_mode = CL_HALF_RTE; + } + else if (fpConfig == CL_FP_ROUND_TO_ZERO && gIsEmbedded) + { + g_rounding_mode = CL_HALF_RTZ; + } + else + { + assert(false && "Unreachable"); + } return ret; } diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index b30c416b..4333e95b 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ 
b/test_conformance/subgroups/subgroup_common_templates.h @@ -301,7 +301,7 @@ static float to_float(subgroups::cl_half x) { return cl_half_to_float(x.data); } static subgroups::cl_half to_half(float x) { subgroups::cl_half value; - value.data = cl_half_from_float(x, CL_HALF_RTE); + value.data = cl_half_from_float(x, g_rounding_mode); return value; } diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 93673b35..9232cded 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -28,6 +28,7 @@ #define NR_OF_ACTIVE_WORK_ITEMS 4 extern MTdata gMTdata; +extern cl_half_rounding_mode g_rounding_mode; struct WorkGroupParams { @@ -1080,7 +1081,7 @@ template typename std::enable_if::is_sb_scalar_type::value>::type set_value(Ty &lhs, const cl_ulong &rhs) { - lhs.data = rhs; + lhs.data = cl_half_from_float(static_cast(rhs), g_rounding_mode); } // compare for common vectors diff --git a/test_conformance/subgroups/test_ifp.cpp b/test_conformance/subgroups/test_ifp.cpp index 428f2cdc..fccaa8c7 100644 --- a/test_conformance/subgroups/test_ifp.cpp +++ b/test_conformance/subgroups/test_ifp.cpp @@ -360,17 +360,21 @@ int test_ifp_ext(cl_device_id device, cl_context context, } // ifp only in subgroup functions tests: test_status error; - error = checkIFPSupport(device, ifpSupport); - if (error != TEST_PASS) + auto device_cl_version = get_device_cl_version(device); + if (device_cl_version >= Version(2, 1)) { - return error; - } - if (ifpSupport == false) - { - log_info( - "Error reason: the extension cl_khr_subgroups requires that " - "Independed forward progress has to be supported by device.\n"); - return TEST_FAIL; + error = checkIFPSupport(device, ifpSupport); + if (error != TEST_PASS) + { + return error; + } + if (ifpSupport == false) + { + log_info( + "Error reason: the extension cl_khr_subgroups requires that " + "Independed forward progress has to be supported by device.\n"); + return 
TEST_FAIL; + } } return test_ifp(device, context, queue, num_elements, false); -} \ No newline at end of file +} -- cgit v1.2.3 From cc0b46e4570d936c38795a20c11315f13fa25c85 Mon Sep 17 00:00:00 2001 From: kalchr01 <83217667+kalchr01@users.noreply.github.com> Date: Mon, 9 Aug 2021 11:20:40 +0100 Subject: Add tests for entrypoint cl_khr_suggested_local_work_size (#1264) * Add tests for entrypoint cl_khr_suggested_local_work_size Tests added within test_conformance/workgroups. The tests cover several shapes (num dimensions) and sizes of global work size, kernels using local memory (dynamic and static) and present/non-present global work offset. Signed-off-by: Kallia Chronaki * Fix in comparison for error checking Signed-off-by: Kallia Chronaki * 'test_wg_suggested_local_work_size' fixes * Refactoring of 'test_wg_suggested_local_work_size' Modifications to reduce code duplication and minimize build time --- test_conformance/workgroups/CMakeLists.txt | 1 + test_conformance/workgroups/main.cpp | 33 +- test_conformance/workgroups/procs.h | 18 +- .../test_wg_suggested_local_work_size.cpp | 611 +++++++++++++++++++++ 4 files changed, 646 insertions(+), 17 deletions(-) create mode 100644 test_conformance/workgroups/test_wg_suggested_local_work_size.cpp diff --git a/test_conformance/workgroups/CMakeLists.txt b/test_conformance/workgroups/CMakeLists.txt index 08886086..c90bef88 100644 --- a/test_conformance/workgroups/CMakeLists.txt +++ b/test_conformance/workgroups/CMakeLists.txt @@ -14,6 +14,7 @@ set(${MODULE_NAME}_SOURCES test_wg_scan_inclusive_add.cpp test_wg_scan_inclusive_min.cpp test_wg_scan_inclusive_max.cpp + test_wg_suggested_local_work_size.cpp ) include(../CMakeCommon.txt) diff --git a/test_conformance/workgroups/main.cpp b/test_conformance/workgroups/main.cpp index 41ffa741..abb1145b 100644 --- a/test_conformance/workgroups/main.cpp +++ b/test_conformance/workgroups/main.cpp @@ -24,27 +24,30 @@ #endif test_definition test_list[] = { - ADD_TEST(work_group_all), - 
ADD_TEST(work_group_any), - ADD_TEST(work_group_reduce_add), - ADD_TEST(work_group_reduce_min), - ADD_TEST(work_group_reduce_max), - ADD_TEST(work_group_scan_inclusive_add), - ADD_TEST(work_group_scan_inclusive_min), - ADD_TEST(work_group_scan_inclusive_max), - ADD_TEST(work_group_scan_exclusive_add), - ADD_TEST(work_group_scan_exclusive_min), - ADD_TEST(work_group_scan_exclusive_max), - ADD_TEST(work_group_broadcast_1D), - ADD_TEST(work_group_broadcast_2D), - ADD_TEST(work_group_broadcast_3D), + ADD_TEST_VERSION(work_group_all, Version(2, 0)), + ADD_TEST_VERSION(work_group_any, Version(2, 0)), + ADD_TEST_VERSION(work_group_reduce_add, Version(2, 0)), + ADD_TEST_VERSION(work_group_reduce_min, Version(2, 0)), + ADD_TEST_VERSION(work_group_reduce_max, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_inclusive_add, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_inclusive_min, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_inclusive_max, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_exclusive_add, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_exclusive_min, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_exclusive_max, Version(2, 0)), + ADD_TEST_VERSION(work_group_broadcast_1D, Version(2, 0)), + ADD_TEST_VERSION(work_group_broadcast_2D, Version(2, 0)), + ADD_TEST_VERSION(work_group_broadcast_3D, Version(2, 0)), + ADD_TEST(work_group_suggested_local_size_1D), + ADD_TEST(work_group_suggested_local_size_2D), + ADD_TEST(work_group_suggested_local_size_3D) }; const int test_num = ARRAY_SIZE(test_list); test_status InitCL(cl_device_id device) { auto version = get_device_cl_version(device); - auto expected_min_version = Version(2, 0); + auto expected_min_version = Version(1, 2); if (version < expected_min_version) { version_expected_info("Test", "OpenCL", diff --git a/test_conformance/workgroups/procs.h b/test_conformance/workgroups/procs.h index 2e6e79e2..6143d525 100644 --- a/test_conformance/workgroups/procs.h +++ 
b/test_conformance/workgroups/procs.h @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2017, 2021 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -16,6 +16,7 @@ #include "harness/testHarness.h" #include "harness/kernelHelpers.h" #include "harness/errorHelpers.h" +#include "harness/typeWrappers.h" #include "harness/conversions.h" #include "harness/mt19937.h" @@ -36,3 +37,16 @@ extern int test_work_group_scan_exclusive_max(cl_device_id deviceID, cl_context extern int test_work_group_scan_inclusive_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_work_group_scan_inclusive_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_work_group_scan_inclusive_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_work_group_suggested_local_size_1D(cl_device_id device, + cl_context context, + cl_command_queue queue, + int n_elems); +extern int test_work_group_suggested_local_size_2D(cl_device_id device, + cl_context context, + cl_command_queue queue, + int n_elems); +extern int test_work_group_suggested_local_size_3D(cl_device_id device, + cl_context context, + cl_command_queue queue, + int n_elems); diff --git a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp new file mode 100644 index 00000000..1dc1b39c --- /dev/null +++ b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp @@ -0,0 +1,611 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" +#include + +/** @brief Gets the number of elements of type s in a fixed length array of s */ +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) +#define test_error_ret_and_free(errCode, msg, retValue, ptr) \ + { \ + auto errCodeResult = errCode; \ + if (errCodeResult != CL_SUCCESS) \ + { \ + print_error(errCodeResult, msg); \ + free(ptr); \ + return retValue; \ + } \ + } + +const char* wg_scan_local_work_group_size = R"( + bool is_zero_linear_id() + { + size_t linear_id; +#if __OPENCL_VERSION__ < CL_VERSION_2_0 + linear_id = ((get_global_id(2) – get_global_offset(2)) * get_global_size(1) * get_global_size(0)) + + ((get_global_id(1) – get_global_offset(1)) * get_global_size(0)) + + (get_global_id(0) – get_global_offset(0)); +#else + linear_id = get_global_linear_id(); +#endif + return linear_id == 0; + } + + uint get_l_size(size_t dim) + { +#if __OPENCL_VERSION__ < CL_VERSION_2_0 + return get_local_size(dim); +#else + return get_enqueued_local_size(dim); +#endif + } + + __kernel void test_wg_scan_local_work_group_size(global uint *output) + { + if(!is_zero_linear_id()) return; + for (uint i = 0; i < 3; i++) + { + output[i] = get_l_size(i); + } + } + __kernel void test_wg_scan_local_work_group_size_static_local( + global uint *output) + { + __local char c[LOCAL_MEM_SIZE]; + + if(!is_zero_linear_id()) return; + for (uint i = 0; i < 3; i++) + { + output[i] = get_l_size(i); + } + } + __kernel void test_wg_scan_local_work_group_size_dynlocal( + global 
uint *output, + __local char * c) + { + if(!is_zero_linear_id()) return; + for (uint i = 0; i < 3; i++) + { + output[i] = get_l_size(i); + } + };)"; + +bool is_prime(size_t a) +{ + size_t c; + + for (c = 2; c < a; c++) + { + if (a % c == 0) return false; + } + return true; +} + +bool is_not_prime(size_t a) { return !is_prime(a); } + +bool is_not_even(size_t a) { return (is_prime(a) || (a % 2 == 1)); } + +bool is_not_odd(size_t a) { return (is_prime(a) || (a % 2 == 0)); } + +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) +/* The numbers we chose in the value_range are to be used for the second and + third dimension of the global work group size. The numbers below cover many + different cases: 1024 is a power of 2, 3 is an odd and small prime number, 12 + is a multiple of 4 but not a power of 2, 1031 is a large odd and prime number + and 1 is to test the lack of this dimension if the others are present */ +const size_t value_range[] = { 1024, 3, 12, 1031, 1 }; +/* The value_range_nD contains numbers to be used for the experiments with 2D + and 3D global work sizes. This is because we need smaller numbers so that the + resulting number of work items is meaningful and does not become too large. 
+ The cases here are: 64 that is a power of 2, 3 is an odd and small prime + number, 12 is a multiple of 4 but not a power of 2, 113 is a large prime + number + and 1 is to test the lack of this dimension if the others are present */ +const size_t value_range_nD[] = { 64, 3, 12, 113, 1 }; +const size_t basic_increment = 16; +const size_t primes_increment = 1; +enum num_dims +{ + _1D = 1, + _2D = 2, + _3D = 3 +}; + +int do_test(cl_device_id device, cl_context context, cl_command_queue queue, + cl_kernel scan_kernel, int work_dim, size_t global_work_offset[3], + size_t test_values[3], size_t dyn_mem_size) +{ + size_t local_work_size[] = { 1, 1, 1 }; + size_t suggested_total_size; + size_t workgroupinfo_size; + cl_uint kernel_work_size[3] = { 0 }; + clMemWrapper buffer; + cl_platform_id platform; + + int err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), + &platform, NULL); + test_error_ret(err, "clGetDeviceInfo failed", -1); + clGetKernelSuggestedLocalWorkSizeKHR_fn + clGetKernelSuggestedLocalWorkSizeKHR = + (clGetKernelSuggestedLocalWorkSizeKHR_fn) + clGetExtensionFunctionAddressForPlatform( + platform, "clGetKernelSuggestedLocalWorkSizeKHR"); + + if (clGetKernelSuggestedLocalWorkSizeKHR == NULL) + { + log_info("Extension 'cl_khr_suggested_local_work_size' could not be " + "found.\n"); + return TEST_FAIL; + } + + /* Create the actual buffer, using local_buffer as the host pointer, and ask + * to copy that into the buffer */ + buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(kernel_work_size), NULL, &err); + test_error_ret(err, "clCreateBuffer failed", -1); + err = clSetKernelArg(scan_kernel, 0, sizeof(buffer), &buffer); + test_error_ret(err, "clSetKernelArg failed", -1); + if (dyn_mem_size) + { + err = clSetKernelArg(scan_kernel, 1, dyn_mem_size, NULL); + test_error_ret(err, "clSetKernelArg failed", -1); + } + err = clGetKernelSuggestedLocalWorkSizeKHR(queue, scan_kernel, work_dim, + global_work_offset, test_values, + local_work_size); + 
test_error_ret(err, "clGetKernelSuggestedLocalWorkSizeKHR failed", -1); + suggested_total_size = + local_work_size[0] * local_work_size[1] * local_work_size[2]; + err = clGetKernelWorkGroupInfo( + scan_kernel, device, CL_KERNEL_WORK_GROUP_SIZE, + sizeof(workgroupinfo_size), &workgroupinfo_size, NULL); + test_error_ret(err, "clGetKernelWorkGroupInfo failed", -1); + if (suggested_total_size > workgroupinfo_size) + { + std::cout << "The suggested work group size consist of " + << suggested_total_size << " work items.\n" + << "Work items are limited by " << workgroupinfo_size + << std::endl; + std::cout << "Size from clGetKernelWorkGroupInfo: " + << workgroupinfo_size; + std::cout << "\nSize from clGetKernelSuggestedLocalWorkSizeKHR: " + << local_work_size[0] * local_work_size[1] + * local_work_size[2] + << std::endl; + return -1; + } + + err = + clEnqueueNDRangeKernel(queue, scan_kernel, work_dim, global_work_offset, + test_values, // global work size + NULL, 0, NULL, NULL); + test_error_ret(err, "clEnqueueNDRangeKernel failed", -1); + err = clEnqueueReadBuffer(queue, buffer, CL_NON_BLOCKING, 0, + sizeof(kernel_work_size), kernel_work_size, 0, + NULL, NULL); + test_error_ret(err, "clEnqueueReadBuffer failed", -1); + err = clFinish(queue); + test_error_ret(err, "clFinish failed", -1); + + if (kernel_work_size[0] != local_work_size[0] + || kernel_work_size[1] != local_work_size[1] + || kernel_work_size[2] != local_work_size[2]) + { + std::cout + << "Kernel work size differs from local work size suggested:\n" + << "Kernel work size: (" << kernel_work_size[0] << ", " + << kernel_work_size[1] << ", " << kernel_work_size[2] << ")" + << "Local work size: (" << local_work_size[0] << ", " + << local_work_size[1] << ", " << local_work_size[2] << ")\n"; + return -1; + } + return err; +} + +int do_test_work_group_suggested_local_size( + cl_device_id device, cl_context context, cl_command_queue queue, + bool (*skip_cond)(size_t), size_t start, size_t end, size_t incr, + cl_long 
max_local_mem_size, size_t global_work_offset[], num_dims dim) +{ + clProgramWrapper scan_program; + clKernelWrapper scan_kernel; + int err; + size_t test_values[] = { 1, 1, 1 }; + std::string kernel_names[6] = { + "test_wg_scan_local_work_group_size", + "test_wg_scan_local_work_group_size_static_local", + "test_wg_scan_local_work_group_size_static_local", + "test_wg_scan_local_work_group_size_static_local", + "test_wg_scan_local_work_group_size_static_local", + "test_wg_scan_local_work_group_size_dynlocal" + }; + std::string str_local_mem_size[6] = { + "-DLOCAL_MEM_SIZE=1", "-DLOCAL_MEM_SIZE=1024", + "-DLOCAL_MEM_SIZE=4096", "-DLOCAL_MEM_SIZE=16384", + "-DLOCAL_MEM_SIZE=32768", "-DLOCAL_MEM_SIZE=1" + }; + size_t local_mem_size[6] = { 1, 1024, 4096, 16384, 32768, 1 }; + size_t dyn_mem_size[6] = { 0, 0, 0, 0, 0, 1024 }; + cl_ulong kernel_local_mem_size; + for (int kernel_num = 0; kernel_num < 6; kernel_num++) + { + if (max_local_mem_size < local_mem_size[kernel_num]) continue; + // Create the kernel + err = create_single_kernel_helper( + context, &scan_program, &scan_kernel, 1, + &wg_scan_local_work_group_size, (kernel_names[kernel_num]).c_str(), + (str_local_mem_size[kernel_num]).c_str()); + test_error_ret(err, + ("create_single_kernel_helper failed for kernel " + + kernel_names[kernel_num]) + .c_str(), + -1); + + // Check if the local memory used by the kernel is going to exceed the + // max_local_mem_size + err = clGetKernelWorkGroupInfo( + scan_kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL); + test_error_ret(err, "clGetKernelWorkGroupInfo failed", -1); + if (kernel_local_mem_size > max_local_mem_size) continue; + // return error if no number is found due to the skip condition + err = -1; + unsigned int j = 0; + size_t num_elems = NELEMS(value_range); + for (size_t i = start; i < end; i += incr) + { + if (skip_cond(i)) continue; + err = 0; + test_values[0] = i; + if (dim == _2D) test_values[1] = 
value_range_nD[j++ % num_elems]; + if (dim == _3D) + { + test_values[1] = value_range_nD[j++ % num_elems]; + test_values[2] = value_range_nD[rand() % num_elems]; + } + err |= do_test(device, context, queue, scan_kernel, dim, + global_work_offset, test_values, + dyn_mem_size[kernel_num]); + test_error_ret( + err, + ("do_test failed for kernel " + kernel_names[kernel_num]) + .c_str(), + -1); + } + } + return err; +} + +int test_work_group_suggested_local_size_1D(cl_device_id device, + cl_context context, + cl_command_queue queue, int n_elems) +{ + if (!is_extension_available(device, "cl_khr_suggested_local_work_size")) + { + log_info("Device does not support 'cl_khr_suggested_local_work_size'. " + "Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + cl_long max_local_mem_size; + cl_int err = + clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, + sizeof(max_local_mem_size), &max_local_mem_size, NULL); + test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.", + -1); + + size_t start, end, incr; + size_t global_work_offset[] = { 0, 0, 0 }; + size_t max_work_items = 0; + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_work_items), &max_work_items, NULL); + + // odds + start = 1; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret( + err, "test_work_group_suggested_local_size_1D for odds failed.", -1); + log_info("test_work_group_suggested_local_size_1D odds passed\n"); + + // evens + start = 2; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret( + err, "test_work_group_suggested_local_size_1D for evens failed.", -1); + log_info("test_work_group_suggested_local_size_1D evens passed\n"); + + // 
primes + start = max_work_items + 1; + end = 2 * max_work_items; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret( + err, "test_work_group_suggested_local_size_1D for primes failed.", -1); + log_info("test_work_group_suggested_local_size_1D primes passed\n"); + + global_work_offset[0] = 10; + global_work_offset[1] = 10; + global_work_offset[2] = 10; + // odds + start = 1; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret(err, + "test_work_group_suggested_local_size_1D for odds with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_1D odds with " + "global_work_offset passed\n"); + + // evens + start = 2; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret(err, + "test_work_group_suggested_local_size_1D for evens with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_1D evens with " + "global_work_offset passed\n"); + + // primes + start = max_work_items + 1; + end = 2 * max_work_items; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret(err, + "test_work_group_suggested_local_size_1D for primes with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_1D primes with " + "global_work_offset passed\n"); + + return err; +} + +int test_work_group_suggested_local_size_2D(cl_device_id device, + cl_context context, + cl_command_queue 
queue, int n_elems) +{ + if (!is_extension_available(device, "cl_khr_suggested_local_work_size")) + { + log_info("Device does not support 'cl_khr_suggested_local_work_size'. " + "Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + cl_long max_local_mem_size; + cl_int err = + clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, + sizeof(max_local_mem_size), &max_local_mem_size, NULL); + test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.", + -1); + + size_t start, end, incr; + size_t global_work_offset[] = { 0, 0, 0 }; + size_t max_work_items = 0; + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_work_items), &max_work_items, NULL); + + // odds + start = 1; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret( + err, "test_work_group_suggested_local_size_2D for odds failed.", -1); + log_info("test_work_group_suggested_local_size_2D odds passed\n"); + + // evens + start = 2; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret( + err, "test_work_group_suggested_local_size_2D for evens failed.", -1); + log_info("test_work_group_suggested_local_size_2D evens passed\n"); + + // primes + start = max_work_items + 1; + end = max_work_items + max_work_items / 4; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret( + err, "test_work_group_suggested_local_size_2D for primes failed.", -1); + log_info("test_work_group_suggested_local_size_2D primes passed\n"); + + global_work_offset[0] = 10; + global_work_offset[1] = 10; + global_work_offset[2] = 10; + + // 
odds + start = 1; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret(err, + "test_work_group_suggested_local_size_2D for odds with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_2D odds with " + "global_work_offset passed\n"); + + // evens + start = 2; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret(err, + "test_work_group_suggested_local_size_2D for evens with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_2D evens with " + "global_work_offset passed\n"); + + // primes + start = max_work_items + 1; + end = max_work_items + max_work_items / 4; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret(err, + "test_work_group_suggested_local_size_2D for primes with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_2D primes with " + "global_work_offset passed\n"); + + return err; +} + +int test_work_group_suggested_local_size_3D(cl_device_id device, + cl_context context, + cl_command_queue queue, int n_elems) +{ + if (!is_extension_available(device, "cl_khr_suggested_local_work_size")) + { + log_info("Device does not support 'cl_khr_suggested_local_work_size'. 
" + "Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + cl_long max_local_mem_size; + cl_int err = + clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, + sizeof(max_local_mem_size), &max_local_mem_size, NULL); + test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.", + -1); + + size_t start, end, incr; + size_t global_work_offset[] = { 0, 0, 0 }; + size_t max_work_items = 0; + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_work_items), &max_work_items, NULL); + + // odds + start = 1; + end = max_work_items / 2; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret( + err, "test_work_group_suggested_local_size_3D for odds failed.", -1); + log_info("test_work_group_suggested_local_size_3D odds passed\n"); + + // evens + start = 2; + end = max_work_items / 2; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret( + err, "test_work_group_suggested_local_size_3D for evens failed.", -1); + log_info("test_work_group_suggested_local_size_3D evens passed\n"); + + // primes + start = max_work_items + 1; + end = max_work_items + max_work_items / 4; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret( + err, "test_work_group_suggested_local_size_3D for primes failed.", -1); + log_info("test_work_group_suggested_local_size_3D primes passed\n"); + + global_work_offset[0] = 10; + global_work_offset[1] = 10; + global_work_offset[2] = 10; + + // odds + start = 1; + end = max_work_items / 2; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, 
end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret(err, + "test_work_group_suggested_local_size_3D for odds with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_3D odds with " + "global_work_offset passed\n"); + + // evens + start = 2; + end = max_work_items / 2; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret(err, + "test_work_group_suggested_local_size_3D for evens with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_3D evens with " + "global_work_offset passed\n"); + + // primes + start = max_work_items + 1; + end = max_work_items + max_work_items / 4; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret(err, + "test_work_group_suggested_local_size_3D for primes with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_3D primes with " + "global_work_offset passed\n"); + + return err; +} \ No newline at end of file -- cgit v1.2.3 From 4759e5cae0e3b3b6dd841fe28ad01f4b4f2478e6 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 11 Aug 2021 10:03:44 -0700 Subject: remove testing for scalar vloada_half (#1293) --- test_conformance/half/Test_vLoadHalf.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/test_conformance/half/Test_vLoadHalf.cpp b/test_conformance/half/Test_vLoadHalf.cpp index 52867c25..5dfac7a3 100644 --- a/test_conformance/half/Test_vLoadHalf.cpp +++ b/test_conformance/half/Test_vLoadHalf.cpp @@ -37,14 +37,12 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned ) const char *vector_size_names[] = {"1", "2", "4", "8", "16", "3"}; int minVectorSize = kMinVectorSize; - // 
There is no aligned scalar vloada_half in CL 1.1 -#if ! defined( CL_VERSION_1_1 ) && ! defined(__APPLE__) - vlog("Note: testing vloada_half.\n"); - if (aligned && minVectorSize == 0) - minVectorSize = 1; -#endif - for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + // There is no aligned scalar vloada_half + if (aligned && minVectorSize == 0) minVectorSize = 1; + + for (vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; + vectorSize++) { int effectiveVectorSize = g_arrVecSizes[vectorSize]; @@ -81,7 +79,7 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned ) "{\n" " size_t i = get_global_id(0);\n" " f[i] = vloada_half3( i, p );\n" - " ((__global float *)f)[4*i+3] = vloada_half(4*i+3,p);\n" + " ((__global float *)f)[4*i+3] = vload_half(4*i+3,p);\n" "}\n" }; -- cgit v1.2.3 From 1aa930957a3f7ca6df30b64f61d082f2359fe486 Mon Sep 17 00:00:00 2001 From: Feng Zou Date: Thu, 12 Aug 2021 01:04:21 +0800 Subject: Temporarily disable the test_kernel_attributes test case (#1297) * Temporarily disable the test_kernel_attributes test case Per OpenCL spec on CL_KERNEL_ATTRIBUTES, for kernels not created from OpenCL C source and the clCreateProgramWithSource API call the string returned from this query will be empty. But in test_kernel_attributes test, it read from bc binary and expect to get kernel attribute, which is not consistent with OpenCL spec. 
* Fix clang format issue --- test_conformance/spir/main.cpp | 73 ++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp index 3a18988c..06caf33b 100644 --- a/test_conformance/spir/main.cpp +++ b/test_conformance/spir/main.cpp @@ -6615,40 +6615,45 @@ struct sub_suite }; static const sub_suite spir_suites[] = { - {"api", "api", test_api}, - {"api_double", "api", test_api_double}, - {"atomics", "atomics", test_atomics}, - {"basic", "basic", test_basic}, - {"basic_double", "basic", test_basic_double}, - {"commonfns", "commonfns", test_commonfns}, - {"commonfns_double", "commonfns", test_commonfns_double}, - {"conversions", "conversions", test_conversions}, - {"conversions_double", "conversions", test_conversions_double}, - {"geometrics", "geometrics", test_geometrics}, - {"geometrics_double", "geometrics", test_geometrics_double}, - {"half", "half", test_half}, - {"half_double", "half", test_half_double}, - {"kernel_image_methods", "kernel_image_methods", test_kernel_image_methods}, - {"images_kernel_read_write", "images_kernel_read_write", test_images_kernel_read_write}, - {"images_samplerlessRead", "images_samplerlessRead", test_images_samplerless_read}, - {"integer_ops", "integer_ops", test_integer_ops}, - {"math_brute_force", "math_brute_force", test_math_brute_force}, - {"math_brute_force_double", "math_brute_force", test_math_brute_force_double}, - {"printf", "printf", test_printf}, - {"profiling", "profiling", test_profiling}, - {"relationals", "relationals", test_relationals}, - {"relationals_double", "relationals", test_relationals_double}, - {"select", "select", test_select}, - {"select_double", "select", test_select_double}, - {"vec_align", "vec_align", test_vec_align}, - {"vec_align_double", "vec_align", test_vec_align_double}, - {"vec_step", "vec_step", test_vec_step}, - {"vec_step_double", "vec_step", test_vec_step_double}, - 
{"compile_and_link", "compile_and_link", test_compile_and_link}, - {"sampler_enumeration", "sampler_enumeration", test_sampler_enumeration}, - {"enum_values", "enum_values", test_enum_values}, - {"kernel_attributes", "kernel_attributes", test_kernel_attributes}, - {"binary_type", "binary_type", test_binary_type}, + { "api", "api", test_api }, + { "api_double", "api", test_api_double }, + { "atomics", "atomics", test_atomics }, + { "basic", "basic", test_basic }, + { "basic_double", "basic", test_basic_double }, + { "commonfns", "commonfns", test_commonfns }, + { "commonfns_double", "commonfns", test_commonfns_double }, + { "conversions", "conversions", test_conversions }, + { "conversions_double", "conversions", test_conversions_double }, + { "geometrics", "geometrics", test_geometrics }, + { "geometrics_double", "geometrics", test_geometrics_double }, + { "half", "half", test_half }, + { "half_double", "half", test_half_double }, + { "kernel_image_methods", "kernel_image_methods", + test_kernel_image_methods }, + { "images_kernel_read_write", "images_kernel_read_write", + test_images_kernel_read_write }, + { "images_samplerlessRead", "images_samplerlessRead", + test_images_samplerless_read }, + { "integer_ops", "integer_ops", test_integer_ops }, + { "math_brute_force", "math_brute_force", test_math_brute_force }, + { "math_brute_force_double", "math_brute_force", + test_math_brute_force_double }, + { "printf", "printf", test_printf }, + { "profiling", "profiling", test_profiling }, + { "relationals", "relationals", test_relationals }, + { "relationals_double", "relationals", test_relationals_double }, + { "select", "select", test_select }, + { "select_double", "select", test_select_double }, + { "vec_align", "vec_align", test_vec_align }, + { "vec_align_double", "vec_align", test_vec_align_double }, + { "vec_step", "vec_step", test_vec_step }, + { "vec_step_double", "vec_step", test_vec_step_double }, + { "compile_and_link", "compile_and_link", 
test_compile_and_link }, + { "sampler_enumeration", "sampler_enumeration", test_sampler_enumeration }, + { "enum_values", "enum_values", test_enum_values }, + // {"kernel_attributes", "kernel_attributes", + // test_kernel_attributes}, // disabling temporarily, see GitHub #1284 + { "binary_type", "binary_type", test_binary_type }, }; -- cgit v1.2.3 From 6da9c6b68f9643a077f7281451b59f444a77a991 Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Wed, 11 Aug 2021 19:06:10 +0200 Subject: Fix double free in c11_atomics tests for SVM allocations (#1286) * Only Clang format changes * Fix double free object for SVM allocations * Fix double free - review fixes --- test_conformance/c11_atomics/common.h | 2556 ++++++++++++++++++--------------- 1 file changed, 1381 insertions(+), 1175 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index bbcc68c6..d30259f0 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -28,10 +28,9 @@ #define MAX_DEVICE_THREADS (gHost ? 
0U : gMaxDeviceThreads) #define MAX_HOST_THREADS GetThreadCount() -#define EXECUTE_TEST(error, test)\ - error |= test;\ - if(error && !gContinueOnError)\ - return error; +#define EXECUTE_TEST(error, test) \ + error |= test; \ + if (error && !gContinueOnError) return error; enum TExplicitAtomicType { @@ -57,764 +56,918 @@ enum TExplicitMemoryScopeType MEMORY_SCOPE_ALL_SVM_DEVICES }; -extern bool gHost; // temporary flag for testing native host threads (test verification) +extern bool + gHost; // temporary flag for testing native host threads (test verification) extern bool gOldAPI; // temporary flag for testing with old API (OpenCL 1.2) extern bool gContinueOnError; // execute all cases even when errors detected -extern bool gNoGlobalVariables; // disable cases with global atomics in program scope +extern bool + gNoGlobalVariables; // disable cases with global atomics in program scope extern bool gNoGenericAddressSpace; // disable cases with generic address space extern bool gUseHostPtr; // use malloc/free instead of clSVMAlloc/clSVMFree extern bool gDebug; // print OpenCL kernel code -extern int gInternalIterations; // internal test iterations for atomic operation, sufficient to verify atomicity -extern int gMaxDeviceThreads; // maximum number of threads executed on OCL device +extern int gInternalIterations; // internal test iterations for atomic + // operation, sufficient to verify atomicity +extern int + gMaxDeviceThreads; // maximum number of threads executed on OCL device extern cl_device_atomic_capabilities gAtomicMemCap, gAtomicFenceCap; // atomic memory and fence capabilities for this device -extern const char *get_memory_order_type_name(TExplicitMemoryOrderType orderType); -extern const char *get_memory_scope_type_name(TExplicitMemoryScopeType scopeType); +extern const char * +get_memory_order_type_name(TExplicitMemoryOrderType orderType); +extern const char * +get_memory_scope_type_name(TExplicitMemoryScopeType scopeType); extern cl_int 
getSupportedMemoryOrdersAndScopes( cl_device_id device, std::vector &memoryOrders, std::vector &memoryScopes); -class AtomicTypeInfo -{ +class AtomicTypeInfo { public: - TExplicitAtomicType _type; - AtomicTypeInfo(TExplicitAtomicType type): _type(type) {} - cl_uint Size(cl_device_id device); - const char* AtomicTypeName(); - const char* RegularTypeName(); - const char* AddSubOperandTypeName(); - int IsSupported(cl_device_id device); + TExplicitAtomicType _type; + AtomicTypeInfo(TExplicitAtomicType type): _type(type) {} + cl_uint Size(cl_device_id device); + const char *AtomicTypeName(); + const char *RegularTypeName(); + const char *AddSubOperandTypeName(); + int IsSupported(cl_device_id device); }; -template -class AtomicTypeExtendedInfo : public AtomicTypeInfo -{ +template +class AtomicTypeExtendedInfo : public AtomicTypeInfo { public: - AtomicTypeExtendedInfo(TExplicitAtomicType type) : AtomicTypeInfo(type) {} - HostDataType MinValue(); - HostDataType MaxValue(); - HostDataType SpecialValue(cl_uchar x) - { - HostDataType tmp; - cl_uchar *ptr = (cl_uchar*)&tmp; - for(cl_uint i = 0; i < sizeof(HostDataType)/sizeof(cl_uchar); i++) - ptr[i] = x; - return tmp; - } - HostDataType SpecialValue(cl_ushort x) - { - HostDataType tmp; - cl_ushort *ptr = (cl_ushort*)&tmp; - for(cl_uint i = 0; i < sizeof(HostDataType)/sizeof(cl_ushort); i++) - ptr[i] = x; - return tmp; - } + AtomicTypeExtendedInfo(TExplicitAtomicType type): AtomicTypeInfo(type) {} + HostDataType MinValue(); + HostDataType MaxValue(); + HostDataType SpecialValue(cl_uchar x) + { + HostDataType tmp; + cl_uchar *ptr = (cl_uchar *)&tmp; + for (cl_uint i = 0; i < sizeof(HostDataType) / sizeof(cl_uchar); i++) + ptr[i] = x; + return tmp; + } + HostDataType SpecialValue(cl_ushort x) + { + HostDataType tmp; + cl_ushort *ptr = (cl_ushort *)&tmp; + for (cl_uint i = 0; i < sizeof(HostDataType) / sizeof(cl_ushort); i++) + ptr[i] = x; + return tmp; + } }; -class CTest { +class CTest { public: - virtual int 
Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) = 0; + virtual int Execute(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) = 0; }; -template -class CBasicTest : CTest -{ +template +class CBasicTest : CTest { public: - typedef struct { - CBasicTest *test; - cl_uint tid; - cl_uint threadCount; - volatile HostAtomicType *destMemory; - HostDataType *oldValues; - } THostThreadContext; - static cl_int HostThreadFunction(cl_uint job_id, cl_uint thread_id, void *userInfo) - { - THostThreadContext *threadContext = ((THostThreadContext*)userInfo)+job_id; - threadContext->test->HostFunction(threadContext->tid, threadContext->threadCount, threadContext->destMemory, threadContext->oldValues); - return 0; - } - CBasicTest(TExplicitAtomicType dataType, bool useSVM) : CTest(), - _maxDeviceThreads(MAX_DEVICE_THREADS), - _dataType(dataType), _useSVM(useSVM), _startValue(255), - _localMemory(false), _declaredInProgram(false), - _usedInFunction(false), _genericAddrSpace(false), - _oldValueCheck(true), _localRefValues(false), - _maxGroupSize(0), _passCount(0), _iterations(gInternalIterations) - { - } - virtual ~CBasicTest() - { - if(_passCount) - log_info(" %u tests executed successfully for %s\n", _passCount, DataType().AtomicTypeName()); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return 1; - } - virtual cl_uint NumNonAtomicVariablesPerThread() - { - return 1; - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - return false; - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - return false; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - return false; - } - virtual std::string PragmaHeader(cl_device_id deviceID); - virtual std::string ProgramHeader(cl_uint 
maxNumDestItems); - virtual std::string FunctionCode(); - virtual std::string KernelCode(cl_uint maxNumDestItems); - virtual std::string ProgramCore() = 0; - virtual std::string SingleTestName() - { - std::string testName = LocalMemory() ? "local" : "global"; - testName += " "; - testName += DataType().AtomicTypeName(); - if(DeclaredInProgram()) - { - testName += " declared in program"; - } - if(DeclaredInProgram() && UsedInFunction()) - testName += ","; - if(UsedInFunction()) - { - testName += " used in "; - if(GenericAddrSpace()) - testName += "generic "; - testName += "function"; - } - return testName; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue); - int ExecuteForEachPointerType(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - int error = 0; - UsedInFunction(false); - EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); - UsedInFunction(true); - GenericAddrSpace(false); - EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); - GenericAddrSpace(true); - EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); - GenericAddrSpace(false); - return error; - } - int ExecuteForEachDeclarationType(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - int error = 0; - DeclaredInProgram(false); - EXECUTE_TEST(error, ExecuteForEachPointerType(deviceID, context, queue)); - if(!UseSVM()) - { - DeclaredInProgram(true); - EXECUTE_TEST(error, ExecuteForEachPointerType(deviceID, context, queue)); - } - return error; - } - virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - int error = 0; - if(_maxDeviceThreads > 0 && !UseSVM()) - { - LocalMemory(true); - EXECUTE_TEST(error, ExecuteForEachDeclarationType(deviceID, context, queue)); - } - if(_maxDeviceThreads+MaxHostThreads() > 0) - { - LocalMemory(false); - EXECUTE_TEST(error, ExecuteForEachDeclarationType(deviceID, context, queue)); - } 
- return error; - } - virtual int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) - { - if(sizeof(HostAtomicType) != DataType().Size(deviceID)) - { - log_info("Invalid test: Host atomic type size (%u) is different than OpenCL type size (%u)\n", (cl_uint)sizeof(HostAtomicType), DataType().Size(deviceID)); - return -1; - } - if(sizeof(HostAtomicType) != sizeof(HostDataType)) - { - log_info("Invalid test: Host atomic type size (%u) is different than corresponding type size (%u)\n", (cl_uint)sizeof(HostAtomicType), (cl_uint)sizeof(HostDataType)); - return -1; - } - // Verify we can run first - if(UseSVM() && !gUseHostPtr) - { - cl_device_svm_capabilities caps; - cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(caps), &caps, 0); - test_error(error, "clGetDeviceInfo failed"); - if((caps & CL_DEVICE_SVM_ATOMICS) == 0) - { - log_info("\t%s - SVM_ATOMICS not supported\n", DataType().AtomicTypeName()); - // implicit pass + typedef struct + { + CBasicTest *test; + cl_uint tid; + cl_uint threadCount; + volatile HostAtomicType *destMemory; + HostDataType *oldValues; + } THostThreadContext; + static cl_int HostThreadFunction(cl_uint job_id, cl_uint thread_id, + void *userInfo) + { + THostThreadContext *threadContext = + ((THostThreadContext *)userInfo) + job_id; + threadContext->test->HostFunction( + threadContext->tid, threadContext->threadCount, + threadContext->destMemory, threadContext->oldValues); return 0; - } } - if(!DataType().IsSupported(deviceID)) + CBasicTest(TExplicitAtomicType dataType, bool useSVM) + : CTest(), _maxDeviceThreads(MAX_DEVICE_THREADS), _dataType(dataType), + _useSVM(useSVM), _startValue(255), _localMemory(false), + _declaredInProgram(false), _usedInFunction(false), + _genericAddrSpace(false), _oldValueCheck(true), + _localRefValues(false), _maxGroupSize(0), _passCount(0), + _iterations(gInternalIterations) + {} + virtual ~CBasicTest() + { + if (_passCount) + log_info(" %u tests 
executed successfully for %s\n", _passCount, + DataType().AtomicTypeName()); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return 1; + } + virtual cl_uint NumNonAtomicVariablesPerThread() { return 1; } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + return false; + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + return false; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) { - log_info("\t%s not supported\n", DataType().AtomicTypeName()); - // implicit pass or host test (debug feature) - if(UseSVM()) + return false; + } + virtual std::string PragmaHeader(cl_device_id deviceID); + virtual std::string ProgramHeader(cl_uint maxNumDestItems); + virtual std::string FunctionCode(); + virtual std::string KernelCode(cl_uint maxNumDestItems); + virtual std::string ProgramCore() = 0; + virtual std::string SingleTestName() + { + std::string testName = LocalMemory() ? 
"local" : "global"; + testName += " "; + testName += DataType().AtomicTypeName(); + if (DeclaredInProgram()) + { + testName += " declared in program"; + } + if (DeclaredInProgram() && UsedInFunction()) testName += ","; + if (UsedInFunction()) + { + testName += " used in "; + if (GenericAddrSpace()) testName += "generic "; + testName += "function"; + } + return testName; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue); + int ExecuteForEachPointerType(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + int error = 0; + UsedInFunction(false); + EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); + UsedInFunction(true); + GenericAddrSpace(false); + EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); + GenericAddrSpace(true); + EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); + GenericAddrSpace(false); + return error; + } + int ExecuteForEachDeclarationType(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + int error = 0; + DeclaredInProgram(false); + EXECUTE_TEST(error, + ExecuteForEachPointerType(deviceID, context, queue)); + if (!UseSVM()) + { + DeclaredInProgram(true); + EXECUTE_TEST(error, + ExecuteForEachPointerType(deviceID, context, queue)); + } + return error; + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, + cl_context context, + cl_command_queue queue) + { + int error = 0; + if (_maxDeviceThreads > 0 && !UseSVM()) + { + LocalMemory(true); + EXECUTE_TEST( + error, ExecuteForEachDeclarationType(deviceID, context, queue)); + } + if (_maxDeviceThreads + MaxHostThreads() > 0) + { + LocalMemory(false); + EXECUTE_TEST( + error, ExecuteForEachDeclarationType(deviceID, context, queue)); + } + return error; + } + virtual int Execute(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) + { + if (sizeof(HostAtomicType) != DataType().Size(deviceID)) + { + log_info("Invalid 
test: Host atomic type size (%u) is different " + "than OpenCL type size (%u)\n", + (cl_uint)sizeof(HostAtomicType), + DataType().Size(deviceID)); + return -1; + } + if (sizeof(HostAtomicType) != sizeof(HostDataType)) + { + log_info("Invalid test: Host atomic type size (%u) is different " + "than corresponding type size (%u)\n", + (cl_uint)sizeof(HostAtomicType), + (cl_uint)sizeof(HostDataType)); + return -1; + } + // Verify we can run first + if (UseSVM() && !gUseHostPtr) + { + cl_device_svm_capabilities caps; + cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, + sizeof(caps), &caps, 0); + test_error(error, "clGetDeviceInfo failed"); + if ((caps & CL_DEVICE_SVM_ATOMICS) == 0) + { + log_info("\t%s - SVM_ATOMICS not supported\n", + DataType().AtomicTypeName()); + // implicit pass + return 0; + } + } + if (!DataType().IsSupported(deviceID)) + { + log_info("\t%s not supported\n", DataType().AtomicTypeName()); + // implicit pass or host test (debug feature) + if (UseSVM()) return 0; + _maxDeviceThreads = 0; + } + if (_maxDeviceThreads + MaxHostThreads() == 0) return 0; + return ExecuteForEachParameterSet(deviceID, context, queue); + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + log_info("Empty thread function %u\n", (cl_uint)tid); + } + AtomicTypeExtendedInfo DataType() const + { + return AtomicTypeExtendedInfo(_dataType); + } + cl_uint _maxDeviceThreads; + virtual cl_uint MaxHostThreads() + { + if (UseSVM() || gHost) + return MAX_HOST_THREADS; + else + return 0; + } + + int CheckCapabilities(TExplicitMemoryScopeType memoryScope, + TExplicitMemoryOrderType memoryOrder) + { + /* + Differentiation between atomic fence and other atomic operations + does not need to occur here. + + The initialisation of this test checks that the minimum required + capabilities are supported by this device. 
+ + The following switches allow the test to skip if optional + capabilites are not supported by the device. + */ + switch (memoryScope) + { + case MEMORY_SCOPE_EMPTY: { + break; + } + case MEMORY_SCOPE_WORK_GROUP: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + case MEMORY_SCOPE_DEVICE: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + case MEMORY_SCOPE_ALL_DEVICES: // fallthough + case MEMORY_SCOPE_ALL_SVM_DEVICES: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + default: { + log_info("Invalid memory scope\n"); + break; + } + } + + switch (memoryOrder) + { + case MEMORY_ORDER_EMPTY: { + break; + } + case MEMORY_ORDER_RELAXED: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_RELAXED) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + case MEMORY_ORDER_ACQUIRE: + case MEMORY_ORDER_RELEASE: + case MEMORY_ORDER_ACQ_REL: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + case MEMORY_ORDER_SEQ_CST: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + default: { + log_info("Invalid memory order\n"); + break; + } + } + return 0; - _maxDeviceThreads = 0; - } - if(_maxDeviceThreads+MaxHostThreads() == 0) - return 0; - return ExecuteForEachParameterSet(deviceID, context, queue); - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - log_info("Empty thread function %u\n", (cl_uint)tid); - } - AtomicTypeExtendedInfo DataType() const - { - return AtomicTypeExtendedInfo(_dataType); - } - cl_uint _maxDeviceThreads; - virtual cl_uint MaxHostThreads() - { - if(UseSVM() || gHost) - return MAX_HOST_THREADS; - else - return 0; - } - - int 
CheckCapabilities(TExplicitMemoryScopeType memoryScope, - TExplicitMemoryOrderType memoryOrder) - { - /* - Differentiation between atomic fence and other atomic operations - does not need to occur here. - - The initialisation of this test checks that the minimum required - capabilities are supported by this device. - - The following switches allow the test to skip if optional capabilites - are not supported by the device. - */ - switch (memoryScope) - { - case MEMORY_SCOPE_EMPTY: { - break; - } - case MEMORY_SCOPE_WORK_GROUP: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - case MEMORY_SCOPE_DEVICE: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - case MEMORY_SCOPE_ALL_DEVICES: // fallthough - case MEMORY_SCOPE_ALL_SVM_DEVICES: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - default: { - log_info("Invalid memory scope\n"); - break; - } - } - - switch (memoryOrder) - { - case MEMORY_ORDER_EMPTY: { - break; - } - case MEMORY_ORDER_RELAXED: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_RELAXED) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - case MEMORY_ORDER_ACQUIRE: - case MEMORY_ORDER_RELEASE: - case MEMORY_ORDER_ACQ_REL: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - case MEMORY_ORDER_SEQ_CST: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - default: { - log_info("Invalid memory order\n"); - break; - } - } - - return 0; - } - virtual bool SVMDataBufferAllSVMConsistent() {return false;} - bool UseSVM() {return _useSVM;} - void StartValue(HostDataType startValue) {_startValue = startValue;} - HostDataType StartValue() {return _startValue;} - void LocalMemory(bool local) {_localMemory = local;} - bool LocalMemory() 
{return _localMemory;} - void DeclaredInProgram(bool declaredInProgram) {_declaredInProgram = declaredInProgram;} - bool DeclaredInProgram() {return _declaredInProgram;} - void UsedInFunction(bool local) {_usedInFunction = local;} - bool UsedInFunction() {return _usedInFunction;} - void GenericAddrSpace(bool genericAddrSpace) {_genericAddrSpace = genericAddrSpace;} - bool GenericAddrSpace() {return _genericAddrSpace;} - void OldValueCheck(bool check) {_oldValueCheck = check;} - bool OldValueCheck() {return _oldValueCheck;} - void LocalRefValues(bool localRefValues) {_localRefValues = localRefValues;} - bool LocalRefValues() {return _localRefValues;} - void MaxGroupSize(cl_uint maxGroupSize) {_maxGroupSize = maxGroupSize;} - cl_uint MaxGroupSize() {return _maxGroupSize;} - void CurrentGroupSize(cl_uint currentGroupSize) - { - if(MaxGroupSize() && MaxGroupSize() < currentGroupSize) - _currentGroupSize = MaxGroupSize(); - else - _currentGroupSize = currentGroupSize; - } - cl_uint CurrentGroupSize() {return _currentGroupSize;} - virtual cl_uint CurrentGroupNum(cl_uint threadCount) - { - if(threadCount == 0) - return 0; - if(LocalMemory()) - return 1; - return threadCount/CurrentGroupSize(); - } - cl_int Iterations() {return _iterations;} - std::string IterationsStr() {std::stringstream ss; ss << _iterations; return ss.str();} + } + virtual bool SVMDataBufferAllSVMConsistent() { return false; } + bool UseSVM() { return _useSVM; } + void StartValue(HostDataType startValue) { _startValue = startValue; } + HostDataType StartValue() { return _startValue; } + void LocalMemory(bool local) { _localMemory = local; } + bool LocalMemory() { return _localMemory; } + void DeclaredInProgram(bool declaredInProgram) + { + _declaredInProgram = declaredInProgram; + } + bool DeclaredInProgram() { return _declaredInProgram; } + void UsedInFunction(bool local) { _usedInFunction = local; } + bool UsedInFunction() { return _usedInFunction; } + void GenericAddrSpace(bool genericAddrSpace) + { 
+ _genericAddrSpace = genericAddrSpace; + } + bool GenericAddrSpace() { return _genericAddrSpace; } + void OldValueCheck(bool check) { _oldValueCheck = check; } + bool OldValueCheck() { return _oldValueCheck; } + void LocalRefValues(bool localRefValues) + { + _localRefValues = localRefValues; + } + bool LocalRefValues() { return _localRefValues; } + void MaxGroupSize(cl_uint maxGroupSize) { _maxGroupSize = maxGroupSize; } + cl_uint MaxGroupSize() { return _maxGroupSize; } + void CurrentGroupSize(cl_uint currentGroupSize) + { + if (MaxGroupSize() && MaxGroupSize() < currentGroupSize) + _currentGroupSize = MaxGroupSize(); + else + _currentGroupSize = currentGroupSize; + } + cl_uint CurrentGroupSize() { return _currentGroupSize; } + virtual cl_uint CurrentGroupNum(cl_uint threadCount) + { + if (threadCount == 0) return 0; + if (LocalMemory()) return 1; + return threadCount / CurrentGroupSize(); + } + cl_int Iterations() { return _iterations; } + std::string IterationsStr() + { + std::stringstream ss; + ss << _iterations; + return ss.str(); + } + private: - const TExplicitAtomicType _dataType; - const bool _useSVM; - HostDataType _startValue; - bool _localMemory; - bool _declaredInProgram; - bool _usedInFunction; - bool _genericAddrSpace; - bool _oldValueCheck; - bool _localRefValues; - cl_uint _maxGroupSize; - cl_uint _currentGroupSize; - cl_uint _passCount; - const cl_int _iterations; + const TExplicitAtomicType _dataType; + const bool _useSVM; + HostDataType _startValue; + bool _localMemory; + bool _declaredInProgram; + bool _usedInFunction; + bool _genericAddrSpace; + bool _oldValueCheck; + bool _localRefValues; + cl_uint _maxGroupSize; + cl_uint _currentGroupSize; + cl_uint _passCount; + const cl_int _iterations; }; -template -class CBasicTestMemOrderScope : public CBasicTest -{ +template +class CBasicTestMemOrderScope + : public CBasicTest { public: - using CBasicTest::LocalMemory; - using CBasicTest::MaxGroupSize; - using CBasicTest::CheckCapabilities; - 
CBasicTestMemOrderScope(TExplicitAtomicType dataType, bool useSVM = false) : CBasicTest(dataType, useSVM) - { - } - virtual std::string ProgramHeader(cl_uint maxNumDestItems) - { - std::string header; - if(gOldAPI) - { - std::string s = MemoryScope() == MEMORY_SCOPE_EMPTY ? "" : ",s"; - header += - "#define atomic_store_explicit(x,y,o"+s+") atomic_store(x,y)\n" - "#define atomic_load_explicit(x,o"+s+") atomic_load(x)\n" - "#define atomic_exchange_explicit(x,y,o"+s+") atomic_exchange(x,y)\n" - "#define atomic_compare_exchange_strong_explicit(x,y,z,os,of"+s+") atomic_compare_exchange_strong(x,y,z)\n" - "#define atomic_compare_exchange_weak_explicit(x,y,z,os,of"+s+") atomic_compare_exchange_weak(x,y,z)\n" - "#define atomic_fetch_add_explicit(x,y,o"+s+") atomic_fetch_add(x,y)\n" - "#define atomic_fetch_sub_explicit(x,y,o"+s+") atomic_fetch_sub(x,y)\n" - "#define atomic_fetch_or_explicit(x,y,o"+s+") atomic_fetch_or(x,y)\n" - "#define atomic_fetch_xor_explicit(x,y,o"+s+") atomic_fetch_xor(x,y)\n" - "#define atomic_fetch_and_explicit(x,y,o"+s+") atomic_fetch_and(x,y)\n" - "#define atomic_fetch_min_explicit(x,y,o"+s+") atomic_fetch_min(x,y)\n" - "#define atomic_fetch_max_explicit(x,y,o"+s+") atomic_fetch_max(x,y)\n" - "#define atomic_flag_test_and_set_explicit(x,o"+s+") atomic_flag_test_and_set(x)\n" - "#define atomic_flag_clear_explicit(x,o"+s+") atomic_flag_clear(x)\n"; - } - return header+CBasicTest::ProgramHeader(maxNumDestItems); - } - virtual std::string SingleTestName() - { - std::string testName = CBasicTest::SingleTestName(); - if(MemoryOrder() != MEMORY_ORDER_EMPTY) - { - testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder())).substr(sizeof("memory")); - } - if(MemoryScope() != MEMORY_SCOPE_EMPTY) - { - testName += std::string(", ")+std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory")); - } - return testName; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue 
queue) - { - if(LocalMemory() && - MemoryScope() != MEMORY_SCOPE_EMPTY && - MemoryScope() != MEMORY_SCOPE_WORK_GROUP) //memory scope should only be used for global memory - return 0; - if(MemoryScope() == MEMORY_SCOPE_DEVICE) - MaxGroupSize(16); // increase number of groups by forcing smaller group size - else - MaxGroupSize(0); // group size limited by device capabilities - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable - - return CBasicTest::ExecuteSingleTest(deviceID, context, queue); - } - virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - // repeat test for each reasonable memory order/scope combination - std::vector memoryOrder; - std::vector memoryScope; - int error = 0; - - // For OpenCL-3.0 and later some orderings and scopes are optional, so here - // we query for the supported ones. - test_error_ret( - getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, memoryScope), - "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL); - - for(unsigned oi = 0; oi < memoryOrder.size(); oi++) - { - for(unsigned si = 0; si < memoryScope.size(); si++) - { - if(memoryOrder[oi] == MEMORY_ORDER_EMPTY && memoryScope[si] != MEMORY_SCOPE_EMPTY) - continue; - MemoryOrder(memoryOrder[oi]); - MemoryScope(memoryScope[si]); - EXECUTE_TEST(error, (CBasicTest::ExecuteForEachParameterSet(deviceID, context, queue))); - } - } - return error; - } - void MemoryOrder(TExplicitMemoryOrderType memoryOrder) {_memoryOrder = memoryOrder;} - TExplicitMemoryOrderType MemoryOrder() {return _memoryOrder;} - std::string MemoryOrderStr() - { - if(MemoryOrder() != MEMORY_ORDER_EMPTY) - return std::string(", ")+get_memory_order_type_name(MemoryOrder()); - return ""; - } - void MemoryScope(TExplicitMemoryScopeType memoryScope) {_memoryScope = memoryScope;} - TExplicitMemoryScopeType MemoryScope() {return _memoryScope;} - std::string MemoryScopeStr() - { - 
if(MemoryScope() != MEMORY_SCOPE_EMPTY) - return std::string(", ")+get_memory_scope_type_name(MemoryScope()); - return ""; - } - std::string MemoryOrderScopeStr() - { - return MemoryOrderStr()+MemoryScopeStr(); - } - virtual cl_uint CurrentGroupNum(cl_uint threadCount) - { - if(MemoryScope() == MEMORY_SCOPE_WORK_GROUP) - return 1; - return CBasicTest::CurrentGroupNum(threadCount); - } - virtual cl_uint MaxHostThreads() - { - // block host threads execution for memory scope different than - // memory_scope_all_svm_devices - if (MemoryScope() == MEMORY_SCOPE_ALL_DEVICES - || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES || gHost) - { - return CBasicTest::MaxHostThreads(); - } - else - { - return 0; - } - } + using CBasicTest::LocalMemory; + using CBasicTest::MaxGroupSize; + using CBasicTest::CheckCapabilities; + CBasicTestMemOrderScope(TExplicitAtomicType dataType, bool useSVM = false) + : CBasicTest(dataType, useSVM) + {} + virtual std::string ProgramHeader(cl_uint maxNumDestItems) + { + std::string header; + if (gOldAPI) + { + std::string s = MemoryScope() == MEMORY_SCOPE_EMPTY ? 
"" : ",s"; + header += "#define atomic_store_explicit(x,y,o" + s + + ") atomic_store(x,y)\n" + "#define atomic_load_explicit(x,o" + + s + + ") atomic_load(x)\n" + "#define atomic_exchange_explicit(x,y,o" + + s + + ") atomic_exchange(x,y)\n" + "#define atomic_compare_exchange_strong_explicit(x,y,z,os,of" + + s + + ") atomic_compare_exchange_strong(x,y,z)\n" + "#define atomic_compare_exchange_weak_explicit(x,y,z,os,of" + + s + + ") atomic_compare_exchange_weak(x,y,z)\n" + "#define atomic_fetch_add_explicit(x,y,o" + + s + + ") atomic_fetch_add(x,y)\n" + "#define atomic_fetch_sub_explicit(x,y,o" + + s + + ") atomic_fetch_sub(x,y)\n" + "#define atomic_fetch_or_explicit(x,y,o" + + s + + ") atomic_fetch_or(x,y)\n" + "#define atomic_fetch_xor_explicit(x,y,o" + + s + + ") atomic_fetch_xor(x,y)\n" + "#define atomic_fetch_and_explicit(x,y,o" + + s + + ") atomic_fetch_and(x,y)\n" + "#define atomic_fetch_min_explicit(x,y,o" + + s + + ") atomic_fetch_min(x,y)\n" + "#define atomic_fetch_max_explicit(x,y,o" + + s + + ") atomic_fetch_max(x,y)\n" + "#define atomic_flag_test_and_set_explicit(x,o" + + s + + ") atomic_flag_test_and_set(x)\n" + "#define atomic_flag_clear_explicit(x,o" + + s + ") atomic_flag_clear(x)\n"; + } + return header + + CBasicTest::ProgramHeader( + maxNumDestItems); + } + virtual std::string SingleTestName() + { + std::string testName = + CBasicTest::SingleTestName(); + if (MemoryOrder() != MEMORY_ORDER_EMPTY) + { + testName += std::string(", ") + + std::string(get_memory_order_type_name(MemoryOrder())) + .substr(sizeof("memory")); + } + if (MemoryScope() != MEMORY_SCOPE_EMPTY) + { + testName += std::string(", ") + + std::string(get_memory_scope_type_name(MemoryScope())) + .substr(sizeof("memory")); + } + return testName; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (LocalMemory() && MemoryScope() != MEMORY_SCOPE_EMPTY + && MemoryScope() + != MEMORY_SCOPE_WORK_GROUP) // memory scope should only 
be used + // for global memory + return 0; + if (MemoryScope() == MEMORY_SCOPE_DEVICE) + MaxGroupSize( + 16); // increase number of groups by forcing smaller group size + else + MaxGroupSize(0); // group size limited by device capabilities + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + return CBasicTest::ExecuteSingleTest( + deviceID, context, queue); + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, + cl_context context, + cl_command_queue queue) + { + // repeat test for each reasonable memory order/scope combination + std::vector memoryOrder; + std::vector memoryScope; + int error = 0; + + // For OpenCL-3.0 and later some orderings and scopes are optional, so + // here we query for the supported ones. + test_error_ret(getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, + memoryScope), + "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL); + + for (unsigned oi = 0; oi < memoryOrder.size(); oi++) + { + for (unsigned si = 0; si < memoryScope.size(); si++) + { + if (memoryOrder[oi] == MEMORY_ORDER_EMPTY + && memoryScope[si] != MEMORY_SCOPE_EMPTY) + continue; + MemoryOrder(memoryOrder[oi]); + MemoryScope(memoryScope[si]); + EXECUTE_TEST( + error, + (CBasicTest:: + ExecuteForEachParameterSet(deviceID, context, queue))); + } + } + return error; + } + void MemoryOrder(TExplicitMemoryOrderType memoryOrder) + { + _memoryOrder = memoryOrder; + } + TExplicitMemoryOrderType MemoryOrder() { return _memoryOrder; } + std::string MemoryOrderStr() + { + if (MemoryOrder() != MEMORY_ORDER_EMPTY) + return std::string(", ") + + get_memory_order_type_name(MemoryOrder()); + return ""; + } + void MemoryScope(TExplicitMemoryScopeType memoryScope) + { + _memoryScope = memoryScope; + } + TExplicitMemoryScopeType MemoryScope() { return _memoryScope; } + std::string MemoryScopeStr() + { + if (MemoryScope() != MEMORY_SCOPE_EMPTY) + return std::string(", ") + + 
get_memory_scope_type_name(MemoryScope()); + return ""; + } + std::string MemoryOrderScopeStr() + { + return MemoryOrderStr() + MemoryScopeStr(); + } + virtual cl_uint CurrentGroupNum(cl_uint threadCount) + { + if (MemoryScope() == MEMORY_SCOPE_WORK_GROUP) return 1; + return CBasicTest::CurrentGroupNum( + threadCount); + } + virtual cl_uint MaxHostThreads() + { + // block host threads execution for memory scope different than + // memory_scope_all_svm_devices + if (MemoryScope() == MEMORY_SCOPE_ALL_DEVICES + || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES || gHost) + { + return CBasicTest::MaxHostThreads(); + } + else + { + return 0; + } + } + private: - TExplicitMemoryOrderType _memoryOrder; - TExplicitMemoryScopeType _memoryScope; + TExplicitMemoryOrderType _memoryOrder; + TExplicitMemoryScopeType _memoryScope; }; -template -class CBasicTestMemOrder2Scope : public CBasicTestMemOrderScope -{ +template +class CBasicTestMemOrder2Scope + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::LocalMemory; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScope; - using CBasicTestMemOrderScope::MemoryOrderStr; - using CBasicTestMemOrderScope::MemoryScopeStr; - using CBasicTest::CheckCapabilities; - - CBasicTestMemOrder2Scope(TExplicitAtomicType dataType, bool useSVM = false) : CBasicTestMemOrderScope(dataType, useSVM) - { - } - virtual std::string SingleTestName() - { - std::string testName = CBasicTest::SingleTestName(); - if(MemoryOrder() != MEMORY_ORDER_EMPTY) - testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder())).substr(sizeof("memory")); - if(MemoryOrder2() != MEMORY_ORDER_EMPTY) - testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder2())).substr(sizeof("memory")); - if(MemoryScope() != MEMORY_SCOPE_EMPTY) - testName += std::string(", ")+std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory")); - return testName; - } - virtual 
int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - // repeat test for each reasonable memory order/scope combination - std::vector memoryOrder; - std::vector memoryScope; - int error = 0; - - // For OpenCL-3.0 and later some orderings and scopes are optional, so here - // we query for the supported ones. - test_error_ret( - getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, memoryScope), - "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL); - - for(unsigned oi = 0; oi < memoryOrder.size(); oi++) - { - for(unsigned o2i = 0; o2i < memoryOrder.size(); o2i++) - { - for(unsigned si = 0; si < memoryScope.size(); si++) + using CBasicTestMemOrderScope::LocalMemory; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScope; + using CBasicTestMemOrderScope::MemoryOrderStr; + using CBasicTestMemOrderScope::MemoryScopeStr; + using CBasicTest::CheckCapabilities; + + CBasicTestMemOrder2Scope(TExplicitAtomicType dataType, bool useSVM = false) + : CBasicTestMemOrderScope(dataType, + useSVM) + {} + virtual std::string SingleTestName() + { + std::string testName = + CBasicTest::SingleTestName(); + if (MemoryOrder() != MEMORY_ORDER_EMPTY) + testName += std::string(", ") + + std::string(get_memory_order_type_name(MemoryOrder())) + .substr(sizeof("memory")); + if (MemoryOrder2() != MEMORY_ORDER_EMPTY) + testName += std::string(", ") + + std::string(get_memory_order_type_name(MemoryOrder2())) + .substr(sizeof("memory")); + if (MemoryScope() != MEMORY_SCOPE_EMPTY) + testName += std::string(", ") + + std::string(get_memory_scope_type_name(MemoryScope())) + .substr(sizeof("memory")); + return testName; + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, + cl_context context, + cl_command_queue queue) + { + // repeat test for each reasonable memory order/scope combination + std::vector memoryOrder; + std::vector memoryScope; + int error = 0; + + // For OpenCL-3.0 and later some 
orderings and scopes are optional, so + // here we query for the supported ones. + test_error_ret(getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, + memoryScope), + "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL); + + for (unsigned oi = 0; oi < memoryOrder.size(); oi++) { - if((memoryOrder[oi] == MEMORY_ORDER_EMPTY || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) - && memoryOrder[oi] != memoryOrder[o2i]) - continue; // both memory order arguments must be set (or none) - if((memoryOrder[oi] == MEMORY_ORDER_EMPTY || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) - && memoryScope[si] != MEMORY_SCOPE_EMPTY) - continue; // memory scope without memory order is not allowed - MemoryOrder(memoryOrder[oi]); - MemoryOrder2(memoryOrder[o2i]); - MemoryScope(memoryScope[si]); - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) - == TEST_SKIPPED_ITSELF) - continue; // skip test - not applicable - - if (CheckCapabilities(MemoryScope(), MemoryOrder2()) - == TEST_SKIPPED_ITSELF) - continue; // skip test - not applicable - - EXECUTE_TEST(error, (CBasicTest::ExecuteForEachParameterSet(deviceID, context, queue))); + for (unsigned o2i = 0; o2i < memoryOrder.size(); o2i++) + { + for (unsigned si = 0; si < memoryScope.size(); si++) + { + if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY + || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) + && memoryOrder[oi] != memoryOrder[o2i]) + continue; // both memory order arguments must be set (or + // none) + if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY + || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) + && memoryScope[si] != MEMORY_SCOPE_EMPTY) + continue; // memory scope without memory order is not + // allowed + MemoryOrder(memoryOrder[oi]); + MemoryOrder2(memoryOrder[o2i]); + MemoryScope(memoryScope[si]); + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + continue; // skip test - not applicable + + if (CheckCapabilities(MemoryScope(), MemoryOrder2()) + == TEST_SKIPPED_ITSELF) + continue; // skip test - not applicable + + 
EXECUTE_TEST(error, + (CBasicTest:: + ExecuteForEachParameterSet( + deviceID, context, queue))); + } + } } - } - } - return error; - } - void MemoryOrder2(TExplicitMemoryOrderType memoryOrderFail) {_memoryOrder2 = memoryOrderFail;} - TExplicitMemoryOrderType MemoryOrder2() {return _memoryOrder2;} - std::string MemoryOrderFailStr() - { - if(MemoryOrder2() != MEMORY_ORDER_EMPTY) - return std::string(", ")+get_memory_order_type_name(MemoryOrder2()); - return ""; - } - std::string MemoryOrderScope() - { - return MemoryOrderStr()+MemoryOrderFailStr()+MemoryScopeStr(); - } + return error; + } + void MemoryOrder2(TExplicitMemoryOrderType memoryOrderFail) + { + _memoryOrder2 = memoryOrderFail; + } + TExplicitMemoryOrderType MemoryOrder2() { return _memoryOrder2; } + std::string MemoryOrderFailStr() + { + if (MemoryOrder2() != MEMORY_ORDER_EMPTY) + return std::string(", ") + + get_memory_order_type_name(MemoryOrder2()); + return ""; + } + std::string MemoryOrderScope() + { + return MemoryOrderStr() + MemoryOrderFailStr() + MemoryScopeStr(); + } + private: - TExplicitMemoryOrderType _memoryOrder2; + TExplicitMemoryOrderType _memoryOrder2; }; -template -std::string CBasicTest::PragmaHeader(cl_device_id deviceID) +template +std::string +CBasicTest::PragmaHeader(cl_device_id deviceID) { - std::string pragma; - - if(gOldAPI) - { - pragma += "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"; - pragma += "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"; - pragma += "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"; - pragma += "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"; - } - // Create the pragma lines for this kernel - if(DataType().Size(deviceID) == 8) - { - pragma += "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"; - pragma += "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"; - } - if(_dataType == TYPE_ATOMIC_DOUBLE) - pragma += "#pragma OPENCL 
EXTENSION cl_khr_fp64 : enable\n"; - return pragma; + std::string pragma; + + if (gOldAPI) + { + pragma += "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : " + "enable\n"; + pragma += "#pragma OPENCL EXTENSION " + "cl_khr_local_int32_extended_atomics : enable\n"; + pragma += "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : " + "enable\n"; + pragma += "#pragma OPENCL EXTENSION " + "cl_khr_global_int32_extended_atomics : enable\n"; + } + // Create the pragma lines for this kernel + if (DataType().Size(deviceID) == 8) + { + pragma += + "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"; + pragma += + "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"; + } + if (_dataType == TYPE_ATOMIC_DOUBLE) + pragma += "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + return pragma; } -template -std::string CBasicTest::ProgramHeader(cl_uint maxNumDestItems) +template +std::string +CBasicTest::ProgramHeader(cl_uint maxNumDestItems) { - // Create the program header - std::string header; - std::string aTypeName = DataType().AtomicTypeName(); - std::string cTypeName = DataType().RegularTypeName(); - std::string argListForKernel; - std::string argListForFunction; - std::string argListNoTypes; - std::string functionPrototype; - std::string addressSpace = LocalMemory() ? 
"__local " : "__global "; - - if(gOldAPI) - { - header += std::string("#define ")+aTypeName+" "+cTypeName+"\n" - "#define atomic_store(x,y) (*(x) = y)\n" - "#define atomic_load(x) (*(x))\n" - "#define ATOMIC_VAR_INIT(x) (x)\n" - "#define ATOMIC_FLAG_INIT 0\n" - "#define atomic_init(x,y) atomic_store(x,y)\n"; - if(aTypeName == "atomic_float") - header += "#define atomic_exchange(x,y) atomic_xchg(x,y)\n"; - else if(aTypeName == "atomic_double") - header += "double atomic_exchange(volatile "+addressSpace+"atomic_double *x, double y)\n" - "{\n" - " long tmp = *(long*)&y, res;\n" - " volatile "+addressSpace+"long *tmpA = (volatile "+addressSpace+"long)x;\n" - " res = atom_xchg(tmpA,tmp);\n" - " return *(double*)&res;\n" - "}\n"; - else - header += "#define atomic_exchange(x,y) atom_xchg(x,y)\n"; - if(aTypeName != "atomic_float" && aTypeName != "atomic_double") - header += - "bool atomic_compare_exchange_strong(volatile "+addressSpace+" "+aTypeName+" *a, "+cTypeName+" *expected, "+cTypeName+" desired)\n" - "{\n" - " "+cTypeName+" old = atom_cmpxchg(a, *expected, desired);\n" - " if(old == *expected)\n" - " return true;\n" - " *expected = old;\n" - " return false;\n" - "}\n" - "#define atomic_compare_exchange_weak atomic_compare_exchange_strong\n"; - header += - "#define atomic_fetch_add(x,y) atom_add(x,y)\n" - "#define atomic_fetch_sub(x,y) atom_sub(x,y)\n" - "#define atomic_fetch_or(x,y) atom_or(x,y)\n" - "#define atomic_fetch_xor(x,y) atom_xor(x,y)\n" - "#define atomic_fetch_and(x,y) atom_and(x,y)\n" - "#define atomic_fetch_min(x,y) atom_min(x,y)\n" - "#define atomic_fetch_max(x,y) atom_max(x,y)\n" - "#define atomic_flag_test_and_set(x) atomic_exchange(x,1)\n" - "#define atomic_flag_clear(x) atomic_store(x,0)\n" - "\n"; - } - if(!LocalMemory() && DeclaredInProgram()) - { - // additional atomic variable for results copying (last thread will do this) - header += "__global volatile atomic_uint finishedThreads = ATOMIC_VAR_INIT(0);\n"; - // atomic variables declared in 
program scope - test data - std::stringstream ss; - ss << maxNumDestItems; - header += std::string("__global volatile ")+aTypeName+" destMemory["+ss.str()+"] = {\n"; - ss.str(""); - ss << _startValue; - for(cl_uint i = 0; i < maxNumDestItems; i++) - { - if(aTypeName == "atomic_flag") - header += " ATOMIC_FLAG_INIT"; - else - header += " ATOMIC_VAR_INIT("+ss.str()+")"; - if(i+1 < maxNumDestItems) - header += ","; - header += "\n"; - } - header+= - "};\n" - "\n"; - } - return header; + // Create the program header + std::string header; + std::string aTypeName = DataType().AtomicTypeName(); + std::string cTypeName = DataType().RegularTypeName(); + std::string argListForKernel; + std::string argListForFunction; + std::string argListNoTypes; + std::string functionPrototype; + std::string addressSpace = LocalMemory() ? "__local " : "__global "; + + if (gOldAPI) + { + header += std::string("#define ") + aTypeName + " " + cTypeName + + "\n" + "#define atomic_store(x,y) (*(x) " + "= y)\n" + "#define atomic_load(x) " + "(*(x))\n" + "#define ATOMIC_VAR_INIT(x) (x)\n" + "#define ATOMIC_FLAG_INIT 0\n" + "#define atomic_init(x,y) " + "atomic_store(x,y)\n"; + if (aTypeName == "atomic_float") + header += "#define atomic_exchange(x,y) " + " atomic_xchg(x,y)\n"; + else if (aTypeName == "atomic_double") + header += "double atomic_exchange(volatile " + addressSpace + + "atomic_double *x, double y)\n" + "{\n" + " long tmp = *(long*)&y, res;\n" + " volatile " + + addressSpace + "long *tmpA = (volatile " + addressSpace + + "long)x;\n" + " res = atom_xchg(tmpA,tmp);\n" + " return *(double*)&res;\n" + "}\n"; + else + header += "#define atomic_exchange(x,y) " + " atom_xchg(x,y)\n"; + if (aTypeName != "atomic_float" && aTypeName != "atomic_double") + header += "bool atomic_compare_exchange_strong(volatile " + + addressSpace + " " + aTypeName + " *a, " + cTypeName + + " *expected, " + cTypeName + + " desired)\n" + "{\n" + " " + + cTypeName + + " old = atom_cmpxchg(a, *expected, desired);\n" + 
" if(old == *expected)\n" + " return true;\n" + " *expected = old;\n" + " return false;\n" + "}\n" + "#define atomic_compare_exchange_weak " + "atomic_compare_exchange_strong\n"; + header += "#define atomic_fetch_add(x,y) " + "atom_add(x,y)\n" + "#define atomic_fetch_sub(x,y) " + "atom_sub(x,y)\n" + "#define atomic_fetch_or(x,y) " + "atom_or(x,y)\n" + "#define atomic_fetch_xor(x,y) " + "atom_xor(x,y)\n" + "#define atomic_fetch_and(x,y) " + "atom_and(x,y)\n" + "#define atomic_fetch_min(x,y) " + "atom_min(x,y)\n" + "#define atomic_fetch_max(x,y) " + "atom_max(x,y)\n" + "#define atomic_flag_test_and_set(x) " + "atomic_exchange(x,1)\n" + "#define atomic_flag_clear(x) " + "atomic_store(x,0)\n" + "\n"; + } + if (!LocalMemory() && DeclaredInProgram()) + { + // additional atomic variable for results copying (last thread will do + // this) + header += "__global volatile atomic_uint finishedThreads = " + "ATOMIC_VAR_INIT(0);\n"; + // atomic variables declared in program scope - test data + std::stringstream ss; + ss << maxNumDestItems; + header += std::string("__global volatile ") + aTypeName + " destMemory[" + + ss.str() + "] = {\n"; + ss.str(""); + ss << _startValue; + for (cl_uint i = 0; i < maxNumDestItems; i++) + { + if (aTypeName == "atomic_flag") + header += " ATOMIC_FLAG_INIT"; + else + header += " ATOMIC_VAR_INIT(" + ss.str() + ")"; + if (i + 1 < maxNumDestItems) header += ","; + header += "\n"; + } + header += "};\n" + "\n"; + } + return header; } -template +template std::string CBasicTest::FunctionCode() { - if(!UsedInFunction()) - return ""; - std::string addressSpace = LocalMemory() ? 
"__local " : "__global "; - std::string code = "void test_atomic_function(uint tid, uint threadCount, uint numDestItems, volatile "; - if(!GenericAddrSpace()) - code += addressSpace; - code += std::string(DataType().AtomicTypeName())+" *destMemory, __global "+DataType().RegularTypeName()+ - " *oldValues"; - if(LocalRefValues()) - code += std::string(", __local ")+DataType().RegularTypeName()+" *localValues"; - code += ")\n" - "{\n"; - code += ProgramCore(); - code += "}\n" - "\n"; - return code; + if (!UsedInFunction()) return ""; + std::string addressSpace = LocalMemory() ? "__local " : "__global "; + std::string code = "void test_atomic_function(uint tid, uint threadCount, " + "uint numDestItems, volatile "; + if (!GenericAddrSpace()) code += addressSpace; + code += std::string(DataType().AtomicTypeName()) + " *destMemory, __global " + + DataType().RegularTypeName() + " *oldValues"; + if (LocalRefValues()) + code += std::string(", __local ") + DataType().RegularTypeName() + + " *localValues"; + code += ")\n" + "{\n"; + code += ProgramCore(); + code += "}\n" + "\n"; + return code; } -template -std::string CBasicTest::KernelCode(cl_uint maxNumDestItems) +template +std::string +CBasicTest::KernelCode(cl_uint maxNumDestItems) { - std::string aTypeName = DataType().AtomicTypeName(); - std::string cTypeName = DataType().RegularTypeName(); - std::string addressSpace = LocalMemory() ? "__local " : "__global "; - std::string code = "__kernel void test_atomic_kernel(uint threadCount, uint numDestItems, "; - - // prepare list of arguments for kernel - if(LocalMemory()) - { - code += std::string("__global ")+cTypeName+" *finalDest, __global "+cTypeName+" *oldValues," - " volatile "+addressSpace+aTypeName+" *"+(DeclaredInProgram() ? "notUsed" : "")+"destMemory"; - } - else - { - code += "volatile "+addressSpace+(DeclaredInProgram() ? 
(cTypeName+" *finalDest") : (aTypeName+" *destMemory"))+ - ", __global "+cTypeName+" *oldValues"; - } - if(LocalRefValues()) - code += std::string(", __local ")+cTypeName+" *localValues"; - code += ")\n" - "{\n"; - if(LocalMemory() && DeclaredInProgram()) - { - // local atomics declared in kernel scope - std::stringstream ss; - ss << maxNumDestItems; - code += std::string(" __local volatile ")+aTypeName+" destMemory["+ss.str()+"];\n"; - } - code += " uint tid = get_global_id(0);\n" - "\n"; - if(LocalMemory()) - { - // memory_order_relaxed is sufficient for these initialization operations - // as the barrier below will act as a fence, providing an order to the - // operations. memory_scope_work_group is sufficient as local memory is - // only visible within the work-group. - code += R"( + std::string aTypeName = DataType().AtomicTypeName(); + std::string cTypeName = DataType().RegularTypeName(); + std::string addressSpace = LocalMemory() ? "__local " : "__global "; + std::string code = "__kernel void test_atomic_kernel(uint threadCount, " + "uint numDestItems, "; + + // prepare list of arguments for kernel + if (LocalMemory()) + { + code += std::string("__global ") + cTypeName + " *finalDest, __global " + + cTypeName + + " *oldValues," + " volatile " + + addressSpace + aTypeName + " *" + + (DeclaredInProgram() ? "notUsed" : "") + "destMemory"; + } + else + { + code += "volatile " + addressSpace + + (DeclaredInProgram() ? 
(cTypeName + " *finalDest") + : (aTypeName + " *destMemory")) + + ", __global " + cTypeName + " *oldValues"; + } + if (LocalRefValues()) + code += std::string(", __local ") + cTypeName + " *localValues"; + code += ")\n" + "{\n"; + if (LocalMemory() && DeclaredInProgram()) + { + // local atomics declared in kernel scope + std::stringstream ss; + ss << maxNumDestItems; + code += std::string(" __local volatile ") + aTypeName + " destMemory[" + + ss.str() + "];\n"; + } + code += " uint tid = get_global_id(0);\n" + "\n"; + if (LocalMemory()) + { + // memory_order_relaxed is sufficient for these initialization + // operations as the barrier below will act as a fence, providing an + // order to the operations. memory_scope_work_group is sufficient as + // local memory is only visible within the work-group. + code += R"( // initialize atomics not reachable from host (first thread // is doing this, other threads are waiting on barrier) if(get_local_id(0) == 0) for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++) {)"; - if (aTypeName == "atomic_flag") - { - code += R"( + if (aTypeName == "atomic_flag") + { + code += R"( if(finalDest[dstItemIdx]) atomic_flag_test_and_set_explicit(destMemory+dstItemIdx, memory_order_relaxed, @@ -823,512 +976,565 @@ std::string CBasicTest::KernelCode(cl_uint maxNumD atomic_flag_clear_explicit(destMemory+dstItemIdx, memory_order_relaxed, memory_scope_work_group);)"; - } - else - { - code += R"( + } + else + { + code += R"( atomic_store_explicit(destMemory+dstItemIdx, finalDest[dstItemIdx], memory_order_relaxed, memory_scope_work_group);)"; + } + code += " }\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + "\n"; } - code += - " }\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - "\n"; - } - if (LocalRefValues()) - { - code += - " // Copy input reference values into local memory\n"; - if (NumNonAtomicVariablesPerThread() == 1) - code += " localValues[get_local_id(0)] = oldValues[tid];\n"; - else + if (LocalRefValues()) { - std::stringstream ss; - ss 
<< NumNonAtomicVariablesPerThread(); - code += - " for(uint rfId = 0; rfId < " + ss.str() + "; rfId++)\n" - " localValues[get_local_id(0)*" + ss.str() + "+rfId] = oldValues[tid*" + ss.str() + "+rfId];\n"; - } - code += - " barrier(CLK_LOCAL_MEM_FENCE);\n" - "\n"; - } - if (UsedInFunction()) - code += std::string(" test_atomic_function(tid, threadCount, numDestItems, destMemory, oldValues")+ - (LocalRefValues() ? ", localValues" : "")+");\n"; - else - code += ProgramCore(); - code += "\n"; - if (LocalRefValues()) - { - code += - " // Copy local reference values into output array\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n"; - if (NumNonAtomicVariablesPerThread() == 1) - code += " oldValues[tid] = localValues[get_local_id(0)];\n"; + code += " // Copy input reference values into local memory\n"; + if (NumNonAtomicVariablesPerThread() == 1) + code += " localValues[get_local_id(0)] = oldValues[tid];\n"; + else + { + std::stringstream ss; + ss << NumNonAtomicVariablesPerThread(); + code += " for(uint rfId = 0; rfId < " + ss.str() + + "; rfId++)\n" + " localValues[get_local_id(0)*" + + ss.str() + "+rfId] = oldValues[tid*" + ss.str() + "+rfId];\n"; + } + code += " barrier(CLK_LOCAL_MEM_FENCE);\n" + "\n"; + } + if (UsedInFunction()) + code += std::string(" test_atomic_function(tid, threadCount, " + "numDestItems, destMemory, oldValues") + + (LocalRefValues() ? 
", localValues" : "") + ");\n"; else + code += ProgramCore(); + code += "\n"; + if (LocalRefValues()) { - std::stringstream ss; - ss << NumNonAtomicVariablesPerThread(); - code += - " for(uint rfId = 0; rfId < " + ss.str() + "; rfId++)\n" - " oldValues[tid*" + ss.str() + "+rfId] = localValues[get_local_id(0)*" + ss.str() + "+rfId];\n"; + code += " // Copy local reference values into output array\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n"; + if (NumNonAtomicVariablesPerThread() == 1) + code += " oldValues[tid] = localValues[get_local_id(0)];\n"; + else + { + std::stringstream ss; + ss << NumNonAtomicVariablesPerThread(); + code += " for(uint rfId = 0; rfId < " + ss.str() + + "; rfId++)\n" + " oldValues[tid*" + + ss.str() + "+rfId] = localValues[get_local_id(0)*" + ss.str() + + "+rfId];\n"; + } + code += "\n"; } - code += "\n"; - } - if(LocalMemory() || DeclaredInProgram()) - { - code += " // Copy final values to host reachable buffer\n"; - if(LocalMemory()) - code += - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " if(get_local_id(0) == 0) // first thread in workgroup\n"; - else - // global atomics declared in program scope - code += R"( + if (LocalMemory() || DeclaredInProgram()) + { + code += " // Copy final values to host reachable buffer\n"; + if (LocalMemory()) + code += " barrier(CLK_LOCAL_MEM_FENCE);\n" + " if(get_local_id(0) == 0) // first thread in workgroup\n"; + else + // global atomics declared in program scope + code += R"( if(atomic_fetch_add_explicit(&finishedThreads, 1u, memory_order_relaxed, memory_scope_work_group) == get_global_size(0)-1) // last finished thread )"; - code += - " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)\n"; - if(aTypeName == "atomic_flag") - { - code += R"( + code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; " + "dstItemIdx++)\n"; + if (aTypeName == "atomic_flag") + { + code += R"( finalDest[dstItemIdx] = atomic_flag_test_and_set_explicit(destMemory+dstItemIdx, memory_order_relaxed, 
memory_scope_work_group);)"; - } - else - { - code += R"( + } + else + { + code += R"( finalDest[dstItemIdx] = atomic_load_explicit(destMemory+dstItemIdx, memory_order_relaxed, memory_scope_work_group);)"; + } } - } - code += "}\n" - "\n"; - return code; + code += "}\n" + "\n"; + return code; } template -int CBasicTest::ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) +int CBasicTest::ExecuteSingleTest( + cl_device_id deviceID, cl_context context, cl_command_queue queue) { - int error; - clProgramWrapper program; - clKernelWrapper kernel; - size_t threadNum[1]; - clMemWrapper streams[2]; - std::vector destItems; - HostAtomicType *svmAtomicBuffer = 0; - std::vector refValues, startRefValues; - HostDataType *svmDataBuffer = 0; - cl_uint deviceThreadCount, hostThreadCount, threadCount; - size_t groupSize = 0; - std::string programSource; - const char *programLine; - MTdata d; - size_t typeSize = DataType().Size(deviceID); - - deviceThreadCount = _maxDeviceThreads; - hostThreadCount = MaxHostThreads(); - threadCount = deviceThreadCount+hostThreadCount; - - //log_info("\t%s %s%s...\n", local ? 
"local" : "global", DataType().AtomicTypeName(), memoryOrderScope.c_str()); - log_info("\t%s...\n", SingleTestName().c_str()); - - if(!LocalMemory() && DeclaredInProgram() && gNoGlobalVariables) // no support for program scope global variables - { - log_info("\t\tTest disabled\n"); - return 0; - } - if(UsedInFunction() && GenericAddrSpace() && gNoGenericAddressSpace) - { - log_info("\t\tTest disabled\n"); - return 0; - } - - // set up work sizes based on device capabilities and test configuration - error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(groupSize), &groupSize, NULL); - test_error(error, "Unable to obtain max work group size for device"); - CurrentGroupSize((cl_uint)groupSize); - if(CurrentGroupSize() > deviceThreadCount) - CurrentGroupSize(deviceThreadCount); - if(CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI) - deviceThreadCount = CurrentGroupSize()*CurrentGroupNum(deviceThreadCount); - threadCount = deviceThreadCount+hostThreadCount; - - // If we're given a num_results function, we need to determine how many result objects we need. 
- // This is the first assessment for current maximum number of threads (exact thread count is not known here) - // - needed for program source code generation (arrays of atomics declared in program) - cl_uint numDestItems = NumResults(threadCount, deviceID); - - if(deviceThreadCount > 0) - { - // This loop iteratively reduces the workgroup size by 2 and then - // re-generates the kernel with the reduced - // workgroup size until we find a size which is admissible for the kernel - // being run or reduce the wg size - // to the trivial case of 1 (which was separately verified to be accurate - // for the kernel being run) - - while ((CurrentGroupSize() > 1)) - { - // Re-generate the kernel code with the current group size - if (kernel) clReleaseKernel(kernel); - if (program) clReleaseProgram(program); - programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems) - + FunctionCode() + KernelCode(numDestItems); - programLine = programSource.c_str(); - if (create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &programLine, - "test_atomic_kernel", gOldAPI ? 
"" : nullptr)) - { - return -1; - } - // Get work group size for the new kernel - error = clGetKernelWorkGroupInfo(kernel, deviceID, - CL_KERNEL_WORK_GROUP_SIZE, - sizeof(groupSize), &groupSize, NULL); - test_error(error, - "Unable to obtain max work group size for device and " - "kernel combo"); - - if (LocalMemory()) - { - cl_ulong usedLocalMemory; - cl_ulong totalLocalMemory; - cl_uint maxWorkGroupSize; - - error = clGetKernelWorkGroupInfo( - kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, - sizeof(usedLocalMemory), &usedLocalMemory, NULL); - test_error(error, "clGetKernelWorkGroupInfo failed"); - - error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, - sizeof(totalLocalMemory), - &totalLocalMemory, NULL); - test_error(error, "clGetDeviceInfo failed"); - - // We know that each work-group is going to use typeSize * - // deviceThreadCount bytes of local memory - // so pick the maximum value for deviceThreadCount that uses all - // the local memory. - maxWorkGroupSize = - ((totalLocalMemory - usedLocalMemory) / typeSize); - - if (maxWorkGroupSize < groupSize) groupSize = maxWorkGroupSize; - } - if (CurrentGroupSize() <= groupSize) - break; - else - CurrentGroupSize(CurrentGroupSize() / 2); - } - if(CurrentGroupSize() > deviceThreadCount) - CurrentGroupSize(deviceThreadCount); - if(CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI) - deviceThreadCount = CurrentGroupSize()*CurrentGroupNum(deviceThreadCount); - threadCount = deviceThreadCount+hostThreadCount; - } - if (gDebug) - { - log_info("Program source:\n"); - log_info("%s\n", programLine); - } - if(deviceThreadCount > 0) - log_info("\t\t(thread count %u, group size %u)\n", deviceThreadCount, CurrentGroupSize()); - if(hostThreadCount > 0) - log_info("\t\t(host threads %u)\n", hostThreadCount); - - refValues.resize(threadCount*NumNonAtomicVariablesPerThread()); - - // Generate ref data if we have a ref generator provided - d = init_genrand(gRandomSeed); - 
startRefValues.resize(threadCount*NumNonAtomicVariablesPerThread()); - if(GenerateRefs(threadCount, &startRefValues[0], d)) - { - //copy ref values for host threads - memcpy(&refValues[0], &startRefValues[0], sizeof(HostDataType)*threadCount*NumNonAtomicVariablesPerThread()); - } - else - { - startRefValues.resize(0); - } - free_mtdata(d); - d = NULL; - - // If we're given a num_results function, we need to determine how many result objects we need. If - // we don't have it, we assume it's just 1 - // This is final value (exact thread count is known in this place) - numDestItems = NumResults(threadCount, deviceID); - - destItems.resize(numDestItems); - for(cl_uint i = 0; i < numDestItems; i++) - destItems[i] = _startValue; - - // Create main buffer with atomic variables (array size dependent on particular test) - if(UseSVM()) - { - if(gUseHostPtr) - svmAtomicBuffer = (HostAtomicType*)malloc(typeSize * numDestItems); - else - svmAtomicBuffer = (HostAtomicType*)clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, typeSize * numDestItems, 0); - if(!svmAtomicBuffer) - { - log_error("ERROR: clSVMAlloc failed!\n"); - return -1; - } - memcpy(svmAtomicBuffer, &destItems[0], typeSize * numDestItems); - streams[0] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, - typeSize * numDestItems, svmAtomicBuffer, NULL); - } - else - { - streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - typeSize * numDestItems, &destItems[0], NULL); - } - if (!streams[0]) - { - log_error("ERROR: Creating output array failed!\n"); - return -1; - } - // Create buffer for per-thread input/output data - if(UseSVM()) - { - if(gUseHostPtr) - svmDataBuffer = (HostDataType*)malloc(typeSize*threadCount*NumNonAtomicVariablesPerThread()); - else - svmDataBuffer = (HostDataType*)clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | (SVMDataBufferAllSVMConsistent() ? 
CL_MEM_SVM_ATOMICS : 0), typeSize*threadCount*NumNonAtomicVariablesPerThread(), 0); - if(!svmDataBuffer) - { - log_error("ERROR: clSVMAlloc failed!\n"); - return -1; - } - if(startRefValues.size()) - memcpy(svmDataBuffer, &startRefValues[0], typeSize*threadCount*NumNonAtomicVariablesPerThread()); - streams[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, - typeSize * threadCount - * NumNonAtomicVariablesPerThread(), - svmDataBuffer, NULL); - } - else - { - streams[1] = clCreateBuffer( - context, - ((startRefValues.size() ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE)), - typeSize * threadCount * NumNonAtomicVariablesPerThread(), - startRefValues.size() ? &startRefValues[0] : 0, NULL); - } - if (!streams[1]) - { - log_error("ERROR: Creating reference array failed!\n"); - return -1; - } - if(deviceThreadCount > 0) - { - cl_uint argInd = 0; - /* Set the arguments */ - error = clSetKernelArg(kernel, argInd++, sizeof(threadCount), &threadCount); - test_error(error, "Unable to set kernel argument"); - error = clSetKernelArg(kernel, argInd++, sizeof(numDestItems), &numDestItems); - test_error(error, "Unable to set indexed kernel argument"); - error = clSetKernelArg(kernel, argInd++, sizeof(streams[0]), &streams[0]); - test_error(error, "Unable to set indexed kernel arguments"); - error = clSetKernelArg(kernel, argInd++, sizeof(streams[1]), &streams[1]); - test_error(error, "Unable to set indexed kernel arguments"); - if(LocalMemory()) - { - error = clSetKernelArg(kernel, argInd++, typeSize * numDestItems, NULL); - test_error(error, "Unable to set indexed local kernel argument"); - } - if(LocalRefValues()) - { - error = clSetKernelArg(kernel, argInd++, LocalRefValues() ? 
typeSize*CurrentGroupSize()*NumNonAtomicVariablesPerThread() : 1, NULL); - test_error(error, "Unable to set indexed kernel argument"); - } - } - /* Configure host threads */ - std::vector hostThreadContexts(hostThreadCount); - for(unsigned int t = 0; t < hostThreadCount; t++) - { - hostThreadContexts[t].test = this; - hostThreadContexts[t].tid = deviceThreadCount+t; - hostThreadContexts[t].threadCount = threadCount; - hostThreadContexts[t].destMemory = UseSVM() ? svmAtomicBuffer : &destItems[0]; - hostThreadContexts[t].oldValues = UseSVM() ? svmDataBuffer : &refValues[0]; - } - - if(deviceThreadCount > 0) - { - /* Run the kernel */ - threadNum[0] = deviceThreadCount; - groupSize = CurrentGroupSize(); - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, &groupSize, 0, NULL, NULL); - test_error(error, "Unable to execute test kernel"); - /* start device threads */ - error = clFlush(queue); - test_error(error, "clFlush failed"); - } - - /* Start host threads and wait for finish */ - if(hostThreadCount > 0) - ThreadPool_Do(HostThreadFunction, hostThreadCount, &hostThreadContexts[0]); - - if(UseSVM()) - { - error = clFinish(queue); - test_error(error, "clFinish failed"); - memcpy(&destItems[0], svmAtomicBuffer, typeSize*numDestItems); - memcpy(&refValues[0], svmDataBuffer, typeSize*threadCount*NumNonAtomicVariablesPerThread()); - } - else - { - if(deviceThreadCount > 0) - { - error = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, typeSize * numDestItems, &destItems[0], 0, NULL, NULL); - test_error(error, "Unable to read result value!"); - error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, typeSize * deviceThreadCount*NumNonAtomicVariablesPerThread(), &refValues[0], 0, NULL, NULL); - test_error(error, "Unable to read reference values!"); - } - } - bool dataVerified = false; - // If we have an expectedFn, then we need to generate a final value to compare against. 
If we don't - // have one, it's because we're comparing ref values only - for(cl_uint i = 0; i < numDestItems; i++) - { - HostDataType expected; - - if(!ExpectedValue(expected, threadCount, startRefValues.size() ? &startRefValues[0] : 0, i)) - break; // no expected value function provided - - if(expected != destItems[i]) - { - std::stringstream logLine; - logLine << "ERROR: Result " << i << " from kernel does not validate! (should be " << expected << ", was " << destItems[i] << ")\n"; - log_error("%s", logLine.str().c_str()); - for(i = 0; i < threadCount; i++) - { - logLine.str(""); - logLine << " --- " << i << " - "; - if(startRefValues.size()) - logLine << startRefValues[i] << " -> " << refValues[i]; - else - logLine << refValues[i]; - logLine << " --- "; - if(i < numDestItems) - logLine << destItems[i]; - logLine << "\n"; - log_info("%s", logLine.str().c_str()); - } - if(!gDebug) - { - log_info("Program source:\n"); - log_info("%s\n", programLine); - } - return -1; - } - dataVerified = true; - } - - bool dataCorrect = false; - /* Use the verify function (if provided) to also check the results */ - if(VerifyRefs(dataCorrect, threadCount, &refValues[0], &destItems[0])) - { - if(!dataCorrect) - { - log_error("ERROR: Reference values did not validate!\n"); - std::stringstream logLine; - for(cl_uint i = 0; i < threadCount; i++) - for (cl_uint j = 0; j < NumNonAtomicVariablesPerThread(); j++) - { - logLine.str(""); - logLine << " --- " << i << " - " << refValues[i*NumNonAtomicVariablesPerThread()+j] << " --- "; - if(j == 0 && i < numDestItems) - logLine << destItems[i]; - logLine << "\n"; - log_info("%s", logLine.str().c_str()); - } - if(!gDebug) - { + int error; + clProgramWrapper program; + clKernelWrapper kernel; + size_t threadNum[1]; + clMemWrapper streams[2]; + std::vector destItems; + HostAtomicType *svmAtomicBuffer = 0; + std::vector refValues, startRefValues; + HostDataType *svmDataBuffer = 0; + cl_uint deviceThreadCount, hostThreadCount, threadCount; + 
size_t groupSize = 0; + std::string programSource; + const char *programLine; + MTdata d; + size_t typeSize = DataType().Size(deviceID); + + deviceThreadCount = _maxDeviceThreads; + hostThreadCount = MaxHostThreads(); + threadCount = deviceThreadCount + hostThreadCount; + + // log_info("\t%s %s%s...\n", local ? "local" : "global", + // DataType().AtomicTypeName(), memoryOrderScope.c_str()); + log_info("\t%s...\n", SingleTestName().c_str()); + + if (!LocalMemory() && DeclaredInProgram() + && gNoGlobalVariables) // no support for program scope global variables + { + log_info("\t\tTest disabled\n"); + return 0; + } + if (UsedInFunction() && GenericAddrSpace() && gNoGenericAddressSpace) + { + log_info("\t\tTest disabled\n"); + return 0; + } + + // set up work sizes based on device capabilities and test configuration + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(groupSize), &groupSize, NULL); + test_error(error, "Unable to obtain max work group size for device"); + CurrentGroupSize((cl_uint)groupSize); + if (CurrentGroupSize() > deviceThreadCount) + CurrentGroupSize(deviceThreadCount); + if (CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI) + deviceThreadCount = + CurrentGroupSize() * CurrentGroupNum(deviceThreadCount); + threadCount = deviceThreadCount + hostThreadCount; + + // If we're given a num_results function, we need to determine how many + // result objects we need. 
This is the first assessment for current maximum + // number of threads (exact thread count is not known here) + // - needed for program source code generation (arrays of atomics declared + // in program) + cl_uint numDestItems = NumResults(threadCount, deviceID); + + if (deviceThreadCount > 0) + { + // This loop iteratively reduces the workgroup size by 2 and then + // re-generates the kernel with the reduced + // workgroup size until we find a size which is admissible for the + // kernel being run or reduce the wg size to the trivial case of 1 + // (which was separately verified to be accurate for the kernel being + // run) + + while ((CurrentGroupSize() > 1)) + { + // Re-generate the kernel code with the current group size + if (kernel) clReleaseKernel(kernel); + if (program) clReleaseProgram(program); + programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems) + + FunctionCode() + KernelCode(numDestItems); + programLine = programSource.c_str(); + if (create_single_kernel_helper_with_build_options( + context, &program, &kernel, 1, &programLine, + "test_atomic_kernel", gOldAPI ? 
"" : nullptr)) + { + return -1; + } + // Get work group size for the new kernel + error = clGetKernelWorkGroupInfo( + kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(groupSize), + &groupSize, NULL); + test_error(error, + "Unable to obtain max work group size for device and " + "kernel combo"); + + if (LocalMemory()) + { + cl_ulong usedLocalMemory; + cl_ulong totalLocalMemory; + cl_uint maxWorkGroupSize; + + error = clGetKernelWorkGroupInfo( + kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(usedLocalMemory), &usedLocalMemory, NULL); + test_error(error, "clGetKernelWorkGroupInfo failed"); + + error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, + sizeof(totalLocalMemory), + &totalLocalMemory, NULL); + test_error(error, "clGetDeviceInfo failed"); + + // We know that each work-group is going to use typeSize * + // deviceThreadCount bytes of local memory + // so pick the maximum value for deviceThreadCount that uses all + // the local memory. + maxWorkGroupSize = + ((totalLocalMemory - usedLocalMemory) / typeSize); + + if (maxWorkGroupSize < groupSize) groupSize = maxWorkGroupSize; + } + if (CurrentGroupSize() <= groupSize) + break; + else + CurrentGroupSize(CurrentGroupSize() / 2); + } + if (CurrentGroupSize() > deviceThreadCount) + CurrentGroupSize(deviceThreadCount); + if (CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI) + deviceThreadCount = + CurrentGroupSize() * CurrentGroupNum(deviceThreadCount); + threadCount = deviceThreadCount + hostThreadCount; + } + if (gDebug) + { log_info("Program source:\n"); log_info("%s\n", programLine); - } - return -1; - } - } - else if(!dataVerified) - { - log_error("ERROR: Test doesn't check total or refs; no values are verified!\n"); - return -1; - } - - if(OldValueCheck() && - !(DeclaredInProgram() && !LocalMemory())) // don't test for programs scope global atomics - // 'old' value has been overwritten by previous clEnqueueNDRangeKernel - { - /* Re-write the starting value */ - for(size_t i = 0; i < 
numDestItems; i++) - destItems[i] = _startValue; - refValues[0] = 0; - if(deviceThreadCount > 0) - { - error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, typeSize * numDestItems, &destItems[0], 0, NULL, NULL); - test_error(error, "Unable to write starting values!"); - - /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */ - threadNum[0] = 1; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, threadNum, 0, NULL, NULL); - test_error(error, "Unable to execute test kernel"); - - error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, typeSize, &refValues[0], 0, NULL, NULL); - test_error(error, "Unable to read reference values!"); + } + if (deviceThreadCount > 0) + log_info("\t\t(thread count %u, group size %u)\n", deviceThreadCount, + CurrentGroupSize()); + if (hostThreadCount > 0) + log_info("\t\t(host threads %u)\n", hostThreadCount); + + refValues.resize(threadCount * NumNonAtomicVariablesPerThread()); + + // Generate ref data if we have a ref generator provided + d = init_genrand(gRandomSeed); + startRefValues.resize(threadCount * NumNonAtomicVariablesPerThread()); + if (GenerateRefs(threadCount, &startRefValues[0], d)) + { + // copy ref values for host threads + memcpy(&refValues[0], &startRefValues[0], + sizeof(HostDataType) * threadCount + * NumNonAtomicVariablesPerThread()); } else { - /* Start host thread */ - HostFunction(0, 1, &destItems[0], &refValues[0]); + startRefValues.resize(0); } + free_mtdata(d); + d = NULL; + + // If we're given a num_results function, we need to determine how many + // result objects we need. 
If we don't have it, we assume it's just 1 This + // is final value (exact thread count is known in this place) + numDestItems = NumResults(threadCount, deviceID); - if(refValues[0] != _startValue)//destItems[0]) + destItems.resize(numDestItems); + for (cl_uint i = 0; i < numDestItems; i++) destItems[i] = _startValue; + + // Create main buffer with atomic variables (array size dependent on + // particular test) + if (UseSVM()) { - std::stringstream logLine; - logLine << "ERROR: atomic function operated correctly but did NOT return correct 'old' value " - " (should have been " << destItems[0] << ", returned " << refValues[0] << ")!\n"; - log_error("%s", logLine.str().c_str()); - if(!gDebug) - { - log_info("Program source:\n"); - log_info("%s\n", programLine); - } - return -1; - } - } - if(UseSVM()) - { - // the buffer object must first be released before the SVM buffer is freed - error = clReleaseMemObject(streams[0]); - streams[0] = 0; - test_error(error, "clReleaseMemObject failed"); - if(gUseHostPtr) - free(svmAtomicBuffer); + if (gUseHostPtr) + svmAtomicBuffer = (HostAtomicType *)malloc(typeSize * numDestItems); + else + svmAtomicBuffer = (HostAtomicType *)clSVMAlloc( + context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, + typeSize * numDestItems, 0); + if (!svmAtomicBuffer) + { + log_error("ERROR: clSVMAlloc failed!\n"); + return -1; + } + memcpy(svmAtomicBuffer, &destItems[0], typeSize * numDestItems); + streams[0] = + clCreateBuffer(context, CL_MEM_USE_HOST_PTR, + typeSize * numDestItems, svmAtomicBuffer, NULL); + } + else + { + streams[0] = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + typeSize * numDestItems, &destItems[0], NULL); + } + if (!streams[0]) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + // Create buffer for per-thread input/output data + if (UseSVM()) + { + if (gUseHostPtr) + svmDataBuffer = (HostDataType *)malloc( + typeSize * threadCount * NumNonAtomicVariablesPerThread()); + else + svmDataBuffer = 
(HostDataType *)clSVMAlloc( + context, + CL_MEM_SVM_FINE_GRAIN_BUFFER + | (SVMDataBufferAllSVMConsistent() ? CL_MEM_SVM_ATOMICS + : 0), + typeSize * threadCount * NumNonAtomicVariablesPerThread(), 0); + if (!svmDataBuffer) + { + log_error("ERROR: clSVMAlloc failed!\n"); + return -1; + } + if (startRefValues.size()) + memcpy(svmDataBuffer, &startRefValues[0], + typeSize * threadCount * NumNonAtomicVariablesPerThread()); + streams[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, + typeSize * threadCount + * NumNonAtomicVariablesPerThread(), + svmDataBuffer, NULL); + } else - clSVMFree(context, svmAtomicBuffer); - error = clReleaseMemObject(streams[1]); - streams[1] = 0; - test_error(error, "clReleaseMemObject failed"); - if(gUseHostPtr) - free(svmDataBuffer); + { + streams[1] = clCreateBuffer( + context, + ((startRefValues.size() ? CL_MEM_COPY_HOST_PTR + : CL_MEM_READ_WRITE)), + typeSize * threadCount * NumNonAtomicVariablesPerThread(), + startRefValues.size() ? &startRefValues[0] : 0, NULL); + } + if (!streams[1]) + { + log_error("ERROR: Creating reference array failed!\n"); + return -1; + } + if (deviceThreadCount > 0) + { + cl_uint argInd = 0; + /* Set the arguments */ + error = + clSetKernelArg(kernel, argInd++, sizeof(threadCount), &threadCount); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, argInd++, sizeof(numDestItems), + &numDestItems); + test_error(error, "Unable to set indexed kernel argument"); + error = + clSetKernelArg(kernel, argInd++, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = + clSetKernelArg(kernel, argInd++, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); + if (LocalMemory()) + { + error = + clSetKernelArg(kernel, argInd++, typeSize * numDestItems, NULL); + test_error(error, "Unable to set indexed local kernel argument"); + } + if (LocalRefValues()) + { + error = + clSetKernelArg(kernel, 
argInd++, + LocalRefValues() ? typeSize * CurrentGroupSize() + * NumNonAtomicVariablesPerThread() + : 1, + NULL); + test_error(error, "Unable to set indexed kernel argument"); + } + } + /* Configure host threads */ + std::vector hostThreadContexts(hostThreadCount); + for (unsigned int t = 0; t < hostThreadCount; t++) + { + hostThreadContexts[t].test = this; + hostThreadContexts[t].tid = deviceThreadCount + t; + hostThreadContexts[t].threadCount = threadCount; + hostThreadContexts[t].destMemory = + UseSVM() ? svmAtomicBuffer : &destItems[0]; + hostThreadContexts[t].oldValues = + UseSVM() ? svmDataBuffer : &refValues[0]; + } + + if (deviceThreadCount > 0) + { + /* Run the kernel */ + threadNum[0] = deviceThreadCount; + groupSize = CurrentGroupSize(); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, + &groupSize, 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); + /* start device threads */ + error = clFlush(queue); + test_error(error, "clFlush failed"); + } + + /* Start host threads and wait for finish */ + if (hostThreadCount > 0) + ThreadPool_Do(HostThreadFunction, hostThreadCount, + &hostThreadContexts[0]); + + if (UseSVM()) + { + error = clFinish(queue); + test_error(error, "clFinish failed"); + memcpy(&destItems[0], svmAtomicBuffer, typeSize * numDestItems); + memcpy(&refValues[0], svmDataBuffer, + typeSize * threadCount * NumNonAtomicVariablesPerThread()); + } else - clSVMFree(context, svmDataBuffer); - } - _passCount++; - return 0; + { + if (deviceThreadCount > 0) + { + error = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, + typeSize * numDestItems, &destItems[0], + 0, NULL, NULL); + test_error(error, "Unable to read result value!"); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, + typeSize * deviceThreadCount + * NumNonAtomicVariablesPerThread(), + &refValues[0], 0, NULL, NULL); + test_error(error, "Unable to read reference values!"); + } + } + bool dataVerified = false; + // If we have an expectedFn, 
then we need to generate a final value to + // compare against. If we don't have one, it's because we're comparing ref + // values only + for (cl_uint i = 0; i < numDestItems; i++) + { + HostDataType expected; + + if (!ExpectedValue(expected, threadCount, + startRefValues.size() ? &startRefValues[0] : 0, i)) + break; // no expected value function provided + + if (expected != destItems[i]) + { + std::stringstream logLine; + logLine << "ERROR: Result " << i + << " from kernel does not validate! (should be " << expected + << ", was " << destItems[i] << ")\n"; + log_error("%s", logLine.str().c_str()); + for (i = 0; i < threadCount; i++) + { + logLine.str(""); + logLine << " --- " << i << " - "; + if (startRefValues.size()) + logLine << startRefValues[i] << " -> " << refValues[i]; + else + logLine << refValues[i]; + logLine << " --- "; + if (i < numDestItems) logLine << destItems[i]; + logLine << "\n"; + log_info("%s", logLine.str().c_str()); + } + if (!gDebug) + { + log_info("Program source:\n"); + log_info("%s\n", programLine); + } + return -1; + } + dataVerified = true; + } + + bool dataCorrect = false; + /* Use the verify function (if provided) to also check the results */ + if (VerifyRefs(dataCorrect, threadCount, &refValues[0], &destItems[0])) + { + if (!dataCorrect) + { + log_error("ERROR: Reference values did not validate!\n"); + std::stringstream logLine; + for (cl_uint i = 0; i < threadCount; i++) + for (cl_uint j = 0; j < NumNonAtomicVariablesPerThread(); j++) + { + logLine.str(""); + logLine + << " --- " << i << " - " + << refValues[i * NumNonAtomicVariablesPerThread() + j] + << " --- "; + if (j == 0 && i < numDestItems) logLine << destItems[i]; + logLine << "\n"; + log_info("%s", logLine.str().c_str()); + } + if (!gDebug) + { + log_info("Program source:\n"); + log_info("%s\n", programLine); + } + return -1; + } + } + else if (!dataVerified) + { + log_error("ERROR: Test doesn't check total or refs; no values are " + "verified!\n"); + return -1; + } + + if 
(OldValueCheck() + && !(DeclaredInProgram() + && !LocalMemory())) // don't test for programs scope global atomics + // 'old' value has been overwritten by previous + // clEnqueueNDRangeKernel + { + /* Re-write the starting value */ + for (size_t i = 0; i < numDestItems; i++) destItems[i] = _startValue; + refValues[0] = 0; + if (deviceThreadCount > 0) + { + error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, + typeSize * numDestItems, &destItems[0], + 0, NULL, NULL); + test_error(error, "Unable to write starting values!"); + + /* Run the kernel once for a single thread, so we can verify that + * the returned value is the original one */ + threadNum[0] = 1; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, + threadNum, 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); + + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, typeSize, + &refValues[0], 0, NULL, NULL); + test_error(error, "Unable to read reference values!"); + } + else + { + /* Start host thread */ + HostFunction(0, 1, &destItems[0], &refValues[0]); + } + + if (refValues[0] != _startValue) // destItems[0]) + { + std::stringstream logLine; + logLine << "ERROR: atomic function operated correctly but did NOT " + "return correct 'old' value " + " (should have been " + << destItems[0] << ", returned " << refValues[0] << ")!\n"; + log_error("%s", logLine.str().c_str()); + if (!gDebug) + { + log_info("Program source:\n"); + log_info("%s\n", programLine); + } + return -1; + } + } + if (UseSVM()) + { + // the buffer object must first be released before the SVM buffer is + // freed. 
The Wrapper Class method reset() will do that + streams[0].reset(); + if (gUseHostPtr) + free(svmAtomicBuffer); + else + clSVMFree(context, svmAtomicBuffer); + streams[1].reset(); + if (gUseHostPtr) + free(svmDataBuffer); + else + clSVMFree(context, svmDataBuffer); + } + _passCount++; + return 0; } #endif //_COMMON_H_ -- cgit v1.2.3 From 43e1397468053608134816cbcf6e8496e91cb227 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Wed, 18 Aug 2021 11:11:30 +0100 Subject: Fix kernel source for cl_khr_suggested_local_work_size (#1300) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use ASCII '-' instead of unicode '–' as subtration operator. Signed-off-by: Kévin Petit --- test_conformance/workgroups/test_wg_suggested_local_work_size.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp index 1dc1b39c..aa02391c 100644 --- a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp +++ b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp @@ -42,9 +42,9 @@ const char* wg_scan_local_work_group_size = R"( { size_t linear_id; #if __OPENCL_VERSION__ < CL_VERSION_2_0 - linear_id = ((get_global_id(2) – get_global_offset(2)) * get_global_size(1) * get_global_size(0)) + - ((get_global_id(1) – get_global_offset(1)) * get_global_size(0)) + - (get_global_id(0) – get_global_offset(0)); + linear_id = ((get_global_id(2) - get_global_offset(2)) * get_global_size(1) * get_global_size(0)) + + ((get_global_id(1) - get_global_offset(1)) * get_global_size(0)) + + (get_global_id(0) - get_global_offset(0)); #else linear_id = get_global_linear_id(); #endif @@ -608,4 +608,4 @@ int test_work_group_suggested_local_size_3D(cl_device_id device, "global_work_offset passed\n"); return err; -} \ No newline at end of file +} -- cgit v1.2.3 From 
6c3c7e5266cddce9cfa466c02c14b43fee453110 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Thu, 19 Aug 2021 12:15:47 +0100 Subject: Remove unused definitions in CMakeLists.txt (#1302) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kévin Petit --- CMakeLists.txt | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d947ed1..a614649f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,12 +10,6 @@ set(CMAKE_C_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) -if(CMAKE_BUILD_TYPE STREQUAL "release") - set (BUILD_FLAVOR "release") -else(CMAKE_BUILD_TYPE STREQUAL "release") - set (BUILD_FLAVOR "debug") -endif(CMAKE_BUILD_TYPE STREQUAL "release") - add_definitions(-DCL_TARGET_OPENCL_VERSION=300) add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_2_APIS=1) add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_1_APIS=1) @@ -29,14 +23,6 @@ if(USE_CL_EXPERIMENTAL) add_definitions(-DCL_EXPERIMENTAL) endif(USE_CL_EXPERIMENTAL) -# Support both VS2008 and VS2012. -set(BUILD_DIR "$ENV{ADRENO_DRIVER}/build") -if(MSVC90) - set(VS_BUILD_DIR "${BUILD_DIR}/vs2008") -else(MSVC110) - set(VS_BUILD_DIR "${BUILD_DIR}/vs2012") -endif(MSVC90) - #----------------------------------------------------------- # Default Configurable Test Set #----------------------------------------------------------- @@ -164,38 +150,5 @@ include_directories(${CLConform_SOURCE_DIR}/test_common/harness ${CLConform_SOURCE_DIR}/test_common/gl ${CLConform_SOURCE_DIR}/test_common) -if(CMAKE_BUILD_TYPE STREQUAL "release") - set (BUILD_FLAVOR "release") -elseif (CMAKE_BUILD_TYPE STREQUAL "debug") - set (BUILD_FLAVOR "debug") -endif(CMAKE_BUILD_TYPE STREQUAL "release") - - add_subdirectory(test_common) add_subdirectory(test_conformance) - -# Support both VS2008 and VS2012. 
-set (DLL_FILES "${VS_BUILD_DIR}/Debug/*.dll") -set (DST_DIR "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Debug/") - -if (WIN32) - set (COPY "echo") - add_custom_target(COPY_DLL${CONFORMANCE_SUFFIX} ALL - COMMAND ${COPY} "${DLL_FILES}" "${DST_DIR}" - COMMENT "Copying dll files.. ") -else (WIN32) - set (COPY cp) - add_custom_target(COPY_DLL${CONFORMANCE_SUFFIX}) -endif(WIN32) - -set_property(TARGET COPY_DLL${CONFORMANCE_SUFFIX} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}") - -if(WIN32) - add_custom_target( COPY_FILES${CONFORMANCE_SUFFIX} ALL - COMMAND ${COPY} ${DLL_FILES} ${DST_DIR} - COMMENT "Copying other files to output folder..." ) -else(WIN32) - add_custom_target( COPY_FILES${CONFORMANCE_SUFFIX} ) -endif(WIN32) - -set_property(TARGET COPY_FILES${CONFORMANCE_SUFFIX} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}") -- cgit v1.2.3 From 070f8c0c0ed8786e410584efa3fefa47bdab02c6 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 25 Aug 2021 02:14:58 -0700 Subject: add tests for cl_khr_integer_dot_product (#1276) * cl_khr_integer_dot_product_tests * remove emulated codepaths * fix formatting * address code review comments * remove emulated codepaths again * address one more review comment --- test_common/harness/integer_ops_test_info.h | 91 +++++ test_conformance/integer_ops/CMakeLists.txt | 1 + test_conformance/integer_ops/main.cpp | 236 ++++++------- test_conformance/integer_ops/procs.h | 2 + .../integer_ops/test_integer_dot_product.cpp | 380 +++++++++++++++++++++ 5 files changed, 593 insertions(+), 117 deletions(-) create mode 100644 test_common/harness/integer_ops_test_info.h create mode 100644 test_conformance/integer_ops/test_integer_dot_product.cpp diff --git a/test_common/harness/integer_ops_test_info.h b/test_common/harness/integer_ops_test_info.h new file mode 100644 index 00000000..c25843dd --- /dev/null +++ b/test_common/harness/integer_ops_test_info.h @@ -0,0 +1,91 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef INTEGER_OPS_TEST_INFO_H +#define INTEGER_OPS_TEST_INFO_H + +#include "conversions.h" + +// TODO: expand usage to other tests. + +template struct TestInfo +{ +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kChar; + static constexpr const char* deviceTypeName = "char"; + static constexpr const char* deviceTypeNameSigned = "char"; + static constexpr const char* deviceTypeNameUnsigned = "uchar"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kUChar; + static constexpr const char* deviceTypeName = "uchar"; + static constexpr const char* deviceTypeNameSigned = "char"; + static constexpr const char* deviceTypeNameUnsigned = "uchar"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kShort; + static constexpr const char* deviceTypeName = "short"; + static constexpr const char* deviceTypeNameSigned = "short"; + static constexpr const char* deviceTypeNameUnsigned = "ushort"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kUShort; + static constexpr const char* deviceTypeName = "ushort"; + static constexpr const char* deviceTypeNameSigned = "short"; + static constexpr const char* deviceTypeNameUnsigned = "ushort"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kInt; + static constexpr const char* deviceTypeName = "int"; + static constexpr const char* 
deviceTypeNameSigned = "int"; + static constexpr const char* deviceTypeNameUnsigned = "uint"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kUInt; + static constexpr const char* deviceTypeName = "uint"; + static constexpr const char* deviceTypeNameSigned = "int"; + static constexpr const char* deviceTypeNameUnsigned = "uint"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kLong; + static constexpr const char* deviceTypeName = "long"; + static constexpr const char* deviceTypeNameSigned = "long"; + static constexpr const char* deviceTypeNameUnsigned = "ulong"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kULong; + static constexpr const char* deviceTypeName = "ulong"; + static constexpr const char* deviceTypeNameSigned = "long"; + static constexpr const char* deviceTypeNameUnsigned = "ulong"; +}; + +template +static void fill_vector_with_random_data(std::vector& v) +{ + MTdataHolder d(gRandomSeed); + generate_random_data(TestInfo::explicitType, v.size(), d, v.data()); +} + +#endif /* INTEGER_OPS_TEST_INFO_H */ diff --git a/test_conformance/integer_ops/CMakeLists.txt b/test_conformance/integer_ops/CMakeLists.txt index a045ef81..5344eabc 100644 --- a/test_conformance/integer_ops/CMakeLists.txt +++ b/test_conformance/integer_ops/CMakeLists.txt @@ -11,6 +11,7 @@ set(${MODULE_NAME}_SOURCES test_unary_ops.cpp verification_and_generation_functions.cpp test_popcount.cpp + test_integer_dot_product.cpp ) include(../CMakeCommon.txt) diff --git a/test_conformance/integer_ops/main.cpp b/test_conformance/integer_ops/main.cpp index 00e91661..e57cffd9 100644 --- a/test_conformance/integer_ops/main.cpp +++ b/test_conformance/integer_ops/main.cpp @@ -25,127 +25,129 @@ #endif test_definition test_list[] = { - ADD_TEST( integer_clz ), - ADD_TEST_VERSION( integer_ctz, Version(2, 0)), - ADD_TEST( integer_hadd ), - ADD_TEST( integer_rhadd ), - ADD_TEST( integer_mul_hi ), - ADD_TEST( 
integer_rotate ), - ADD_TEST( integer_clamp ), - ADD_TEST( integer_mad_sat ), - ADD_TEST( integer_mad_hi ), - ADD_TEST( integer_min ), - ADD_TEST( integer_max ), - ADD_TEST( integer_upsample ), - - ADD_TEST( integer_abs ), - ADD_TEST( integer_abs_diff ), - ADD_TEST( integer_add_sat ), - ADD_TEST( integer_sub_sat ), - - ADD_TEST( integer_addAssign ), - ADD_TEST( integer_subtractAssign ), - ADD_TEST( integer_multiplyAssign ), - ADD_TEST( integer_divideAssign ), - ADD_TEST( integer_moduloAssign ), - ADD_TEST( integer_andAssign ), - ADD_TEST( integer_orAssign ), - ADD_TEST( integer_exclusiveOrAssign ), - - ADD_TEST( unary_ops_increment ), - ADD_TEST( unary_ops_decrement ), - ADD_TEST( unary_ops_full ), - - ADD_TEST( integer_mul24 ), - ADD_TEST( integer_mad24 ), - - ADD_TEST( long_math ), - ADD_TEST( long_logic ), - ADD_TEST( long_shift ), - ADD_TEST( long_compare ), - - ADD_TEST( ulong_math ), - ADD_TEST( ulong_logic ), - ADD_TEST( ulong_shift ), - ADD_TEST( ulong_compare ), - - ADD_TEST( int_math ), - ADD_TEST( int_logic ), - ADD_TEST( int_shift ), - ADD_TEST( int_compare ), - - ADD_TEST( uint_math ), - ADD_TEST( uint_logic ), - ADD_TEST( uint_shift ), - ADD_TEST( uint_compare ), - - ADD_TEST( short_math ), - ADD_TEST( short_logic ), - ADD_TEST( short_shift ), - ADD_TEST( short_compare ), - - ADD_TEST( ushort_math ), - ADD_TEST( ushort_logic ), - ADD_TEST( ushort_shift ), - ADD_TEST( ushort_compare ), - - ADD_TEST( char_math ), - ADD_TEST( char_logic ), - ADD_TEST( char_shift ), - ADD_TEST( char_compare ), - - ADD_TEST( uchar_math ), - ADD_TEST( uchar_logic ), - ADD_TEST( uchar_shift ), - ADD_TEST( uchar_compare ), - - ADD_TEST( popcount ), + ADD_TEST(integer_clz), + ADD_TEST_VERSION(integer_ctz, Version(2, 0)), + ADD_TEST(integer_hadd), + ADD_TEST(integer_rhadd), + ADD_TEST(integer_mul_hi), + ADD_TEST(integer_rotate), + ADD_TEST(integer_clamp), + ADD_TEST(integer_mad_sat), + ADD_TEST(integer_mad_hi), + ADD_TEST(integer_min), + ADD_TEST(integer_max), + 
ADD_TEST(integer_upsample), + + ADD_TEST(integer_abs), + ADD_TEST(integer_abs_diff), + ADD_TEST(integer_add_sat), + ADD_TEST(integer_sub_sat), + + ADD_TEST(integer_addAssign), + ADD_TEST(integer_subtractAssign), + ADD_TEST(integer_multiplyAssign), + ADD_TEST(integer_divideAssign), + ADD_TEST(integer_moduloAssign), + ADD_TEST(integer_andAssign), + ADD_TEST(integer_orAssign), + ADD_TEST(integer_exclusiveOrAssign), + + ADD_TEST(unary_ops_increment), + ADD_TEST(unary_ops_decrement), + ADD_TEST(unary_ops_full), + + ADD_TEST(integer_mul24), + ADD_TEST(integer_mad24), + + ADD_TEST(long_math), + ADD_TEST(long_logic), + ADD_TEST(long_shift), + ADD_TEST(long_compare), + + ADD_TEST(ulong_math), + ADD_TEST(ulong_logic), + ADD_TEST(ulong_shift), + ADD_TEST(ulong_compare), + + ADD_TEST(int_math), + ADD_TEST(int_logic), + ADD_TEST(int_shift), + ADD_TEST(int_compare), + + ADD_TEST(uint_math), + ADD_TEST(uint_logic), + ADD_TEST(uint_shift), + ADD_TEST(uint_compare), + + ADD_TEST(short_math), + ADD_TEST(short_logic), + ADD_TEST(short_shift), + ADD_TEST(short_compare), + + ADD_TEST(ushort_math), + ADD_TEST(ushort_logic), + ADD_TEST(ushort_shift), + ADD_TEST(ushort_compare), + + ADD_TEST(char_math), + ADD_TEST(char_logic), + ADD_TEST(char_shift), + ADD_TEST(char_compare), + + ADD_TEST(uchar_math), + ADD_TEST(uchar_logic), + ADD_TEST(uchar_shift), + ADD_TEST(uchar_compare), + + ADD_TEST(popcount), // Quick - ADD_TEST( quick_long_math ), - ADD_TEST( quick_long_logic ), - ADD_TEST( quick_long_shift ), - ADD_TEST( quick_long_compare ), - - ADD_TEST( quick_ulong_math ), - ADD_TEST( quick_ulong_logic ), - ADD_TEST( quick_ulong_shift ), - ADD_TEST( quick_ulong_compare ), - - ADD_TEST( quick_int_math ), - ADD_TEST( quick_int_logic ), - ADD_TEST( quick_int_shift ), - ADD_TEST( quick_int_compare ), - - ADD_TEST( quick_uint_math ), - ADD_TEST( quick_uint_logic ), - ADD_TEST( quick_uint_shift ), - ADD_TEST( quick_uint_compare ), - - ADD_TEST( quick_short_math ), - ADD_TEST( quick_short_logic ), - 
ADD_TEST( quick_short_shift ), - ADD_TEST( quick_short_compare ), - - ADD_TEST( quick_ushort_math ), - ADD_TEST( quick_ushort_logic ), - ADD_TEST( quick_ushort_shift ), - ADD_TEST( quick_ushort_compare ), - - ADD_TEST( quick_char_math ), - ADD_TEST( quick_char_logic ), - ADD_TEST( quick_char_shift ), - ADD_TEST( quick_char_compare ), - - ADD_TEST( quick_uchar_math ), - ADD_TEST( quick_uchar_logic ), - ADD_TEST( quick_uchar_shift ), - ADD_TEST( quick_uchar_compare ), - - ADD_TEST( vector_scalar ), + ADD_TEST(quick_long_math), + ADD_TEST(quick_long_logic), + ADD_TEST(quick_long_shift), + ADD_TEST(quick_long_compare), + + ADD_TEST(quick_ulong_math), + ADD_TEST(quick_ulong_logic), + ADD_TEST(quick_ulong_shift), + ADD_TEST(quick_ulong_compare), + + ADD_TEST(quick_int_math), + ADD_TEST(quick_int_logic), + ADD_TEST(quick_int_shift), + ADD_TEST(quick_int_compare), + + ADD_TEST(quick_uint_math), + ADD_TEST(quick_uint_logic), + ADD_TEST(quick_uint_shift), + ADD_TEST(quick_uint_compare), + + ADD_TEST(quick_short_math), + ADD_TEST(quick_short_logic), + ADD_TEST(quick_short_shift), + ADD_TEST(quick_short_compare), + + ADD_TEST(quick_ushort_math), + ADD_TEST(quick_ushort_logic), + ADD_TEST(quick_ushort_shift), + ADD_TEST(quick_ushort_compare), + + ADD_TEST(quick_char_math), + ADD_TEST(quick_char_logic), + ADD_TEST(quick_char_shift), + ADD_TEST(quick_char_compare), + + ADD_TEST(quick_uchar_math), + ADD_TEST(quick_uchar_logic), + ADD_TEST(quick_uchar_shift), + ADD_TEST(quick_uchar_compare), + + ADD_TEST(vector_scalar), + + ADD_TEST(integer_dot_product), }; -const int test_num = ARRAY_SIZE( test_list ); +const int test_num = ARRAY_SIZE(test_list); void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d ) { diff --git a/test_conformance/integer_ops/procs.h b/test_conformance/integer_ops/procs.h index d5b77e70..82311fb9 100644 --- a/test_conformance/integer_ops/procs.h +++ b/test_conformance/integer_ops/procs.h @@ -141,3 +141,5 @@ extern int 
test_unary_ops_decrement(cl_device_id deviceID, cl_context context, c extern int test_vector_scalar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_dot_product(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); diff --git a/test_conformance/integer_ops/test_integer_dot_product.cpp b/test_conformance/integer_ops/test_integer_dot_product.cpp new file mode 100644 index 00000000..b5378ae0 --- /dev/null +++ b/test_conformance/integer_ops/test_integer_dot_product.cpp @@ -0,0 +1,380 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// This is needed for std::numeric_limits<>::min() and max() to work on Windows. +#if defined(_WIN32) +#define NOMINMAX +#endif + +#include +#include +#include +#include +#include + +#include "procs.h" +#include "harness/integer_ops_test_info.h" +#include "harness/testHarness.h" + +template +static void +calculate_reference(std::vector& ref, const std::vector& a, + const std::vector& b, const bool AccSat = false, + const std::vector& acc = {}) +{ + assert(a.size() == b.size()); + assert(AccSat == false || acc.size() == a.size() / N); + + ref.resize(a.size() / N); + for (size_t r = 0; r < ref.size(); r++) + { + cl_long result = AccSat ? acc[r] : 0; + for (size_t c = 0; c < N; c++) + { + // OK to assume no overflow? 
+ result += a[r * N + c] * b[r * N + c]; + } + if (AccSat && result > std::numeric_limits::max()) + { + result = std::numeric_limits::max(); + } + ref[r] = static_cast(result); + } +} + +template +void generate_inputs_with_special_values(std::vector& a, + std::vector& b) +{ + const std::vector specialValuesA( + { static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::min() + 1), + static_cast(std::numeric_limits::min() / 2), 0, + static_cast(std::numeric_limits::max() / 2), + static_cast(std::numeric_limits::max() - 1), + static_cast(std::numeric_limits::max()) }); + const std::vector specialValuesB( + { static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::min() + 1), + static_cast(std::numeric_limits::min() / 2), 0, + static_cast(std::numeric_limits::max() / 2), + static_cast(std::numeric_limits::max() - 1), + static_cast(std::numeric_limits::max()) }); + + size_t count = 0; + for (auto svA : specialValuesA) + { + for (auto svB : specialValuesB) + { + a[count] = svA; + b[count] = svB; + ++count; + } + } + + // Generate random data for the rest of the inputs: + MTdataHolder d(gRandomSeed); + generate_random_data(TestInfo::explicitType, a.size() - count, d, + a.data() + count); + generate_random_data(TestInfo::explicitType, b.size() - count, d, + b.data() + count); +} + +template +void generate_acc_sat_inputs(std::vector& acc) +{ + // First generate random data: + fill_vector_with_random_data(acc); + + // Now go through the generated data, and make every other element large. + // This ensures we have some elements that need saturation. 
+ for (size_t i = 0; i < acc.size(); i += 2) + { + acc[i] = std::numeric_limits::max() - acc[i]; + } +} + +template struct PackedTestInfo +{ + static constexpr const char* deviceTypeName = "UNSUPPORTED"; +}; +template <> struct PackedTestInfo +{ + static constexpr const char* deviceTypeName = "int"; +}; +template <> struct PackedTestInfo +{ + static constexpr const char* deviceTypeName = "uint"; +}; + +static constexpr const char* kernel_source_dot = R"CLC( +__kernel void test_dot(__global DSTTYPE* dst, __global SRCTYPEA* a, __global SRCTYPEB* b) +{ + int index = get_global_id(0); + dst[index] = DOT(a[index], b[index]); +} +)CLC"; + +static constexpr const char* kernel_source_dot_acc_sat = R"CLC( +__kernel void test_dot_acc_sat( + __global DSTTYPE* dst, + __global SRCTYPEA* a, __global SRCTYPEB* b, __global DSTTYPE* acc) +{ + int index = get_global_id(0); + dst[index] = DOT_ACC_SAT(a[index], b[index], acc[index]); +} +)CLC"; + +template +static int test_case_dot(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, bool packed, + bool sat) +{ + log_info(" testing %s = dot%s%s(%s, %s)\n", + std::numeric_limits::is_signed ? "signed" : "unsigned", + sat ? "_acc_sat" : "", packed ? "_packed" : "", + std::numeric_limits::is_signed ? "signed" : "unsigned", + std::numeric_limits::is_signed ? "signed" : "unsigned"); + + cl_int error = CL_SUCCESS; + + clProgramWrapper program; + clKernelWrapper kernel; + + std::string buildOptions; + buildOptions += " -DDSTTYPE="; + buildOptions += TestInfo::deviceTypeName; + buildOptions += " -DSRCTYPEA="; + buildOptions += packed + ? PackedTestInfo::deviceTypeName + : TestInfo::deviceTypeName + std::to_string(N); + buildOptions += " -DSRCTYPEB="; + buildOptions += packed + ? PackedTestInfo::deviceTypeName + : TestInfo::deviceTypeName + std::to_string(N); + std::string packedSuffix; + packedSuffix += std::numeric_limits::is_signed ? "s" : "u"; + packedSuffix += std::numeric_limits::is_signed ? 
"s" : "u"; + packedSuffix += std::numeric_limits::is_signed ? "_int" : "_uint"; + if (sat) + { + buildOptions += packed + ? " -DDOT_ACC_SAT=dot_acc_sat_4x8packed_" + packedSuffix + : " -DDOT_ACC_SAT=dot_acc_sat"; + } + else + { + buildOptions += + packed ? " -DDOT=dot_4x8packed_" + packedSuffix : " -DDOT=dot"; + } + + std::vector a(N * num_elements); + std::vector b(N * num_elements); + generate_inputs_with_special_values(a, b); + + std::vector acc; + if (sat) + { + acc.resize(num_elements); + generate_acc_sat_inputs(acc); + } + + std::vector reference(num_elements); + calculate_reference(reference, a, b, sat, acc); + + const char* source = sat ? kernel_source_dot_acc_sat : kernel_source_dot; + const char* name = sat ? "test_dot_acc_sat" : "test_dot"; + error = create_single_kernel_helper(context, &program, &kernel, 1, &source, + name, buildOptions.c_str()); + test_error(error, "Unable to create test kernel"); + + clMemWrapper dst = clCreateBuffer( + context, 0, reference.size() * sizeof(DstType), NULL, &error); + test_error(error, "Unable to create output buffer"); + + clMemWrapper srcA = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + a.size() * sizeof(SrcTypeA), a.data(), &error); + test_error(error, "Unable to create srcA buffer"); + + clMemWrapper srcB = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + b.size() * sizeof(SrcTypeB), b.data(), &error); + test_error(error, "Unable to create srcB buffer"); + + clMemWrapper srcAcc; + if (sat) + { + srcAcc = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + acc.size() * sizeof(DstType), acc.data(), &error); + test_error(error, "Unable to create acc buffer"); + } + + error = clSetKernelArg(kernel, 0, sizeof(dst), &dst); + test_error(error, "Unable to set output buffer kernel arg"); + + error = clSetKernelArg(kernel, 1, sizeof(srcA), &srcA); + test_error(error, "Unable to set srcA buffer kernel arg"); + + error = clSetKernelArg(kernel, 2, sizeof(srcB), &srcB); + test_error(error, "Unable to set srcB buffer kernel 
arg"); + + if (sat) + { + error = clSetKernelArg(kernel, 3, sizeof(srcAcc), &srcAcc); + test_error(error, "Unable to set acc buffer kernel arg"); + } + + size_t global_work_size[] = { reference.size() }; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, + NULL, 0, NULL, NULL); + test_error(error, "Unable to enqueue test kernel"); + + error = clFinish(queue); + test_error(error, "clFinish failed after test kernel"); + + std::vector results(reference.size(), 99); + error = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, + results.size() * sizeof(DstType), + results.data(), 0, NULL, NULL); + test_error(error, "Unable to read data after test kernel"); + + if (results != reference) + { + log_error("Result buffer did not match reference buffer!\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +template +static int test_vectype(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + int result = TEST_PASS; + + typedef typename std::make_signed::type SSrcType; + typedef typename std::make_signed::type SDstType; + + typedef typename std::make_unsigned::type USrcType; + typedef typename std::make_unsigned::type UDstType; + + // dot testing: + result |= test_case_dot( + deviceID, context, queue, num_elements, false, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, false); + + // dot_acc_sat testing: + result |= test_case_dot( + deviceID, context, queue, num_elements, false, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, true); + + return result; +} + +template +static int test_vectype_packed(cl_device_id deviceID, 
cl_context context, + cl_command_queue queue, int num_elements) +{ + int result = TEST_PASS; + + typedef typename std::make_signed::type SSrcType; + typedef typename std::make_signed::type SDstType; + + typedef typename std::make_unsigned::type USrcType; + typedef typename std::make_unsigned::type UDstType; + + // packed dot testing: + result |= test_case_dot( + deviceID, context, queue, num_elements, true, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, false); + + // packed dot_acc_sat testing: + result |= test_case_dot( + deviceID, context, queue, num_elements, true, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, true); + + return result; +} + +int test_integer_dot_product(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + if (!is_extension_available(deviceID, "cl_khr_integer_dot_product")) + { + log_info("cl_khr_integer_dot_product is not supported\n"); + return TEST_SKIPPED_ITSELF; + } + + cl_int error = CL_SUCCESS; + int result = TEST_PASS; + + cl_device_integer_dot_product_capabilities_khr dotCaps = 0; + error = clGetDeviceInfo(deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR, + sizeof(dotCaps), &dotCaps, NULL); + test_error( + error, + "Unable to query CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR"); + test_assert_error( + dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR, + "When cl_khr_integer_dot_product is supported " + "CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR must be " + "supported"); + + if (dotCaps + & ~(CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR + | 
CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR)) + { + log_info("NOTE: found an unknown / untested capability!\n"); + } + + if (dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR) + { + result |= test_vectype(deviceID, context, queue, + num_elements); + } + + if (dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR) + { + result |= test_vectype_packed( + deviceID, context, queue, num_elements); + } + + return result; +} -- cgit v1.2.3 From 39fdb462be7ea4bf2c2b2c6d23e84a70c3def78d Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sat, 28 Aug 2021 02:21:34 -0700 Subject: define NOMINMAX in the CMakefile to fix std::min and std::max on MSVC (#1308) --- CMakeLists.txt | 2 ++ test_common/harness/kernelHelpers.cpp | 2 +- test_common/harness/os_helpers.cpp | 3 --- test_conformance/basic/test_async_copy2D.cpp | 4 ++-- test_conformance/basic/test_async_copy3D.cpp | 4 ++-- test_conformance/integer_ops/test_integer_dot_product.cpp | 5 ----- 6 files changed, 7 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a614649f..04551dfb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,6 +115,8 @@ endif() if(MSVC) # Don't warn when using standard non-secure functions. add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + # Fix std::min and std::max handling with windows.harness. 
+ add_compile_definitions(NOMINMAX) endif() if( WIN32 AND "${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel" ) diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp index 18f51cbe..1d1f8d8c 100644 --- a/test_common/harness/kernelHelpers.cpp +++ b/test_common/harness/kernelHelpers.cpp @@ -1707,7 +1707,7 @@ Version get_max_OpenCL_C_for_context(cl_context context) else { current_version = - (std::min)(device_version, current_version); + std::min(device_version, current_version); } }); return current_version; diff --git a/test_common/harness/os_helpers.cpp b/test_common/harness/os_helpers.cpp index daf21958..8fc91108 100644 --- a/test_common/harness/os_helpers.cpp +++ b/test_common/harness/os_helpers.cpp @@ -333,9 +333,6 @@ std::string exe_dir() #include -#if defined(max) -#undef max -#endif #include #include diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp index 9fbdcb6e..fafcac83 100644 --- a/test_conformance/basic/test_async_copy2D.cpp +++ b/test_conformance/basic/test_async_copy2D.cpp @@ -203,13 +203,13 @@ int test_copy2D(cl_device_id deviceID, cl_context context, / (numElementsPerLine + srcStride); size_t maxTotalLinesOut = (max_alloc_size / elementSize + dstStride) / (numElementsPerLine + dstStride); - size_t maxTotalLines = (std::min)(maxTotalLinesIn, maxTotalLinesOut); + size_t maxTotalLines = std::min(maxTotalLinesIn, maxTotalLinesOut); size_t maxLocalWorkgroups = maxTotalLines / (localWorkgroupSize * lineCopiesPerWorkItem); size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem - (localIsDst ? 
dstStride : srcStride); - size_t numberOfLocalWorkgroups = (std::min)(1111, (int)maxLocalWorkgroups); + size_t numberOfLocalWorkgroups = std::min(1111, (int)maxLocalWorkgroups); size_t totalLines = numberOfLocalWorkgroups * localWorkgroupSize * lineCopiesPerWorkItem; size_t inBufferSize = elementSize diff --git a/test_conformance/basic/test_async_copy3D.cpp b/test_conformance/basic/test_async_copy3D.cpp index 252159bc..2b184ee5 100644 --- a/test_conformance/basic/test_async_copy3D.cpp +++ b/test_conformance/basic/test_async_copy3D.cpp @@ -230,13 +230,13 @@ int test_copy3D(cl_device_id deviceID, cl_context context, size_t maxTotalPlanesOut = ((max_alloc_size / elementSize) + dstPlaneStride) / ((numLines * numElementsPerLine + numLines * dstLineStride) + dstPlaneStride); - size_t maxTotalPlanes = (std::min)(maxTotalPlanesIn, maxTotalPlanesOut); + size_t maxTotalPlanes = std::min(maxTotalPlanesIn, maxTotalPlanesOut); size_t maxLocalWorkgroups = maxTotalPlanes / (localWorkgroupSize * planesCopiesPerWorkItem); size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem - (localIsDst ? dstPlaneStride : srcPlaneStride); - size_t numberOfLocalWorkgroups = (std::min)(1111, (int)maxLocalWorkgroups); + size_t numberOfLocalWorkgroups = std::min(1111, (int)maxLocalWorkgroups); size_t totalPlanes = numberOfLocalWorkgroups * localWorkgroupSize * planesCopiesPerWorkItem; size_t inBufferSize = elementSize diff --git a/test_conformance/integer_ops/test_integer_dot_product.cpp b/test_conformance/integer_ops/test_integer_dot_product.cpp index b5378ae0..be25b320 100644 --- a/test_conformance/integer_ops/test_integer_dot_product.cpp +++ b/test_conformance/integer_ops/test_integer_dot_product.cpp @@ -14,11 +14,6 @@ // limitations under the License. // -// This is needed for std::numeric_limits<>::min() and max() to work on Windows. 
-#if defined(_WIN32) -#define NOMINMAX -#endif - #include #include #include -- cgit v1.2.3 From 7cfd3a6033f547905da40c06fae32b9337df0b03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Sun, 29 Aug 2021 23:12:52 +0100 Subject: Report failures in simple_{read,write}_image_pitch tests (#1309) --- test_conformance/basic/test_simple_image_pitch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_conformance/basic/test_simple_image_pitch.cpp b/test_conformance/basic/test_simple_image_pitch.cpp index 1cd82b6f..2eb43b3a 100644 --- a/test_conformance/basic/test_simple_image_pitch.cpp +++ b/test_conformance/basic/test_simple_image_pitch.cpp @@ -83,7 +83,7 @@ int test_simple_read_image_pitch(cl_device_id device, cl_context cl_context_, cl free(host_image); free(host_buffer); - return CL_SUCCESS; + return errors == 0 ? TEST_PASS : TEST_FAIL; } int test_simple_write_image_pitch(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements) @@ -149,5 +149,5 @@ int test_simple_write_image_pitch(cl_device_id device, cl_context cl_context_, c free(host_image); free(host_buffer); - return CL_SUCCESS; + return errors == 0 ? TEST_PASS : TEST_FAIL; } -- cgit v1.2.3 From e27a97fbd81b6b426a29857a3e1c04d37255931c Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Tue, 31 Aug 2021 16:53:55 +0200 Subject: Add cl_khr_integer_dot_product to known extensions in test compiler. 
(#1316) --- test_conformance/compiler/test_compiler_defines_for_extensions.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index a1d8d8bd..de30e06b 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -50,6 +50,7 @@ const char *known_extensions[] = { "cl_khr_subgroup_shuffle_relative", "cl_khr_subgroup_clustered_reduce", "cl_khr_extended_bit_ops", + "cl_khr_integer_dot_product", // API-only extensions after this point. If you add above here, modify // first_API_extension below. "cl_khr_icd", @@ -77,7 +78,7 @@ const char *known_extensions[] = { }; size_t num_known_extensions = sizeof(known_extensions) / sizeof(char *); -size_t first_API_extension = 28; +size_t first_API_extension = 29; const char *known_embedded_extensions[] = { "cles_khr_int64", -- cgit v1.2.3 From 995c7dbfbbb7b38c4ad6ce59d66b01b53ef031b2 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 31 Aug 2021 11:44:17 -0700 Subject: suppress MSVC strdup warning (#1314) --- CMakeLists.txt | 2 ++ test_common/CMakeLists.txt | 5 ----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 04551dfb..7b307a11 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,6 +115,8 @@ endif() if(MSVC) # Don't warn when using standard non-secure functions. add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + # Don't warn about using the portable "strdup" function. + add_compile_definitions(_CRT_NONSTDC_NO_DEPRECATE) # Fix std::min and std::max handling with windows.harness. 
add_compile_definitions(NOMINMAX) endif() diff --git a/test_common/CMakeLists.txt b/test_common/CMakeLists.txt index 61580300..b0505345 100644 --- a/test_common/CMakeLists.txt +++ b/test_common/CMakeLists.txt @@ -21,8 +21,3 @@ set(HARNESS_SOURCES ) add_library(harness STATIC ${HARNESS_SOURCES}) - -if(MSVC) - # Don't warn about using the portable "strdup" function. - target_compile_definitions(harness PRIVATE _CRT_NONSTDC_NO_DEPRECATE) -endif() \ No newline at end of file -- cgit v1.2.3 From 0601c6f7658c80af50d6f6a2ac947682d75bcd50 Mon Sep 17 00:00:00 2001 From: James Price Date: Tue, 31 Aug 2021 14:45:24 -0400 Subject: Add missing include for gRandomSeed (#1307) --- test_common/harness/integer_ops_test_info.h | 1 + 1 file changed, 1 insertion(+) diff --git a/test_common/harness/integer_ops_test_info.h b/test_common/harness/integer_ops_test_info.h index c25843dd..ad7b303b 100644 --- a/test_common/harness/integer_ops_test_info.h +++ b/test_common/harness/integer_ops_test_info.h @@ -18,6 +18,7 @@ #define INTEGER_OPS_TEST_INFO_H #include "conversions.h" +#include "testHarness.h" // TODO: expand usage to other tests. 
-- cgit v1.2.3 From 34e47322db205d3c8c972ddebbf51bb4122e45f5 Mon Sep 17 00:00:00 2001 From: "Senran (Stephen) Zhang" Date: Tue, 7 Sep 2021 00:14:36 +0800 Subject: Limit workgroup size for atomics tests (#1197) * Limit workgroup size for atomics tests This avoids extremely large local buffer size and slow run * Always limit workgroup size --- test_conformance/atomics/test_atomics.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp index 34b34ed3..c0c01363 100644 --- a/test_conformance/atomics/test_atomics.cpp +++ b/test_conformance/atomics/test_atomics.cpp @@ -200,6 +200,10 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL ); test_error( error, "Unable to obtain max work group size for device and kernel combo" ); + // Limit workSize to avoid extremely large local buffer size and slow + // run. + if (workSize > 65536) workSize = 65536; + // "workSize" is limited to that of the first dimension as only a 1DRange is executed. if( maxSizes[0] < workSize ) { -- cgit v1.2.3 From 1f26e1d8ba372f4c638f9c0cdae7566e349b9b9a Mon Sep 17 00:00:00 2001 From: Jeremy Kemp Date: Tue, 7 Sep 2021 12:47:44 +0100 Subject: Fix memory model issue in `atomic_flag`. (#1283) * Fix memory model issue in atomic_flag. In atomic_flag sub-tests that modify local memory, compilers may re-order memory accesses between the local and global address spaces which can lead to incorrect test failures. This commit ensures that both local and global memory operations are fenced to prevent this re-ordering from occurring. Fixes #134. * Clang format changes. * Added missing global acquire which is necessary for the corresponding global release. Thanks to @jlewis-austin for spotting. * Clang format changes. * Match the condition for applying acquire/release fences. 
--- test_conformance/c11_atomics/test_atomics.cpp | 36 ++++++++++++++++++--------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index c3a190b7..38b4e9a7 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1657,12 +1657,18 @@ public: " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n" " {\n" " bool set = atomic_flag_test_and_set" + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n"; - if (MemoryOrder() == MEMORY_ORDER_RELAXED || MemoryOrder() == MEMORY_ORDER_RELEASE) - program += " atomic_work_item_fence(" + - std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + - "memory_order_acquire," + - std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) + - ");\n"; + if (MemoryOrder() == MEMORY_ORDER_RELAXED + || MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory()) + program += " atomic_work_item_fence(" + + std::string(LocalMemory() + ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " + : "CLK_GLOBAL_MEM_FENCE, ") + + "memory_order_acquire," + + std::string(LocalMemory() + ? "memory_scope_work_group" + : (UseSVM() ? "memory_scope_all_svm_devices" + : "memory_scope_device")) + + ");\n"; program += " if (!set)\n" @@ -1683,12 +1689,18 @@ public: " stop = 1;\n" " }\n"; - if (MemoryOrder() == MEMORY_ORDER_ACQUIRE || MemoryOrder() == MEMORY_ORDER_RELAXED) - program += " atomic_work_item_fence(" + - std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + - "memory_order_release," + - std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? 
"memory_scope_all_svm_devices" : "memory_scope_device") ) + - ");\n"; + if (MemoryOrder() == MEMORY_ORDER_ACQUIRE + || MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory()) + program += " atomic_work_item_fence(" + + std::string(LocalMemory() + ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " + : "CLK_GLOBAL_MEM_FENCE, ") + + "memory_order_release," + + std::string(LocalMemory() + ? "memory_scope_work_group" + : (UseSVM() ? "memory_scope_all_svm_devices" + : "memory_scope_device")) + + ");\n"; program += " atomic_flag_clear" + postfix + "(&destMemory[cnt]" + MemoryOrderScopeStrForClear() + ");\n" -- cgit v1.2.3 From 02bf24d2b1684b1ffde079d3598a8fc70610d4fc Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 13 Sep 2021 05:25:32 -0700 Subject: remove min max macros (#1310) * remove the MIN and MAX macros and use the std versions instead * fix formatting * fix Arm build * remove additional MIN and MAX macros from compat.h --- test_common/harness/compat.h | 7 --- test_common/harness/errorHelpers.cpp | 20 ++++---- test_common/harness/imageHelpers.cpp | 3 -- .../basic/test_enqueued_local_size.cpp | 8 +-- test_conformance/buffers/test_sub_buffers.cpp | 22 ++++---- test_conformance/conversions/test_conversions.cpp | 7 ++- .../device_execution/enqueue_ndrange.cpp | 3 +- .../device_execution/host_queue_order.cpp | 3 +- test_conformance/half/Test_roundTrip.cpp | 7 ++- test_conformance/half/Test_vLoadHalf.cpp | 7 ++- test_conformance/half/Test_vStoreHalf.cpp | 11 ++-- .../images/kernel_read_write/test_common.cpp | 25 ++++----- .../images/kernel_read_write/test_iterations.cpp | 32 ++++++++---- .../images/kernel_read_write/test_read_1D.cpp | 26 +++++++--- .../kernel_read_write/test_read_1D_array.cpp | 27 ++++++---- .../kernel_read_write/test_read_2D_array.cpp | 32 ++++++++---- test_conformance/integer_ops/test_add_sat.cpp | 31 ++++------- test_conformance/integer_ops/test_integers.cpp | 60 +++++++++++----------- test_conformance/integer_ops/test_sub_sat.cpp | 32 
++++-------- test_conformance/integer_ops/test_unary_ops.cpp | 2 +- .../math_brute_force/macro_binary_double.cpp | 3 +- .../math_brute_force/macro_binary_float.cpp | 3 +- .../math_brute_force/macro_unary_double.cpp | 3 +- .../math_brute_force/macro_unary_float.cpp | 4 +- test_conformance/math_brute_force/main.cpp | 5 +- test_conformance/profiling/execute.cpp | 12 ++--- test_conformance/workgroups/test_wg_broadcast.cpp | 6 ++- .../workgroups/test_wg_scan_exclusive_max.cpp | 11 ++-- .../workgroups/test_wg_scan_exclusive_min.cpp | 11 ++-- .../workgroups/test_wg_scan_inclusive_max.cpp | 10 ++-- .../workgroups/test_wg_scan_inclusive_min.cpp | 10 ++-- 31 files changed, 241 insertions(+), 202 deletions(-) diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h index 3b557852..4053b7ee 100644 --- a/test_common/harness/compat.h +++ b/test_common/harness/compat.h @@ -309,13 +309,6 @@ EXTERN_C int __builtin_clz(unsigned int pattern); #endif -#ifndef MIN -#define MIN(x, y) (((x) < (y)) ? (x) : (y)) -#endif -#ifndef MAX -#define MAX(x, y) (((x) > (y)) ? (x) : (y)) -#endif - /*----------------------------------------------------------------------------- WARNING: DO NOT USE THESE MACROS: diff --git a/test_common/harness/errorHelpers.cpp b/test_common/harness/errorHelpers.cpp index ea928bc3..eaccf641 100644 --- a/test_common/harness/errorHelpers.cpp +++ b/test_common/harness/errorHelpers.cpp @@ -18,6 +18,8 @@ #include #include +#include + #include "errorHelpers.h" #include "parseParameters.h" @@ -301,10 +303,6 @@ const char *GetQueuePropertyName(cl_command_queue_properties property) } } -#ifndef MAX -#define MAX(_a, _b) ((_a) > (_b) ? 
(_a) : (_b)) -#endif - #if defined(_MSC_VER) #define scalbnf(_a, _i) ldexpf(_a, _i) #define scalbn(_a, _i) ldexp(_a, _i) @@ -357,7 +355,7 @@ static float Ulp_Error_Half_Float(float test, double reference) // The unbiased exponent of the ulp unit place int ulp_exp = - HALF_MANT_DIG - 1 - MAX(ilogb(reference), HALF_MIN_EXP - 1); + HALF_MANT_DIG - 1 - std::max(ilogb(reference), HALF_MIN_EXP - 1); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -365,7 +363,7 @@ static float Ulp_Error_Half_Float(float test, double reference) // reference is a normal power of two or a zero int ulp_exp = - HALF_MANT_DIG - 1 - MAX(ilogb(reference) - 1, HALF_MIN_EXP - 1); + HALF_MANT_DIG - 1 - std::max(ilogb(reference) - 1, HALF_MIN_EXP - 1); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -437,7 +435,8 @@ float Ulp_Error(float test, double reference) return 0.0f; // if we are expecting a NaN, any NaN is fine // The unbiased exponent of the ulp unit place - int ulp_exp = FLT_MANT_DIG - 1 - MAX(ilogb(reference), FLT_MIN_EXP - 1); + int ulp_exp = + FLT_MANT_DIG - 1 - std::max(ilogb(reference), FLT_MIN_EXP - 1); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -445,7 +444,8 @@ float Ulp_Error(float test, double reference) // reference is a normal power of two or a zero // The unbiased exponent of the ulp unit place - int ulp_exp = FLT_MANT_DIG - 1 - MAX(ilogb(reference) - 1, FLT_MIN_EXP - 1); + int ulp_exp = + FLT_MANT_DIG - 1 - std::max(ilogb(reference) - 1, FLT_MIN_EXP - 1); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -513,7 +513,7 @@ float Ulp_Error_Double(double test, long double reference) // The unbiased exponent of the ulp unit place int ulp_exp = - DBL_MANT_DIG - 1 - MAX(ilogbl(reference), DBL_MIN_EXP - 1); + DBL_MANT_DIG - 1 - std::max(ilogbl(reference), DBL_MIN_EXP - 1); // Scale the exponent of the error float 
result = (float)scalbnl(testVal - reference, ulp_exp); @@ -529,7 +529,7 @@ float Ulp_Error_Double(double test, long double reference) // reference is a normal power of two or a zero // The unbiased exponent of the ulp unit place int ulp_exp = - DBL_MANT_DIG - 1 - MAX(ilogbl(reference) - 1, DBL_MIN_EXP - 1); + DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1); // Scale the exponent of the error float result = (float)scalbnl(testVal - reference, ulp_exp); diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index 314709f8..3a5c5533 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -690,9 +690,6 @@ int has_alpha(const cl_image_format *format) _b ^= _a; \ _a ^= _b; \ } while (0) -#ifndef MAX -#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b)) -#endif void get_max_sizes( size_t *numberOfSizes, const int maxNumberOfSizes, size_t sizes[][3], diff --git a/test_conformance/basic/test_enqueued_local_size.cpp b/test_conformance/basic/test_enqueued_local_size.cpp index f52162a8..91fe1434 100644 --- a/test_conformance/basic/test_enqueued_local_size.cpp +++ b/test_conformance/basic/test_enqueued_local_size.cpp @@ -14,13 +14,15 @@ // limitations under the License. 
// #include "harness/compat.h" +#include "harness/rounding_mode.h" #include #include #include #include #include -#include "harness/rounding_mode.h" + +#include #include "procs.h" @@ -124,8 +126,8 @@ test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_que err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_wgs), &max_wgs, NULL); test_error( err, "clGetDeviceInfo failed."); - localsize[0] = MIN(16, max_wgs); - localsize[1] = MIN(11, max_wgs / localsize[0]); + localsize[0] = std::min(16, max_wgs); + localsize[1] = std::min(11, max_wgs / localsize[0]); // If we need to use uniform workgroups because non-uniform workgroups are // not supported, round up to the next global size that is divisible by the // local size. diff --git a/test_conformance/buffers/test_sub_buffers.cpp b/test_conformance/buffers/test_sub_buffers.cpp index 691509fd..d6ab111e 100644 --- a/test_conformance/buffers/test_sub_buffers.cpp +++ b/test_conformance/buffers/test_sub_buffers.cpp @@ -15,6 +15,8 @@ // #include "procs.h" +#include + // Design: // To test sub buffers, we first create one main buffer. We then create several sub-buffers and // queue Actions on each one. Each Action is encapsulated in a class so it can keep track of @@ -101,13 +103,6 @@ public: } }; -#ifndef MAX -#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) ) -#endif -#ifndef MIN -#define MIN( _a, _b ) ( (_a) < (_b) ? 
(_a) : (_b) ) -#endif - class CopyAction : public Action { public: @@ -117,7 +112,8 @@ public: virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) { // Copy from sub-buffer 1 to sub-buffer 2 - size_t size = get_random_size_t( 0, MIN( buffer1.mSize, buffer2.mSize ), GetRandSeed() ); + size_t size = get_random_size_t( + 0, std::min(buffer1.mSize, buffer2.mSize), GetRandSeed()); size_t startOffset = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() ); size_t endOffset = get_random_size_t( 0, buffer2.mSize - size, GetRandSeed() ); @@ -266,7 +262,11 @@ int test_sub_buffers_read_write_core( cl_context context, cl_command_queue queue endRange = mainSize; size_t offset = get_random_size_t( toStartFrom / addressAlign, endRange / addressAlign, Action::GetRandSeed() ) * addressAlign; - size_t size = get_random_size_t( 1, ( MIN( mainSize / 8, mainSize - offset ) ) / addressAlign, Action::GetRandSeed() ) * addressAlign; + size_t size = + get_random_size_t( + 1, (std::min(mainSize / 8, mainSize - offset)) / addressAlign, + Action::GetRandSeed()) + * addressAlign; error = subBuffers[ numSubBuffers ].Allocate( mainBuffer, CL_MEM_READ_WRITE, offset, size ); test_error( error, "Unable to allocate sub buffer" ); @@ -443,7 +443,7 @@ int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context error = get_reasonable_buffer_size( otherDevice, maxBuffer2 ); test_error( error, "Unable to get buffer size for secondary device" ); - maxBuffer1 = MIN( maxBuffer1, maxBuffer2 ); + maxBuffer1 = std::min(maxBuffer1, maxBuffer2); cl_uint addressAlign1Bits, addressAlign2Bits; error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign1Bits ), &addressAlign1Bits, NULL ); @@ -452,7 +452,7 @@ int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context error = clGetDeviceInfo( otherDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, 
sizeof( addressAlign2Bits ), &addressAlign2Bits, NULL ); test_error( error, "Unable to get secondary device's address alignment" ); - cl_uint addressAlign1 = MAX( addressAlign1Bits, addressAlign2Bits ) / 8; + cl_uint addressAlign1 = std::max(addressAlign1Bits, addressAlign2Bits) / 8; // Finally time to run! return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 ); diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 87b8ead7..e8e572e6 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -47,6 +47,8 @@ #endif #include +#include + #include "Sleep.h" #include "basic_test_conversions.h" @@ -1003,7 +1005,8 @@ static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMod uint64_t i; gTestCount++; - size_t blockCount = BUFFER_SIZE / MAX( gTypeSizes[ inType ], gTypeSizes[ outType ] ); + size_t blockCount = + BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]); size_t step = blockCount; uint64_t lastCase = 1ULL << (8*gTypeSizes[ inType ]); cl_event writeInputBuffer = NULL; @@ -1078,7 +1081,7 @@ static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMod fflush(stdout); } - cl_uint count = (uint32_t) MIN( blockCount, lastCase - i ); + cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); writeInputBufferInfo.count = count; // Crate a user event to represent the status of the reference value computation completion diff --git a/test_conformance/device_execution/enqueue_ndrange.cpp b/test_conformance/device_execution/enqueue_ndrange.cpp index 8ced6629..f228f063 100644 --- a/test_conformance/device_execution/enqueue_ndrange.cpp +++ b/test_conformance/device_execution/enqueue_ndrange.cpp @@ -18,6 +18,7 @@ #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include #include #include "procs.h" @@ -645,7 +646,7 @@ int 
test_enqueue_ndrange(cl_device_id device, cl_context context, cl_command_que max_local_size = (max_local_size > MAX_GWS)? MAX_GWS: max_local_size; if(gWimpyMode) { - max_local_size = MIN(8, max_local_size); + max_local_size = std::min((size_t)8, max_local_size); } cl_uint num = 10; diff --git a/test_conformance/device_execution/host_queue_order.cpp b/test_conformance/device_execution/host_queue_order.cpp index 2b5688d1..5376ea40 100644 --- a/test_conformance/device_execution/host_queue_order.cpp +++ b/test_conformance/device_execution/host_queue_order.cpp @@ -18,6 +18,7 @@ #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include #include #include "procs.h" @@ -124,7 +125,7 @@ int test_host_queue_order(cl_device_id device, cl_context context, cl_command_qu cl_uint num = arr_size(result); if( gWimpyMode ) { - num = MAX(num / 16, 4); + num = std::max(num / 16, 4U); } clMemWrapper res_mem; diff --git a/test_conformance/half/Test_roundTrip.cpp b/test_conformance/half/Test_roundTrip.cpp index 69fc7e41..1ab40937 100644 --- a/test_conformance/half/Test_roundTrip.cpp +++ b/test_conformance/half/Test_roundTrip.cpp @@ -14,6 +14,9 @@ // limitations under the License. 
// #include + +#include + #include "cl_utils.h" #include "tests.h" #include "harness/testHarness.h" @@ -156,7 +159,7 @@ int test_roundTrip( cl_device_id device, cl_context context, cl_command_queue qu } // Figure out how many elements are in a work block - size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float)); + size_t elementSize = std::max(sizeof(cl_half), sizeof(cl_float)); size_t blockCount = (size_t)getBufferSize(device) / elementSize; //elementSize is a power of two uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of cl_half size_t stride = blockCount; @@ -168,7 +171,7 @@ int test_roundTrip( cl_device_id device, cl_context context, cl_command_queue qu for( i = 0; i < (uint64_t)lastCase; i += stride ) { - count = (uint32_t) MIN( blockCount, lastCase - i ); + count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); //Init the input stream uint16_t *p = (uint16_t *)gIn_half; diff --git a/test_conformance/half/Test_vLoadHalf.cpp b/test_conformance/half/Test_vLoadHalf.cpp index 5dfac7a3..e9354019 100644 --- a/test_conformance/half/Test_vLoadHalf.cpp +++ b/test_conformance/half/Test_vLoadHalf.cpp @@ -17,6 +17,9 @@ #include "harness/testHarness.h" #include + +#include + #include "cl_utils.h" #include "tests.h" @@ -429,7 +432,7 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned ) } // Figure out how many elements are in a work block - size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float)); + size_t elementSize = std::max(sizeof(cl_half), sizeof(cl_float)); size_t blockCount = getBufferSize(device) / elementSize; // elementSize is power of 2 uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of things of size cl_half @@ -447,7 +450,7 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned ) for( i = 0; i < (uint64_t)lastCase; i += blockCount ) { - count = (uint32_t) MIN( blockCount, lastCase - i ); + count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); //Init the input stream uint16_t *p = 
(uint16_t *)gIn_half; diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp index c3a328ad..85824a9f 100644 --- a/test_conformance/half/Test_vStoreHalf.cpp +++ b/test_conformance/half/Test_vStoreHalf.cpp @@ -18,6 +18,9 @@ #include "harness/testHarness.h" #include + +#include + #include "cl_utils.h" #include "tests.h" @@ -674,7 +677,7 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR } // end for vector size // Figure out how many elements are in a work block - size_t elementSize = MAX( sizeof(cl_ushort), sizeof(float)); + size_t elementSize = std::max(sizeof(cl_ushort), sizeof(float)); size_t blockCount = BUFFER_SIZE / elementSize; // elementSize is power of 2 uint64_t lastCase = 1ULL << (8*sizeof(float)); // number of floats. size_t stride = blockCount; @@ -726,7 +729,7 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR for( i = 0; i < lastCase; i += stride ) { - count = (cl_uint) MIN( blockCount, lastCase - i ); + count = (cl_uint)std::min((uint64_t)blockCount, lastCase - i); fref.i = i; dref.i = i; @@ -1272,7 +1275,7 @@ int Test_vStoreaHalf_private( cl_device_id device, f2h referenceFunc, d2h double } // Figure out how many elements are in a work block - size_t elementSize = MAX( sizeof(cl_ushort), sizeof(float)); + size_t elementSize = std::max(sizeof(cl_ushort), sizeof(float)); size_t blockCount = BUFFER_SIZE / elementSize; uint64_t lastCase = 1ULL << (8*sizeof(float)); size_t stride = blockCount; @@ -1323,7 +1326,7 @@ int Test_vStoreaHalf_private( cl_device_id device, f2h referenceFunc, d2h double for( i = 0; i < (uint64_t)lastCase; i += stride ) { - count = (cl_uint) MIN( blockCount, lastCase - i ); + count = (cl_uint)std::min((uint64_t)blockCount, lastCase - i); fref.i = i; dref.i = i; diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp index 375ee587..6b3cf849 100644 
--- a/test_conformance/images/kernel_read_write/test_common.cpp +++ b/test_conformance/images/kernel_read_write/test_common.cpp @@ -16,6 +16,7 @@ #include "test_common.h" +#include cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error) { cl_sampler sampler = nullptr; @@ -934,13 +935,13 @@ int test_read_image(cl_context context, cl_command_queue queue, { err4 = 0.0f; } - float maxErr1 = MAX( + float maxErr1 = std::max( maxErr * maxPixel.p[0], FLT_MIN); - float maxErr2 = MAX( + float maxErr2 = std::max( maxErr * maxPixel.p[1], FLT_MIN); - float maxErr3 = MAX( + float maxErr3 = std::max( maxErr * maxPixel.p[2], FLT_MIN); - float maxErr4 = MAX( + float maxErr4 = std::max( maxErr * maxPixel.p[3], FLT_MIN); if (!(err1 <= maxErr1) @@ -1039,17 +1040,17 @@ int test_read_image(cl_context context, cl_command_queue queue, float err4 = ABS_ERROR(resultPtr[3], expected[3]); float maxErr1 = - MAX(maxErr * maxPixel.p[0], - FLT_MIN); + std::max(maxErr * maxPixel.p[0], + FLT_MIN); float maxErr2 = - MAX(maxErr * maxPixel.p[1], - FLT_MIN); + std::max(maxErr * maxPixel.p[1], + FLT_MIN); float maxErr3 = - MAX(maxErr * maxPixel.p[2], - FLT_MIN); + std::max(maxErr * maxPixel.p[2], + FLT_MIN); float maxErr4 = - MAX(maxErr * maxPixel.p[3], - FLT_MIN); + std::max(maxErr * maxPixel.p[3], + FLT_MIN); if (!(err1 <= maxErr1) diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp index 03ca9595..3b779fab 100644 --- a/test_conformance/images/kernel_read_write/test_iterations.cpp +++ b/test_conformance/images/kernel_read_write/test_iterations.cpp @@ -16,6 +16,8 @@ #include "test_common.h" #include +#include + #if defined( __APPLE__ ) #include #include @@ -434,7 +436,8 @@ int validate_image_2D_depth_results(void *imageValues, void *resultValues, doubl float err1 = ABS_ERROR(resultPtr[0], expected[0]); // Clamp to the minimum absolute error for the format if (err1 > 
0 && err1 < formatAbsoluteError) { err1 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); // Check if the result matches. if( ! (err1 <= maxErr1) ) @@ -484,7 +487,8 @@ int validate_image_2D_depth_results(void *imageValues, void *resultValues, doubl imageSampler, expected, 0, &containsDenormals ); float err1 = ABS_ERROR(resultPtr[0], expected[0]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); if( ! (err1 <= maxErr1) ) @@ -598,10 +602,14 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); // Check if the result matches. if( ! (err1 <= maxErr1) || ! 
(err2 <= maxErr2) || @@ -671,10 +679,14 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form float err2 = ABS_ERROR(resultPtr[1], expected[1]); float err3 = ABS_ERROR(resultPtr[2], expected[2]); float err4 = ABS_ERROR(resultPtr[3], expected[3]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp index c9ba4e84..68113f9a 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp @@ -17,6 +17,8 @@ #include "test_common.h" #include +#include + #if defined( __APPLE__ ) #include #include @@ -669,10 +671,14 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); // Check if the 
result matches. if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || @@ -732,10 +738,14 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke ABS_ERROR(resultPtr[2], expected[2]); float err4 = ABS_ERROR(resultPtr[3], expected[3]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || diff --git a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp index b3287ded..ac266ad7 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp @@ -16,13 +16,14 @@ #include "test_common.h" #include +#include + #if defined( __APPLE__ ) #include #include #include #endif - const char *read1DArrayKernelSourcePattern = "__kernel void sample_kernel( read_only image1d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global %s4 *results %s)\n" "{\n" @@ -772,10 +773,14 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * 
maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); // Check if the result matches. if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || @@ -838,10 +843,14 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker ABS_ERROR(resultPtr[2], expected[2]); float err4 = ABS_ERROR(resultPtr[3], expected[3]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || diff --git a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp index 7cb334b2..11b78814 100644 --- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp @@ -16,6 +16,8 @@ #include "test_common.h" #include +#include + // Utility function to clamp down image sizes for certain tests to avoid // using too much memory. static size_t reduceImageSizeRange(size_t maxDimSize) { @@ -617,7 +619,8 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker ABS_ERROR(resultPtr[0], expected[0]); // Clamp to the minimum absolute error for the format if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr1 = std::max( + maxErr * maxPixel.p[0], FLT_MIN); if( ! 
(err1 <= maxErr1) ) { @@ -661,7 +664,8 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker float err1 = ABS_ERROR(resultPtr[0], expected[0]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr1 = std::max( + maxErr * maxPixel.p[0], FLT_MIN); if( ! (err1 <= maxErr1) ) @@ -942,10 +946,14 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = std::max( + maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = std::max( + maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = std::max( + maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = std::max( + maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) { @@ -1004,10 +1012,14 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker expected[2]); float err4 = ABS_ERROR(resultPtr[3], expected[3]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = std::max( + maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = std::max( + maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = std::max( + maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = std::max( + maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! 
(err4 <= maxErr4) ) diff --git a/test_conformance/integer_ops/test_add_sat.cpp b/test_conformance/integer_ops/test_add_sat.cpp index c0e45d11..e33f5c67 100644 --- a/test_conformance/integer_ops/test_add_sat.cpp +++ b/test_conformance/integer_ops/test_add_sat.cpp @@ -21,18 +21,9 @@ #include #include -#include "procs.h" - -#define UCHAR_MIN 0 -#define USHRT_MIN 0 -#define UINT_MIN 0 +#include -#ifndef MAX -#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) ) -#endif -#ifndef MIN -#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) ) -#endif +#include "procs.h" static int verify_addsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize ) { @@ -40,8 +31,8 @@ static int verify_addsat_char( const cl_char *inA, const cl_char *inB, const cl_ for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] + (cl_int) inB[i]; - r = MAX( r, CL_CHAR_MIN ); - r = MIN( r, CL_CHAR_MAX ); + r = std::max(r, CL_CHAR_MIN); + r = std::min(r, CL_CHAR_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for add_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } @@ -55,9 +46,9 @@ static int verify_addsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const for( i = 0; i < n; i++ ) { cl_int r = (int) inA[i] + (int) inB[i]; - r = MAX( r, 0 ); - r = MIN( r, CL_UCHAR_MAX ); - if( r != outptr[i] ) + r = std::max(r, 0); + r = std::min(r, CL_UCHAR_MAX); + if (r != outptr[i]) { log_info( "\n%d) Failure for add_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } } return 0; @@ -69,8 +60,8 @@ static int verify_addsat_short( const cl_short *inA, const cl_short *inB, const for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] + (cl_int) inB[i]; - r = MAX( r, CL_SHRT_MIN ); - r = MIN( r, CL_SHRT_MAX ); + r = std::max(r, CL_SHRT_MIN); + r = std::min(r, CL_SHRT_MAX); if( r != outptr[i] ) { 
log_info( "\n%d) Failure for add_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } @@ -84,8 +75,8 @@ static int verify_addsat_ushort( const cl_ushort *inA, const cl_ushort *inB, con for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] + (cl_int) inB[i]; - r = MAX( r, 0 ); - r = MIN( r, CL_USHRT_MAX ); + r = std::max(r, 0); + r = std::min(r, CL_USHRT_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for add_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } diff --git a/test_conformance/integer_ops/test_integers.cpp b/test_conformance/integer_ops/test_integers.cpp index 8d77b24b..6fa18e1e 100644 --- a/test_conformance/integer_ops/test_integers.cpp +++ b/test_conformance/integer_ops/test_integers.cpp @@ -16,14 +16,9 @@ #include "testBase.h" #include "harness/conversions.h" -#define TEST_SIZE 512 +#include -#ifndef MIN - #define MIN( _a, _b ) ((_a) < (_b) ? (_a) : (_b)) -#endif -#ifndef MAX - #define MAX( _a, _b ) ((_a) > (_b) ? 
(_a) : (_b)) -#endif +#define TEST_SIZE 512 const char *singleParamIntegerKernelSourcePattern = "__kernel void sample_test(__global %s *sourceA, __global %s *destValues)\n" @@ -1512,19 +1507,20 @@ bool verify_integer_clamp( void *sourceA, void *sourceB, void *sourceC, void *de switch( vecAType ) { case kULong: - ((cl_ulong*) destination)[0] = MAX(MIN(valueA, valueC), valueB); + ((cl_ulong *)destination)[0] = + std::max(std::min(valueA, valueC), valueB); break; case kUInt: - ((cl_uint*) destination)[0] = (cl_uint) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_uint *)destination)[0] = + (cl_uint)(std::max(std::min(valueA, valueC), valueB)); break; case kUShort: - ((cl_ushort*) destination)[0] = (cl_ushort) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_ushort *)destination)[0] = + (cl_ushort)(std::max(std::min(valueA, valueC), valueB)); break; case kUChar: - ((cl_uchar*) destination)[0] = (cl_uchar) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_uchar *)destination)[0] = + (cl_uchar)(std::max(std::min(valueA, valueC), valueB)); break; default: //error -- should never get here @@ -1576,19 +1572,20 @@ bool verify_integer_clamp( void *sourceA, void *sourceB, void *sourceC, void *de switch( vecAType ) { case kLong: - ((cl_long*) destination)[0] = MAX(MIN(valueA, valueC), valueB); + ((cl_long *)destination)[0] = + std::max(std::min(valueA, valueC), valueB); break; case kInt: - ((cl_int*) destination)[0] = (cl_int) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_int *)destination)[0] = + (cl_int)(std::max(std::min(valueA, valueC), valueB)); break; case kShort: - ((cl_short*) destination)[0] = (cl_short) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_short *)destination)[0] = + (cl_short)(std::max(std::min(valueA, valueC), valueB)); break; case kChar: - ((cl_char*) destination)[0] = (cl_char) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_char *)destination)[0] = + (cl_char)(std::max(std::min(valueA, valueC), valueB)); break; default: //error -- should never get here @@ -1654,13 
+1651,16 @@ bool verify_integer_mad_sat( void *sourceA, void *sourceB, void *sourceC, void * ((cl_ulong*) destination)[0] = multLo; break; case kUInt: - ((cl_uint*) destination)[0] = (cl_uint) MIN( multLo, (cl_ulong) CL_UINT_MAX ); + ((cl_uint *)destination)[0] = + (cl_uint)std::min(multLo, (cl_ulong)CL_UINT_MAX); break; case kUShort: - ((cl_ushort*) destination)[0] = (cl_ushort) MIN( multLo, (cl_ulong) CL_USHRT_MAX ); + ((cl_ushort *)destination)[0] = + (cl_ushort)std::min(multLo, (cl_ulong)CL_USHRT_MAX); break; case kUChar: - ((cl_uchar*) destination)[0] = (cl_uchar) MIN( multLo, (cl_ulong) CL_UCHAR_MAX ); + ((cl_uchar *)destination)[0] = + (cl_uchar)std::min(multLo, (cl_ulong)CL_UCHAR_MAX); break; default: //error -- should never get here @@ -1744,18 +1744,18 @@ bool verify_integer_mad_sat( void *sourceA, void *sourceB, void *sourceC, void * ((cl_long*) destination)[0] = result; break; case kInt: - result = MIN( result, (cl_long) CL_INT_MAX ); - result = MAX( result, (cl_long) CL_INT_MIN ); + result = std::min(result, (cl_long)CL_INT_MAX); + result = std::max(result, (cl_long)CL_INT_MIN); ((cl_int*) destination)[0] = (cl_int) result; break; case kShort: - result = MIN( result, (cl_long) CL_SHRT_MAX ); - result = MAX( result, (cl_long) CL_SHRT_MIN ); + result = std::min(result, (cl_long)CL_SHRT_MAX); + result = std::max(result, (cl_long)CL_SHRT_MIN); ((cl_short*) destination)[0] = (cl_short) result; break; case kChar: - result = MIN( result, (cl_long) CL_CHAR_MAX ); - result = MAX( result, (cl_long) CL_CHAR_MIN ); + result = std::min(result, (cl_long)CL_CHAR_MAX); + result = std::max(result, (cl_long)CL_CHAR_MIN); ((cl_char*) destination)[0] = (cl_char) result; break; default: diff --git a/test_conformance/integer_ops/test_sub_sat.cpp b/test_conformance/integer_ops/test_sub_sat.cpp index 845d1064..2a88ee0d 100644 --- a/test_conformance/integer_ops/test_sub_sat.cpp +++ b/test_conformance/integer_ops/test_sub_sat.cpp @@ -21,19 +21,9 @@ #include #include -#include 
"procs.h" - -#define UCHAR_MIN 0 -#define USHRT_MIN 0 -#define UINT_MIN 0 - -#ifndef MAX -#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) ) -#endif -#ifndef MIN -#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) ) -#endif +#include +#include "procs.h" static int verify_subsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize ) { @@ -41,8 +31,8 @@ static int verify_subsat_char( const cl_char *inA, const cl_char *inB, const cl_ for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; - r = MAX( r, CL_CHAR_MIN ); - r = MIN( r, CL_CHAR_MAX ); + r = std::max(r, CL_CHAR_MIN); + r = std::min(r, CL_CHAR_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for sub_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } @@ -56,9 +46,9 @@ static int verify_subsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; - r = MAX( r, 0 ); - r = MIN( r, CL_UCHAR_MAX ); - if( r != outptr[i] ) + r = std::max(r, 0); + r = std::min(r, CL_UCHAR_MAX); + if (r != outptr[i]) { log_info( "\n%d) Failure for sub_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } } return 0; @@ -70,8 +60,8 @@ static int verify_subsat_short( const cl_short *inA, const cl_short *inB, const for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; - r = MAX( r, CL_SHRT_MIN ); - r = MIN( r, CL_SHRT_MAX ); + r = std::max(r, CL_SHRT_MIN); + r = std::min(r, CL_SHRT_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for sub_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } @@ -85,8 +75,8 @@ static int verify_subsat_ushort( const cl_ushort *inA, const cl_ushort *inB, con for( i = 0; i < n; i++ ) { 
cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; - r = MAX( r, 0 ); - r = MIN( r, CL_USHRT_MAX ); + r = std::max(r, 0); + r = std::min(r, CL_USHRT_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for sub_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } diff --git a/test_conformance/integer_ops/test_unary_ops.cpp b/test_conformance/integer_ops/test_unary_ops.cpp index 72940eaa..c91c85ae 100644 --- a/test_conformance/integer_ops/test_unary_ops.cpp +++ b/test_conformance/integer_ops/test_unary_ops.cpp @@ -107,7 +107,7 @@ int test_unary_op( cl_command_queue queue, cl_context context, OpKonstants which // For sub ops, the min control value is 2. Otherwise, it's 0 controlData[ i ] |= 0x02; else if( whichOp == kIncrement ) - // For addition ops, the MAX control value is 1. Otherwise, it's 3 + // For addition ops, the max control value is 1. Otherwise, it's 3 controlData[ i ] &= ~0x02; } streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index 6db6aa56..d3e8071f 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -496,7 +496,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } - for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++) + for (auto k = std::max(1U, gMinVectorSizeIndex); + k < gMaxVectorSizeIndex; k++) { q = (cl_long *)out[k]; // If we aren't getting the correctly rounded result diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index d6d5c8eb..6c7c8c05 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -485,7 +485,8 @@ cl_int Test(cl_uint job_id, cl_uint 
thread_id, void *data) goto exit; } - for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++) + for (auto k = std::max(1U, gMinVectorSizeIndex); + k < gMaxVectorSizeIndex; k++) { q = out[k]; // If we aren't getting the correctly rounded result diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 1978c185..7f3521c6 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -304,7 +304,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } - for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++) + for (auto k = std::max(1U, gMinVectorSizeIndex); + k < gMaxVectorSizeIndex; k++) { q = out[k]; // If we aren't getting the correctly rounded result diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index ece5e9b6..0cd54de4 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -309,8 +309,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } - for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; - k++) + for (auto k = std::max(1U, gMinVectorSizeIndex); + k < gMaxVectorSizeIndex; k++) { q = out[k]; // If we aren't getting the correctly rounded result diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 6691f462..1a6e0c4e 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -18,6 +18,7 @@ #include "sleep.h" #include "utility.h" +#include #include #include #include @@ -1239,7 +1240,7 @@ float Bruteforce_Ulp_Error_Double(double test, long double reference) // The unbiased exponent of the ulp unit place int ulp_exp = - DBL_MANT_DIG - 1 - MAX(ilogbl(reference), DBL_MIN_EXP - 1); + 
DBL_MANT_DIG - 1 - std::max(ilogbl(reference), DBL_MIN_EXP - 1); // Scale the exponent of the error float result = (float)scalbnl(testVal - reference, ulp_exp); @@ -1255,7 +1256,7 @@ float Bruteforce_Ulp_Error_Double(double test, long double reference) // reference is a normal power of two or a zero // The unbiased exponent of the ulp unit place int ulp_exp = - DBL_MANT_DIG - 1 - MAX(ilogbl(reference) - 1, DBL_MIN_EXP - 1); + DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1); // allow correctly rounded results to pass through unmolested. (We might add // error to it below.) There is something of a performance optimization here diff --git a/test_conformance/profiling/execute.cpp b/test_conformance/profiling/execute.cpp index edfc043c..0541bfa5 100644 --- a/test_conformance/profiling/execute.cpp +++ b/test_conformance/profiling/execute.cpp @@ -21,6 +21,8 @@ #include #include +#include + #include "procs.h" #include "harness/testHarness.h" #include "harness/errorHelpers.h" @@ -29,12 +31,6 @@ typedef unsigned char uchar; #endif -#undef MIN -#define MIN(x,y) ( (x) < (y) ? (x) : (y) ) - -#undef MAX -#define MAX(x,y) ( (x) > (y) ? 
(x) : (y) ) - //#define CREATE_OUTPUT 1 extern int writePPM( const char *filename, uchar *buf, int xsize, int ysize ); @@ -73,8 +69,8 @@ static const char *image_filter_src = static void read_imagef( int x, int y, int w, int h, int nChannels, uchar *src, float *srcRgb ) { // clamp the coords - int x0 = MIN( MAX( x, 0 ), w - 1 ); - int y0 = MIN( MAX( y, 0 ), h - 1 ); + int x0 = std::min(std::max(x, 0), w - 1); + int y0 = std::min(std::max(y, 0), h - 1); // get tine index int indx = ( y0 * w + x0 ) * nChannels; diff --git a/test_conformance/workgroups/test_wg_broadcast.cpp b/test_conformance/workgroups/test_wg_broadcast.cpp index 35559476..29380211 100644 --- a/test_conformance/workgroups/test_wg_broadcast.cpp +++ b/test_conformance/workgroups/test_wg_broadcast.cpp @@ -20,6 +20,8 @@ #include #include +#include + #include "procs.h" @@ -310,7 +312,7 @@ test_work_group_broadcast_2D(cl_device_id device, cl_context context, cl_command localsize[0] = localsize[1] = 1; } - num_workgroups = MAX(n_elems/wg_size[0], 16); + num_workgroups = std::max(n_elems / wg_size[0], (size_t)16); globalsize[0] = num_workgroups * localsize[0]; globalsize[1] = num_workgroups * localsize[1]; num_elements = globalsize[0] * globalsize[1]; @@ -437,7 +439,7 @@ test_work_group_broadcast_3D(cl_device_id device, cl_context context, cl_command localsize[0] = localsize[1] = localsize[2] = 1; } - num_workgroups = MAX(n_elems/wg_size[0], 8); + num_workgroups = std::max(n_elems / wg_size[0], (size_t)8); globalsize[0] = num_workgroups * localsize[0]; globalsize[1] = num_workgroups * localsize[1]; globalsize[2] = num_workgroups * localsize[2]; diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp index 12338b68..644b3ccf 100644 --- a/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp +++ b/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp @@ -20,8 +20,9 @@ #include #include -#include "procs.h" +#include +#include 
"procs.h" const char *wg_scan_exclusive_max_kernel_code_int = "__kernel void test_wg_scan_exclusive_max_int(global int *input, global int *output)\n" @@ -79,7 +80,7 @@ verify_wg_scan_exclusive_max_int(int *inptr, int *outptr, size_t n, size_t wg_si log_info("work_group_scan_exclusive_max int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; } - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); } } @@ -103,7 +104,7 @@ verify_wg_scan_exclusive_max_uint(unsigned int *inptr, unsigned int *outptr, siz log_info("work_group_scan_exclusive_max int: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; } - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); } } @@ -127,7 +128,7 @@ verify_wg_scan_exclusive_max_long(cl_long *inptr, cl_long *outptr, size_t n, siz log_info("work_group_scan_exclusive_max long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; } - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); } } @@ -151,7 +152,7 @@ verify_wg_scan_exclusive_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, log_info("work_group_scan_exclusive_max ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; } - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); } } diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp index f4e6bf97..3c6dfc87 100644 --- a/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp +++ b/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp @@ -20,8 +20,9 @@ #include #include -#include "procs.h" +#include +#include "procs.h" const char *wg_scan_exclusive_min_kernel_code_int = "__kernel void test_wg_scan_exclusive_min_int(global int *input, global int *output)\n" @@ -80,7 +81,7 @@ verify_wg_scan_exclusive_min_int(int 
*inptr, int *outptr, size_t n, size_t wg_si log_info("work_group_scan_exclusive_min int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; } - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); } } @@ -104,7 +105,7 @@ verify_wg_scan_exclusive_min_uint(unsigned int *inptr, unsigned int *outptr, siz log_info("work_group_scan_exclusive_min int: Error at %u: expected = %u, got = %u\n", j+i, min_, outptr[j+i]); return -1; } - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); } } @@ -128,7 +129,7 @@ verify_wg_scan_exclusive_min_long(cl_long *inptr, cl_long *outptr, size_t n, siz log_info("work_group_scan_exclusive_min long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; } - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); } } @@ -152,7 +153,7 @@ verify_wg_scan_exclusive_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, log_info("work_group_scan_exclusive_min ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; } - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); } } diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp index 44ebf805..2a2e230e 100644 --- a/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp +++ b/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp @@ -20,6 +20,8 @@ #include #include +#include + #include "procs.h" @@ -75,7 +77,7 @@ verify_wg_scan_inclusive_max_int(int *inptr, int *outptr, size_t n, size_t wg_si m = wg_size; for (i = 0; i < m; ++i) { - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); if (outptr[j+i] != max_) { log_info("work_group_scan_inclusive_max int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; @@ -99,7 +101,7 @@ verify_wg_scan_inclusive_max_uint(unsigned 
int *inptr, unsigned int *outptr, siz m = wg_size; for (i = 0; i < m; ++i) { - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); if (outptr[j+i] != max_) { log_info("work_group_scan_inclusive_max int: Error at %lu: expected = %u, got = %u\n", (unsigned long)(j+i), max_, outptr[j+i]); return -1; @@ -123,7 +125,7 @@ verify_wg_scan_inclusive_max_long(cl_long *inptr, cl_long *outptr, size_t n, siz m = wg_size; for (i = 0; i < m; ++i) { - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); if (outptr[j+i] != max_) { log_info("work_group_scan_inclusive_max long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; @@ -147,7 +149,7 @@ verify_wg_scan_inclusive_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, m = wg_size; for (i = 0; i < m; ++i) { - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); if (outptr[j+i] != max_) { log_info("work_group_scan_inclusive_max ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp index f2f05788..adbdad56 100644 --- a/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp +++ b/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp @@ -20,6 +20,8 @@ #include #include +#include + #include "procs.h" @@ -75,7 +77,7 @@ verify_wg_scan_inclusive_min_int(int *inptr, int *outptr, size_t n, size_t wg_si m = wg_size; for (i = 0; i < m; ++i) { - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); if (outptr[j+i] != min_) { log_info("work_group_scan_inclusive_min int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; @@ -99,7 +101,7 @@ verify_wg_scan_inclusive_min_uint(unsigned int *inptr, unsigned int *outptr, siz m = wg_size; for (i = 0; i < m; ++i) { - min_ = MIN(inptr[j+i], min_); + min_ = 
std::min(inptr[j + i], min_); if (outptr[j+i] != min_) { log_info("work_group_scan_inclusive_min int: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; @@ -123,7 +125,7 @@ verify_wg_scan_inclusive_min_long(cl_long *inptr, cl_long *outptr, size_t n, siz m = wg_size; for (i = 0; i < m; ++i) { - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); if (outptr[j+i] != min_) { log_info("work_group_scan_inclusive_min long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; @@ -147,7 +149,7 @@ verify_wg_scan_inclusive_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, m = wg_size; for (i = 0; i < m; ++i) { - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); if (outptr[j+i] != min_) { log_info("work_group_scan_inclusive_min ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; -- cgit v1.2.3 From ddca0f802bee72ff9ea90b1dab28dddc51ef9a20 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 28 Sep 2021 11:19:17 -0600 Subject: gles: Fix double frees. (#1323) * gles: Fix double frees. Remove a few explicit frees in the redirect_buffers test which are already handled by a wrapper. * gles: Fix double frees A recent update to the object wrapper classes (#1268) changed the behavior of assigning to a wrapper, whereby the wrapped object is now released upon assignment. A couple of tests were manually calling clReleaseMemObject and then assigning `nullptr` to the wrapper, resulting in the wrapper calling clReleaseMemObject on an object that had already been destroyed. 
Co-authored-by: spauls --- test_conformance/gles/test_buffers.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/test_conformance/gles/test_buffers.cpp b/test_conformance/gles/test_buffers.cpp index a2d67322..73711261 100644 --- a/test_conformance/gles/test_buffers.cpp +++ b/test_conformance/gles/test_buffers.cpp @@ -205,10 +205,10 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType if (validate_only) { int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, (GLuint)inGLBuffer, (GLenum)0, 0) | CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER, (GLuint)outGLBuffer, (GLenum)0, 0) ); - for(i=0;i<3;i++) + + for (i = 0; i < 3; i++) { - clReleaseMemObject(streams[i]); - streams[i] = NULL; + streams[i].reset(); } glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; @@ -285,10 +285,9 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType clP += get_explicit_type_size( vecType ); } - for(i=0;i<3;i++) + for (i = 0; i < 3; i++) { - clReleaseMemObject(streams[i]); - streams[i] = NULL; + streams[i].reset(); } glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; -- cgit v1.2.3 From 4fb5deeec1e38bfa796b1cc0e93294ba1983b473 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 28 Sep 2021 11:19:40 -0600 Subject: api: Enable cl_khr_fp16 when using half types in kernel (#1327) --- test_conformance/api/test_kernel_arg_info.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp index 8073e0de..dddb4a23 100644 --- a/test_conformance/api/test_kernel_arg_info.cpp +++ b/test_conformance/api/test_kernel_arg_info.cpp @@ -167,7 +167,8 @@ static std::string generate_argument(const KernelArgInfo& kernel_arg) /* This function generates a kernel source and allows for multiple arguments to * be passed in and subsequently queried. 
*/ static std::string generate_kernel(const std::vector& all_args, - const bool supports_3d_image_writes = false) + const bool supports_3d_image_writes = false, + const bool kernel_uses_half_type = false) { std::string ret; @@ -175,6 +176,10 @@ static std::string generate_kernel(const std::vector& all_args, { ret += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable\n"; } + if (kernel_uses_half_type) + { + ret += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + } ret += "kernel void get_kernel_arg_info(\n"; for (int i = 0; i < all_args.size(); ++i) { @@ -673,8 +678,8 @@ static int run_scalar_vector_tests(cl_context context, cl_device_id deviceID) if (param_size + total_param_size >= max_param_size || all_args.size() == MAX_NUMBER_OF_KERNEL_ARGS) { - const std::string kernel_src = - generate_kernel(all_args); + const std::string kernel_src = generate_kernel( + all_args, false, device_supports_half(deviceID)); failed_tests += compare_kernel_with_expected( context, deviceID, kernel_src.c_str(), expected_args); @@ -696,7 +701,8 @@ static int run_scalar_vector_tests(cl_context context, cl_device_id deviceID) } } } - const std::string kernel_src = generate_kernel(all_args); + const std::string kernel_src = + generate_kernel(all_args, false, device_supports_half(deviceID)); failed_tests += compare_kernel_with_expected( context, deviceID, kernel_src.c_str(), expected_args); return failed_tests; -- cgit v1.2.3 From 2b770c4f348d9ad71a22c3b949a1cffe32e9d1f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Wed, 29 Sep 2021 12:38:42 +0100 Subject: Update cl_khr_integer_dot_product tests for v2 (#1317) * Update cl_khr_integer_dot_product tests for v2 Signed-off-by: Kevin Petit Signed-off-by: Marco Cattani Change-Id: I97dbd820f1f32f6b377e47d0bf638f36bb91930a * only query acceleration properties with v2+ Change-Id: I3f13a0cba7f1f686365b10adf81690e089cd3d74 --- test_common/harness/deviceInfo.cpp | 34 +++++++++++ test_common/harness/deviceInfo.h | 5 ++ 
.../integer_ops/test_integer_dot_product.cpp | 67 ++++++++++++++++++++++ 3 files changed, 106 insertions(+) diff --git a/test_common/harness/deviceInfo.cpp b/test_common/harness/deviceInfo.cpp index 287a1423..97ab8c85 100644 --- a/test_common/harness/deviceInfo.cpp +++ b/test_common/harness/deviceInfo.cpp @@ -63,6 +63,40 @@ int is_extension_available(cl_device_id device, const char *extensionName) return false; } +cl_version get_extension_version(cl_device_id device, const char *extensionName) +{ + cl_int err; + size_t size; + + err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS_WITH_VERSION, 0, nullptr, + &size); + if (err != CL_SUCCESS) + { + throw std::runtime_error("clGetDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_" + "VERSION) failed to return size\n"); + } + + std::vector extensions(size / sizeof(cl_name_version)); + err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS_WITH_VERSION, size, + extensions.data(), &size); + if (err != CL_SUCCESS) + { + throw std::runtime_error("clGetDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_" + "VERSION) failed to return value\n"); + } + + for (auto &ext : extensions) + { + if (!strcmp(extensionName, ext.name)) + { + return ext.version; + } + } + + throw std::runtime_error("Extension " + std::string(extensionName) + + " not supported by device!"); +} + /* Returns a string containing the supported extensions list for a device. */ std::string get_device_extensions_string(cl_device_id device) { diff --git a/test_common/harness/deviceInfo.h b/test_common/harness/deviceInfo.h index f8c55805..912dd198 100644 --- a/test_common/harness/deviceInfo.h +++ b/test_common/harness/deviceInfo.h @@ -31,6 +31,11 @@ std::string get_device_info_string(cl_device_id device, /* Determines if an extension is supported by a device. */ int is_extension_available(cl_device_id device, const char *extensionName); +/* Returns the version of the extension the device supports or throws an + * exception if the extension is not supported by the device. 
*/ +cl_version get_extension_version(cl_device_id device, + const char *extensionName); + /* Returns a string containing the supported extensions list for a device. */ std::string get_device_extensions_string(cl_device_id device); diff --git a/test_conformance/integer_ops/test_integer_dot_product.cpp b/test_conformance/integer_ops/test_integer_dot_product.cpp index be25b320..602d59b6 100644 --- a/test_conformance/integer_ops/test_integer_dot_product.cpp +++ b/test_conformance/integer_ops/test_integer_dot_product.cpp @@ -336,6 +336,21 @@ int test_integer_dot_product(cl_device_id deviceID, cl_context context, return TEST_SKIPPED_ITSELF; } + Version deviceVersion = get_device_cl_version(deviceID); + cl_version extensionVersion; + + if ((deviceVersion >= Version(3, 0)) + || is_extension_available(deviceID, "cl_khr_extended_versioning")) + { + extensionVersion = + get_extension_version(deviceID, "cl_khr_integer_dot_product"); + } + else + { + // Assume 1.0.0 is supported if the version can't be queried + extensionVersion = CL_MAKE_VERSION(1, 0, 0); + } + cl_int error = CL_SUCCESS; int result = TEST_PASS; @@ -346,12 +361,63 @@ int test_integer_dot_product(cl_device_id deviceID, cl_context context, test_error( error, "Unable to query CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR"); + + // Check that the required capabilities are reported test_assert_error( dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR, "When cl_khr_integer_dot_product is supported " "CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR must be " "supported"); + if (extensionVersion >= CL_MAKE_VERSION(2, 0, 0)) + { + test_assert_error( + dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR, + "When cl_khr_integer_dot_product is supported with version >= 2.0.0" + "CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR must be " + "supported"); + } + + // Check that acceleration properties can be queried + if (extensionVersion >= CL_MAKE_VERSION(2, 0, 0)) + { + size_t size_ret; + error = 
clGetDeviceInfo( + deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR, 0, + nullptr, &size_ret); + test_error( + error, + "Unable to query size of data returned by " + "CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR"); + + cl_device_integer_dot_product_acceleration_properties_khr + accelerationProperties; + error = clGetDeviceInfo( + deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR, + sizeof(accelerationProperties), &accelerationProperties, nullptr); + test_error(error, "Unable to query 8-bit acceleration properties"); + + error = clGetDeviceInfo( + deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR, + 0, nullptr, &size_ret); + test_error( + error, + "Unable to query size of data returned by " + "CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_" + "PACKED_KHR"); + + error = clGetDeviceInfo( + deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR, + sizeof(accelerationProperties), &accelerationProperties, nullptr); + test_error(error, + "Unable to query 4x8-bit packed acceleration properties"); + } + + // Report when unknown capabilities are found if (dotCaps & ~(CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR | CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR)) @@ -359,6 +425,7 @@ int test_integer_dot_product(cl_device_id deviceID, cl_context context, log_info("NOTE: found an unknown / untested capability!\n"); } + // Test built-in functions if (dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR) { result |= test_vectype(deviceID, context, queue, -- cgit v1.2.3 From 903f1bf65dfe15956295eb9379f5706568d858a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Thu, 30 Sep 2021 13:33:18 +0100 Subject: Report unsupported extended subgroup tests as skipped rather than passed (#1301) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Report unsupported extended 
subgroup tests as skipped rather than passed Also don't check the presence of extensions for each sub-test. Signed-off-by: Kévin Petit * address review comments --- test_conformance/subgroups/subhelpers.h | 17 +---------------- test_conformance/subgroups/test_subgroup_ballot.cpp | 10 +++++++--- .../subgroups/test_subgroup_clustered_reduce.cpp | 12 +++++++----- .../subgroups/test_subgroup_extended_types.cpp | 12 +++++++----- .../subgroups/test_subgroup_non_uniform_arithmetic.cpp | 15 +++++++++------ .../subgroups/test_subgroup_non_uniform_vote.cpp | 13 +++++++------ test_conformance/subgroups/test_subgroup_shuffle.cpp | 10 +++++++--- .../subgroups/test_subgroup_shuffle_relative.cpp | 12 +++++++----- 8 files changed, 52 insertions(+), 49 deletions(-) diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 9232cded..0d497fb3 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -33,10 +33,9 @@ extern cl_half_rounding_mode g_rounding_mode; struct WorkGroupParams { WorkGroupParams(size_t gws, size_t lws, - const std::vector &req_ext = {}, const std::vector &all_wim = {}) : global_workgroup_size(gws), local_workgroup_size(lws), - required_extensions(req_ext), all_work_item_masks(all_wim) + all_work_item_masks(all_wim) { subgroup_size = 0; work_items_mask = 0; @@ -49,7 +48,6 @@ struct WorkGroupParams uint32_t work_items_mask; int dynsc; bool use_core_subgroups; - std::vector required_extensions; std::vector all_work_item_masks; }; @@ -1297,19 +1295,6 @@ template struct test } } - for (std::string extension : test_params.required_extensions) - { - if (!is_extension_available(device, extension.c_str())) - { - log_info("The extension %s not supported on this device. 
SKIP " - "testing - kernel %s data type %s\n", - extension.c_str(), kname, TypeManager::name()); - return TEST_PASS; - } - kernel_sstr << "#pragma OPENCL EXTENSION " + extension - + ": enable\n"; - } - error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM"); diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index f2e4060b..9a2da5d9 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -926,11 +926,15 @@ template int run_non_uniform_broadcast_for_type(RunTestForType rft) int test_subgroup_functions_ballot(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { - std::vector required_extensions = { "cl_khr_subgroup_ballot" }; + if (!is_extension_available(device, "cl_khr_subgroup_ballot")) + { + log_info("cl_khr_subgroup_ballot is not supported on this device, " + "skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } constexpr size_t global_work_size = 170; constexpr size_t local_work_size = 64; - WorkGroupParams test_params(global_work_size, local_work_size, - required_extensions); + WorkGroupParams test_params(global_work_size, local_work_size); RunTestForType rft(device, context, queue, num_elements, test_params); // non uniform broadcast functions diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp index 588e9cee..87507e37 100644 --- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -305,13 +305,15 @@ int test_subgroup_functions_clustered_reduce(cl_device_id device, cl_command_queue queue, int num_elements) { - std::vector required_extensions = { - "cl_khr_subgroup_clustered_reduce" - }; + if 
(!is_extension_available(device, "cl_khr_subgroup_clustered_reduce")) + { + log_info("cl_khr_subgroup_clustered_reduce is not supported on this " + "device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size, - required_extensions); + WorkGroupParams test_params(global_work_size, local_work_size); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_cluster_red_add_max_min_mul_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_extended_types.cpp b/test_conformance/subgroups/test_subgroup_extended_types.cpp index 98401b8e..b281f618 100644 --- a/test_conformance/subgroups/test_subgroup_extended_types.cpp +++ b/test_conformance/subgroups/test_subgroup_extended_types.cpp @@ -59,13 +59,15 @@ int test_subgroup_functions_extended_types(cl_device_id device, cl_command_queue queue, int num_elements) { - std::vector required_extensions = { - "cl_khr_subgroup_extended_types" - }; + if (!is_extension_available(device, "cl_khr_subgroup_extended_types")) + { + log_info("cl_khr_subgroup_extended_types is not supported on this " + "device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size, - required_extensions); + WorkGroupParams test_params(global_work_size, local_work_size); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_broadcast_for_extended_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp index eb46ff09..6c44249e 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp @@ -434,17 
+434,20 @@ int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device, cl_command_queue queue, int num_elements) { - std::vector required_extensions = { - "cl_khr_subgroup_non_uniform_arithmetic" - }; + if (!is_extension_available(device, + "cl_khr_subgroup_non_uniform_arithmetic")) + { + log_info("cl_khr_subgroup_non_uniform_arithmetic is not supported on " + "this device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } std::vector masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555, 0x0f0ff0f0, 0x0f0f0f0f, 0xff0000ff, 0xff00ff00, 0x00ffff00, 0x80000000, 0xaaaaaaaa }; constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size, - required_extensions, masks); + WorkGroupParams test_params(global_work_size, local_work_size, masks); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_functions_add_mul_max_min_for_type(rft); @@ -470,4 +473,4 @@ int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device, error |= run_functions_logical_and_or_xor_for_type(rft); return error; -} \ No newline at end of file +} diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp index 2b00b4dd..484e9b6b 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -272,17 +272,18 @@ int test_subgroup_functions_non_uniform_vote(cl_device_id device, cl_command_queue queue, int num_elements) { - std::vector required_extensions = { - "cl_khr_subgroup_non_uniform_vote" - }; - + if (!is_extension_available(device, "cl_khr_subgroup_non_uniform_vote")) + { + log_info("cl_khr_subgroup_non_uniform_vote is not supported on this " + "device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } std::vector masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555, 0x0f0ff0f0, 0x0f0f0f0f, 
0xff0000ff, 0xff00ff00, 0x00ffff00, 0x80000000 }; constexpr size_t global_work_size = 170; constexpr size_t local_work_size = 64; - WorkGroupParams test_params(global_work_size, local_work_size, - required_extensions, masks); + WorkGroupParams test_params(global_work_size, local_work_size, masks); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_vote_all_equal_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_shuffle.cpp b/test_conformance/subgroups/test_subgroup_shuffle.cpp index 049f0982..37b27ced 100644 --- a/test_conformance/subgroups/test_subgroup_shuffle.cpp +++ b/test_conformance/subgroups/test_subgroup_shuffle.cpp @@ -55,11 +55,15 @@ template int run_shuffle_for_type(RunTestForType rft) int test_subgroup_functions_shuffle(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { - std::vector required_extensions{ "cl_khr_subgroup_shuffle" }; + if (!is_extension_available(device, "cl_khr_subgroup_shuffle")) + { + log_info("cl_khr_subgroup_shuffle is not supported on this device, " + "skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size, - required_extensions); + WorkGroupParams test_params(global_work_size, local_work_size); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_shuffle_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp b/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp index 6000c970..11401e80 100644 --- a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp +++ b/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp @@ -56,13 +56,15 @@ int test_subgroup_functions_shuffle_relative(cl_device_id device, cl_command_queue queue, int num_elements) { - std::vector required_extensions = { - 
"cl_khr_subgroup_shuffle_relative" - }; + if (!is_extension_available(device, "cl_khr_subgroup_shuffle_relative")) + { + log_info("cl_khr_subgroup_shuffle_relative is not supported on this " + "device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size, - required_extensions); + WorkGroupParams test_params(global_work_size, local_work_size); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_shuffle_relative_for_type(rft); -- cgit v1.2.3 From 92844bead1afdf75b56085c2cda34be27458a582 Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Fri, 1 Oct 2021 12:28:37 +0200 Subject: Extended subgroups - use 128bit masks (#1215) * Extended subgroups - use 128bit masks * Refactoring to avoid kernels code duplication * unification kernel names as test_ prefix +subgroups function name * use string literals that improve readability * use kernel templates that limit code duplication * WorkGroupParams allows define default kernel - kernel template for multiple functions * WorkGroupParams allows define kernel for specific one subgroup function Co-authored-by: Stuart Brady --- .../subgroups/subgroup_common_kernels.cpp | 104 +---- .../subgroups/subgroup_common_kernels.h | 12 +- .../subgroups/subgroup_common_templates.h | 98 +++-- test_conformance/subgroups/subhelpers.h | 181 ++++++++- test_conformance/subgroups/test_subgroup.cpp | 47 +-- .../subgroups/test_subgroup_ballot.cpp | 425 ++++++++------------- .../subgroups/test_subgroup_clustered_reduce.cpp | 176 ++------- .../subgroups/test_subgroup_extended_types.cpp | 44 ++- .../test_subgroup_non_uniform_arithmetic.cpp | 409 +++----------------- .../subgroups/test_subgroup_non_uniform_vote.cpp | 93 ++--- .../subgroups/test_subgroup_shuffle.cpp | 29 +- .../subgroups/test_subgroup_shuffle_relative.cpp | 28 +- 12 files changed, 592 insertions(+), 1054 
deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_kernels.cpp b/test_conformance/subgroups/subgroup_common_kernels.cpp index f8b24450..33a51637 100644 --- a/test_conformance/subgroups/subgroup_common_kernels.cpp +++ b/test_conformance/subgroups/subgroup_common_kernels.cpp @@ -15,92 +15,20 @@ // #include "subgroup_common_kernels.h" -const char* bcast_source = - "__kernel void test_bcast(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " uint which_sub_group_local_id = xy[gid].z;\n" - " out[gid] = sub_group_broadcast(x, which_sub_group_local_id);\n" - "}\n"; - -const char* redadd_source = "__kernel void test_redadd(const __global Type " - "*in, __global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_reduce_add(in[gid]);\n" - "}\n"; - -const char* redmax_source = "__kernel void test_redmax(const __global Type " - "*in, __global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_reduce_max(in[gid]);\n" - "}\n"; - -const char* redmin_source = "__kernel void test_redmin(const __global Type " - "*in, __global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_reduce_min(in[gid]);\n" - "}\n"; - -const char* scinadd_source = - "__kernel void test_scinadd(const __global Type *in, __global int4 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_inclusive_add(in[gid]);\n" - "}\n"; - -const char* scinmax_source = - "__kernel void test_scinmax(const __global Type *in, __global int4 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_inclusive_max(in[gid]);\n" - "}\n"; - -const char* scinmin_source = 
- "__kernel void test_scinmin(const __global Type *in, __global int4 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_inclusive_min(in[gid]);\n" - "}\n"; - -const char* scexadd_source = - "__kernel void test_scexadd(const __global Type *in, __global int4 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_exclusive_add(in[gid]);\n" - "}\n"; - -const char* scexmax_source = - "__kernel void test_scexmax(const __global Type *in, __global int4 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_exclusive_max(in[gid]);\n" - "}\n"; - -const char* scexmin_source = - "__kernel void test_scexmin(const __global Type *in, __global int4 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_exclusive_min(in[gid]);\n" - "}\n"; +std::string sub_group_reduction_scan_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + out[gid] = %s(in[gid]); + } +)"; + +std::string sub_group_generic_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + out[gid] = %s(x, xy[gid].z); + } +)"; \ No newline at end of file diff --git a/test_conformance/subgroups/subgroup_common_kernels.h b/test_conformance/subgroups/subgroup_common_kernels.h index 8ae97d9a..bf2210ef 100644 --- a/test_conformance/subgroups/subgroup_common_kernels.h +++ b/test_conformance/subgroups/subgroup_common_kernels.h @@ -18,15 +18,7 @@ #include "subhelpers.h" -extern const char* bcast_source; -extern const char* redadd_source; -extern const char* redmax_source; -extern const char* redmin_source; -extern const char* scinadd_source; 
-extern const char* scinmax_source; -extern const char* scinmin_source; -extern const char* scexadd_source; -extern const char* scexmax_source; -extern const char* scexmin_source; +extern std::string sub_group_reduction_scan_source; +extern std::string sub_group_generic_source; #endif diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 4333e95b..5c5f9560 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -17,13 +17,10 @@ #define SUBGROUPCOMMONTEMPLATES_H #include "typeWrappers.h" -#include #include "CL/cl_half.h" #include "subhelpers.h" - #include -typedef std::bitset<128> bs128; static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id, const std::string &mask_type, cl_uint max_sub_group_size) @@ -577,16 +574,21 @@ template struct SCEX_NU int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; ng = ng / nw; std::string func_name; - work_items_mask ? func_name = "sub_group_non_uniform_scan_exclusive" - : func_name = "sub_group_scan_exclusive"; + test_params.work_items_mask.any() + ? 
func_name = "sub_group_non_uniform_scan_exclusive" + : func_name = "sub_group_scan_exclusive"; log_info(" %s_%s(%s)...\n", func_name.c_str(), operation_names(operation), TypeManager::name()); log_info(" test params: global size = %d local size = %d subgroups " - "size = %d work item mask = 0x%x \n", - test_params.global_workgroup_size, nw, ns, work_items_mask); + "size = %d \n", + test_params.global_workgroup_size, nw, ns); + if (test_params.work_items_mask.any()) + { + log_info(" work items mask: %s\n", + test_params.work_items_mask.to_string().c_str()); + } genrand(x, t, m, ns, nw, ng); } @@ -597,18 +599,22 @@ template struct SCEX_NU int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; + bs128 work_items_mask = test_params.work_items_mask; int nj = (nw + ns - 1) / ns; Ty tr, rr; ng = ng / nw; std::string func_name; - work_items_mask ? func_name = "sub_group_non_uniform_scan_exclusive" - : func_name = "sub_group_scan_exclusive"; + test_params.work_items_mask.any() + ? func_name = "sub_group_non_uniform_scan_exclusive" + : func_name = "sub_group_scan_exclusive"; + - uint32_t use_work_items_mask; // for uniform case take into consideration all workitems - use_work_items_mask = !work_items_mask ? 
0xFFFFFFFF : work_items_mask; + if (!work_items_mask.any()) + { + work_items_mask.set(); + } for (k = 0; k < ng; ++k) { // for each work_group // Map to array indexed to array indexed by local ID and sub group @@ -624,8 +630,7 @@ template struct SCEX_NU std::set active_work_items; for (i = 0; i < n; ++i) { - uint32_t check_work_item = 1 << (i % 32); - if (use_work_items_mask & check_work_item) + if (work_items_mask.test(i)) { active_work_items.insert(i); } @@ -688,18 +693,23 @@ template struct SCIN_NU int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; ng = ng / nw; std::string func_name; - work_items_mask ? func_name = "sub_group_non_uniform_scan_inclusive" - : func_name = "sub_group_scan_inclusive"; + test_params.work_items_mask.any() + ? func_name = "sub_group_non_uniform_scan_inclusive" + : func_name = "sub_group_scan_inclusive"; genrand(x, t, m, ns, nw, ng); log_info(" %s_%s(%s)...\n", func_name.c_str(), operation_names(operation), TypeManager::name()); log_info(" test params: global size = %d local size = %d subgroups " - "size = %d work item mask = 0x%x \n", - test_params.global_workgroup_size, nw, ns, work_items_mask); + "size = %d \n", + test_params.global_workgroup_size, nw, ns); + if (test_params.work_items_mask.any()) + { + log_info(" work items mask: %s\n", + test_params.work_items_mask.to_string().c_str()); + } } static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, @@ -709,18 +719,22 @@ template struct SCIN_NU int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; + bs128 work_items_mask = test_params.work_items_mask; + int nj = (nw + ns - 1) / ns; Ty tr, rr; ng = ng / nw; std::string func_name; - work_items_mask ? 
func_name = "sub_group_non_uniform_scan_inclusive" - : func_name = "sub_group_scan_inclusive"; + work_items_mask.any() + ? func_name = "sub_group_non_uniform_scan_inclusive" + : func_name = "sub_group_scan_inclusive"; - uint32_t use_work_items_mask; // for uniform case take into consideration all workitems - use_work_items_mask = !work_items_mask ? 0xFFFFFFFF : work_items_mask; + if (!work_items_mask.any()) + { + work_items_mask.set(); + } // std::bitset<32> mask32(use_work_items_mask); // for (int k) mask32.count(); for (k = 0; k < ng; ++k) @@ -740,8 +754,7 @@ template struct SCIN_NU for (i = 0; i < n; ++i) { - uint32_t check_work_item = 1 << (i % 32); - if (use_work_items_mask & check_work_item) + if (work_items_mask.test(i)) { if (catch_frist_active == -1) { @@ -807,17 +820,22 @@ template struct RED_NU int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; ng = ng / nw; std::string func_name; - work_items_mask ? func_name = "sub_group_non_uniform_reduce" - : func_name = "sub_group_reduce"; + test_params.work_items_mask.any() + ? 
func_name = "sub_group_non_uniform_reduce" + : func_name = "sub_group_reduce"; log_info(" %s_%s(%s)...\n", func_name.c_str(), operation_names(operation), TypeManager::name()); log_info(" test params: global size = %d local size = %d subgroups " - "size = %d work item mask = 0x%x \n", - test_params.global_workgroup_size, nw, ns, work_items_mask); + "size = %d \n", + test_params.global_workgroup_size, nw, ns); + if (test_params.work_items_mask.any()) + { + log_info(" work items mask: %s\n", + test_params.work_items_mask.to_string().c_str()); + } genrand(x, t, m, ns, nw, ng); } @@ -828,14 +846,14 @@ template struct RED_NU int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; + bs128 work_items_mask = test_params.work_items_mask; int nj = (nw + ns - 1) / ns; ng = ng / nw; Ty tr, rr; std::string func_name; - work_items_mask ? func_name = "sub_group_non_uniform_reduce" - : func_name = "sub_group_reduce"; + work_items_mask.any() ? func_name = "sub_group_non_uniform_reduce" + : func_name = "sub_group_reduce"; for (k = 0; k < ng; ++k) { @@ -847,9 +865,10 @@ template struct RED_NU my[j] = y[j]; } - uint32_t use_work_items_mask; - use_work_items_mask = - !work_items_mask ? 
0xFFFFFFFF : work_items_mask; + if (!work_items_mask.any()) + { + work_items_mask.set(); + } for (j = 0; j < nj; ++j) { @@ -859,8 +878,7 @@ template struct RED_NU int catch_frist_active = -1; for (i = 0; i < n; ++i) { - uint32_t check_work_item = 1 << (i % 32); - if (use_work_items_mask & check_work_item) + if (work_items_mask.test(i)) { if (catch_frist_active == -1) { diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 0d497fb3..6d32928a 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -24,31 +24,172 @@ #include #include #include +#include +#include +#include #define NR_OF_ACTIVE_WORK_ITEMS 4 extern MTdata gMTdata; +typedef std::bitset<128> bs128; extern cl_half_rounding_mode g_rounding_mode; struct WorkGroupParams { WorkGroupParams(size_t gws, size_t lws, - const std::vector &all_wim = {}) + bool use_mask = false) : global_workgroup_size(gws), local_workgroup_size(lws), - all_work_item_masks(all_wim) + use_masks(use_mask) { subgroup_size = 0; work_items_mask = 0; use_core_subgroups = true; dynsc = 0; + load_masks(); } size_t global_workgroup_size; size_t local_workgroup_size; size_t subgroup_size; - uint32_t work_items_mask; + bs128 work_items_mask; int dynsc; bool use_core_subgroups; - std::vector all_work_item_masks; + std::vector all_work_item_masks; + bool use_masks; + void save_kernel_source(const std::string &source, std::string name = "") + { + if (name == "") + { + name = "default"; + } + if (kernel_function_name.find(name) != kernel_function_name.end()) + { + log_info("Kernel definition duplication. Source will be " + "overwritten for function name %s", + name.c_str()); + } + kernel_function_name[name] = source; + }; + // return specific defined kernel or default. 
+ std::string get_kernel_source(std::string name) + { + if (kernel_function_name.find(name) == kernel_function_name.end()) + { + return kernel_function_name["default"]; + } + return kernel_function_name[name]; + } + + +private: + std::map kernel_function_name; + void load_masks() + { + if (use_masks) + { + // 1 in string will be set 1, 0 will be set 0 + bs128 mask_0xf0f0f0f0("11110000111100001111000011110000" + "11110000111100001111000011110000" + "11110000111100001111000011110000" + "11110000111100001111000011110000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0xf0f0f0f0); + // 1 in string will be set 0, 0 will be set 1 + bs128 mask_0x0f0f0f0f("11110000111100001111000011110000" + "11110000111100001111000011110000" + "11110000111100001111000011110000" + "11110000111100001111000011110000", + 128, '1', '0'); + all_work_item_masks.push_back(mask_0x0f0f0f0f); + bs128 mask_0x5555aaaa("10101010101010101010101010101010" + "10101010101010101010101010101010" + "10101010101010101010101010101010" + "10101010101010101010101010101010", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x5555aaaa); + bs128 mask_0xaaaa5555("10101010101010101010101010101010" + "10101010101010101010101010101010" + "10101010101010101010101010101010" + "10101010101010101010101010101010", + 128, '1', '0'); + all_work_item_masks.push_back(mask_0xaaaa5555); + // 0x0f0ff0f0 + bs128 mask_0x0f0ff0f0("00001111000011111111000011110000" + "00001111000011111111000011110000" + "00001111000011111111000011110000" + "00001111000011111111000011110000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x0f0ff0f0); + // 0xff0000ff + bs128 mask_0xff0000ff("11111111000000000000000011111111" + "11111111000000000000000011111111" + "11111111000000000000000011111111" + "11111111000000000000000011111111", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0xff0000ff); + // 0xff00ff00 + bs128 mask_0xff00ff00("11111111000000001111111100000000" + "11111111000000001111111100000000" + 
"11111111000000001111111100000000" + "11111111000000001111111100000000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0xff00ff00); + // 0x00ffff00 + bs128 mask_0x00ffff00("00000000111111111111111100000000" + "00000000111111111111111100000000" + "00000000111111111111111100000000" + "00000000111111111111111100000000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x00ffff00); + // 0x80 1 workitem highest id for 8 subgroup size + bs128 mask_0x80808080("10000000100000001000000010000000" + "10000000100000001000000010000000" + "10000000100000001000000010000000" + "10000000100000001000000010000000", + 128, '0', '1'); + + all_work_item_masks.push_back(mask_0x80808080); + // 0x8000 1 workitem highest id for 16 subgroup size + bs128 mask_0x80008000("10000000000000001000000000000000" + "10000000000000001000000000000000" + "10000000000000001000000000000000" + "10000000000000001000000000000000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x80008000); + // 0x80000000 1 workitem highest id for 32 subgroup size + bs128 mask_0x80000000("10000000000000000000000000000000" + "10000000000000000000000000000000" + "10000000000000000000000000000000" + "10000000000000000000000000000000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x80000000); + // 0x80000000 00000000 1 workitem highest id for 64 subgroup size + // 0x80000000 1 workitem highest id for 32 subgroup size + bs128 mask_0x8000000000000000("10000000000000000000000000000000" + "00000000000000000000000000000000" + "10000000000000000000000000000000" + "00000000000000000000000000000000", + 128, '0', '1'); + + all_work_item_masks.push_back(mask_0x8000000000000000); + // 0x80000000 00000000 00000000 00000000 1 workitem highest id for + // 128 subgroup size + bs128 mask_0x80000000000000000000000000000000( + "10000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000", + 128, '0', '1'); + 
all_work_item_masks.push_back( + mask_0x80000000000000000000000000000000); + + bs128 mask_0xffffffff("11111111111111111111111111111111" + "11111111111111111111111111111111" + "11111111111111111111111111111111" + "11111111111111111111111111111111", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0xffffffff); + } + } }; enum class SubgroupsBroadcastOp @@ -1267,11 +1408,23 @@ template struct test std::vector mapout; mapout.resize(local); std::stringstream kernel_sstr; - if (test_params.work_items_mask != 0) + if (test_params.use_masks) { - kernel_sstr << "#define WORK_ITEMS_MASK "; - kernel_sstr << "0x" << std::hex << test_params.work_items_mask - << "\n"; + // Prepare uint4 type to store bitmask on kernel OpenCL C side + // To keep order the first character in string is the lowest bit + // there was a need to give such offset to bitset constructor + // (first highest offset = 96) + std::bitset<32> bits_1_32(test_params.work_items_mask.to_string(), + 96, 32); + std::bitset<32> bits_33_64(test_params.work_items_mask.to_string(), + 64, 32); + std::bitset<32> bits_65_96(test_params.work_items_mask.to_string(), + 32, 32); + std::bitset<32> bits_97_128(test_params.work_items_mask.to_string(), + 0, 32); + kernel_sstr << "global uint4 work_item_mask_vector = (uint4)(0b" + << bits_1_32 << ",0b" << bits_33_64 << ",0b" + << bits_65_96 << ",0b" << bits_97_128 << ");\n"; } @@ -1452,18 +1605,24 @@ struct RunTestForType num_elements_(num_elements), test_params_(test_params) {} template - int run_impl(const char *kernel_name, const char *source) + int run_impl(const std::string &function_name) { int error = TEST_PASS; + std::string source = + std::regex_replace(test_params_.get_kernel_source(function_name), + std::regex("\\%s"), function_name); + std::string kernel_name = "test_" + function_name; if (test_params_.all_work_item_masks.size() > 0) { error = test::mrun(device_, context_, queue_, num_elements_, - kernel_name, source, test_params_); + kernel_name.c_str(), 
source.c_str(), + test_params_); } else { error = test::run(device_, context_, queue_, num_elements_, - kernel_name, source, test_params_); + kernel_name.c_str(), source.c_str(), + test_params_); } return error; diff --git a/test_conformance/subgroups/test_subgroup.cpp b/test_conformance/subgroups/test_subgroup.cpp index c0e49524..63bfc453 100644 --- a/test_conformance/subgroups/test_subgroup.cpp +++ b/test_conformance/subgroups/test_subgroup.cpp @@ -150,25 +150,25 @@ template int run_broadcast_scan_reduction_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_bcast", bcast_source); - error |= rft.run_impl>("test_redadd", - redadd_source); - error |= rft.run_impl>("test_redmax", - redmax_source); - error |= rft.run_impl>("test_redmin", - redmin_source); - error |= rft.run_impl>("test_scinadd", - scinadd_source); - error |= rft.run_impl>("test_scinmax", - scinmax_source); - error |= rft.run_impl>("test_scinmin", - scinmin_source); - error |= rft.run_impl>("test_scexadd", - scexadd_source); - error |= rft.run_impl>("test_scexmax", - scexmax_source); - error |= rft.run_impl>("test_scexmin", - scexmin_source); + "sub_group_broadcast"); + error |= + rft.run_impl>("sub_group_reduce_add"); + error |= + rft.run_impl>("sub_group_reduce_max"); + error |= + rft.run_impl>("sub_group_reduce_min"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_add"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_max"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_min"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_add"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_max"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_min"); return error; } @@ -181,11 +181,14 @@ int test_subgroup_functions(cl_device_id device, cl_context context, constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; WorkGroupParams test_params(global_work_size, local_work_size); + 
test_params.save_kernel_source(sub_group_reduction_scan_source); + test_params.save_kernel_source(sub_group_generic_source, + "sub_group_broadcast"); + RunTestForType rft(device, context, queue, num_elements, test_params); int error = - rft.run_impl>("test_any", any_source); - error |= - rft.run_impl>("test_all", all_source); + rft.run_impl>("sub_group_any"); + error |= rft.run_impl>("sub_group_all"); error |= run_broadcast_scan_reduction_for_type(rft); error |= run_broadcast_scan_reduction_for_type(rft); error |= run_broadcast_scan_reduction_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 9a2da5d9..2bd54e43 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -684,239 +684,127 @@ template struct SMASK } }; -static const char *bcast_non_uniform_source = - "__kernel void test_bcast_non_uniform(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {\n" - " out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z);\n" - " } else {\n" - " out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w);\n" - " }\n" - "}\n"; - -static const char *bcast_first_source = - "__kernel void test_bcast_first(const __global Type *in, __global int4 " - "*xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {\n" - " out[gid] = sub_group_broadcast_first(x);\n" - " } else {\n" - " out[gid] = sub_group_broadcast_first(x);\n" - " }\n" - "}\n"; - -static const char *ballot_bit_count_source = - "__kernel void test_sub_group_ballot_bit_count(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x 
= in[gid];\n" - " uint4 value = (uint4)(0,0,0,0);\n" - " value = (uint4)(sub_group_ballot_bit_count(x),0,0,0);\n" - " out[gid] = value;\n" - "}\n"; - -static const char *ballot_inclusive_scan_source = - "__kernel void test_sub_group_ballot_inclusive_scan(const __global Type " - "*in, __global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " uint4 value = (uint4)(0,0,0,0);\n" - " value = (uint4)(sub_group_ballot_inclusive_scan(x),0,0,0);\n" - " out[gid] = value;\n" - "}\n"; - -static const char *ballot_exclusive_scan_source = - "__kernel void test_sub_group_ballot_exclusive_scan(const __global Type " - "*in, __global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " uint4 value = (uint4)(0,0,0,0);\n" - " value = (uint4)(sub_group_ballot_exclusive_scan(x),0,0,0);\n" - " out[gid] = value;\n" - "}\n"; - -static const char *ballot_find_lsb_source = - "__kernel void test_sub_group_ballot_find_lsb(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " uint4 value = (uint4)(0,0,0,0);\n" - " value = (uint4)(sub_group_ballot_find_lsb(x),0,0,0);\n" - " out[gid] = value;\n" - "}\n"; - -static const char *ballot_find_msb_source = - "__kernel void test_sub_group_ballot_find_msb(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " uint4 value = (uint4)(0,0,0,0);" - " value = (uint4)(sub_group_ballot_find_msb(x),0,0,0);" - " out[gid] = value ;" - "}\n"; - -static const char *get_subgroup_ge_mask_source = - "__kernel void test_get_sub_group_ge_mask(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].z = 
get_max_sub_group_size();\n" - " Type x = in[gid];\n" - " uint4 mask = get_sub_group_ge_mask();" - " out[gid] = mask;\n" - "}\n"; - -static const char *get_subgroup_gt_mask_source = - "__kernel void test_get_sub_group_gt_mask(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].z = get_max_sub_group_size();\n" - " Type x = in[gid];\n" - " uint4 mask = get_sub_group_gt_mask();" - " out[gid] = mask;\n" - "}\n"; - -static const char *get_subgroup_le_mask_source = - "__kernel void test_get_sub_group_le_mask(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].z = get_max_sub_group_size();\n" - " Type x = in[gid];\n" - " uint4 mask = get_sub_group_le_mask();" - " out[gid] = mask;\n" - "}\n"; - -static const char *get_subgroup_lt_mask_source = - "__kernel void test_get_sub_group_lt_mask(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].z = get_max_sub_group_size();\n" - " Type x = in[gid];\n" - " uint4 mask = get_sub_group_lt_mask();" - " out[gid] = mask;\n" - "}\n"; - -static const char *get_subgroup_eq_mask_source = - "__kernel void test_get_sub_group_eq_mask(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].z = get_max_sub_group_size();\n" - " Type x = in[gid];\n" - " uint4 mask = get_sub_group_eq_mask();" - " out[gid] = mask;\n" - "}\n"; - -static const char *ballot_source = - "__kernel void test_sub_group_ballot(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - "uint4 full_ballot = sub_group_ballot(1);\n" - "uint divergence_mask;\n" - "uint4 partial_ballot;\n" - "uint gid = get_global_id(0);" - "XY(xy,gid);\n" - "if (get_sub_group_local_id() & 1) {\n" - " 
divergence_mask = 0xaaaaaaaa;\n" - " partial_ballot = sub_group_ballot(1);\n" - "} else {\n" - " divergence_mask = 0x55555555;\n" - " partial_ballot = sub_group_ballot(1);\n" - "}\n" - " size_t lws = get_local_size(0);\n" - "uint4 masked_ballot = full_ballot;\n" - "masked_ballot.x &= divergence_mask;\n" - "masked_ballot.y &= divergence_mask;\n" - "masked_ballot.z &= divergence_mask;\n" - "masked_ballot.w &= divergence_mask;\n" - "out[gid] = all(masked_ballot == partial_ballot);\n" - - "} \n"; - -static const char *ballot_source_inverse = - "__kernel void test_sub_group_ballot_inverse(const __global " - "Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " uint4 value = (uint4)(10,0,0,0);\n" - " if (get_sub_group_local_id() & 1) {" - " uint4 partial_ballot_mask = " - "(uint4)(0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA);" - " if (sub_group_inverse_ballot(partial_ballot_mask)) {\n" - " value = (uint4)(1,0,0,1);\n" - " } else {\n" - " value = (uint4)(0,0,0,1);\n" - " }\n" - " } else {\n" - " uint4 partial_ballot_mask = " - "(uint4)(0x55555555,0x55555555,0x55555555,0x55555555);" - " if (sub_group_inverse_ballot(partial_ballot_mask)) {\n" - " value = (uint4)(1,0,0,2);\n" - " } else {\n" - " value = (uint4)(0,0,0,2);\n" - " }\n" - " }\n" - " out[gid] = value;\n" - "}\n"; +std::string sub_group_non_uniform_broadcast_source = R"( +__kernel void test_sub_group_non_uniform_broadcast(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) { + out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z); + } else { + out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w); + } +} +)"; +std::string sub_group_broadcast_first_source = R"( +__kernel void test_sub_group_broadcast_first(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = 
get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) { + out[gid] = sub_group_broadcast_first(x);; + } else { + out[gid] = sub_group_broadcast_first(x);; + } +} +)"; +std::string sub_group_ballot_bit_scan_find_source = R"( +__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + uint4 value = (uint4)(0,0,0,0); + value = (uint4)(%s(x),0,0,0); + out[gid] = value; +} +)"; +std::string sub_group_ballot_mask_source = R"( +__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + xy[gid].z = get_max_sub_group_size(); + Type x = in[gid]; + uint4 mask = %s(); + out[gid] = mask; +} +)"; +std::string sub_group_ballot_source = R"( +__kernel void test_sub_group_ballot(const __global Type *in, __global int4 *xy, __global Type *out) { + uint4 full_ballot = sub_group_ballot(1); + uint divergence_mask; + uint4 partial_ballot; + uint gid = get_global_id(0); + XY(xy,gid); + if (get_sub_group_local_id() & 1) { + divergence_mask = 0xaaaaaaaa; + partial_ballot = sub_group_ballot(1); + } else { + divergence_mask = 0x55555555; + partial_ballot = sub_group_ballot(1); + } + size_t lws = get_local_size(0); + uint4 masked_ballot = full_ballot; + masked_ballot.x &= divergence_mask; + masked_ballot.y &= divergence_mask; + masked_ballot.z &= divergence_mask; + masked_ballot.w &= divergence_mask; + out[gid] = all(masked_ballot == partial_ballot); -static const char *ballot_bit_extract_source = - "__kernel void test_sub_group_ballot_bit_extract(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " uint index = xy[gid].z;\n" - " uint4 value = (uint4)(10,0,0,0);\n" - " if (get_sub_group_local_id() & 1) {" - " if (sub_group_ballot_bit_extract(x, xy[gid].z)) {\n" - " value = 
(uint4)(1,0,0,1);\n" - " } else {\n" - " value = (uint4)(0,0,0,1);\n" - " }\n" - " } else {\n" - " if (sub_group_ballot_bit_extract(x, xy[gid].w)) {\n" - " value = (uint4)(1,0,0,2);\n" - " } else {\n" - " value = (uint4)(0,0,0,2);\n" - " }\n" - " }\n" - " out[gid] = value;\n" - "}\n"; +} +)"; +std::string sub_group_inverse_ballot_source = R"( +__kernel void test_sub_group_inverse_ballot(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + uint4 value = (uint4)(10,0,0,0); + if (get_sub_group_local_id() & 1) { + uint4 partial_ballot_mask = (uint4)(0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA); + if (sub_group_inverse_ballot(partial_ballot_mask)) { + value = (uint4)(1,0,0,1); + } else { + value = (uint4)(0,0,0,1); + } + } else { + uint4 partial_ballot_mask = (uint4)(0x55555555,0x55555555,0x55555555,0x55555555); + if (sub_group_inverse_ballot(partial_ballot_mask)) { + value = (uint4)(1,0,0,2); + } else { + value = (uint4)(0,0,0,2); + } + } + out[gid] = value; +} +)"; +std::string sub_group_ballot_bit_extract_source = R"( + __kernel void test_sub_group_ballot_bit_extract(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + uint index = xy[gid].z; + uint4 value = (uint4)(10,0,0,0); + if (get_sub_group_local_id() & 1) { + if (sub_group_ballot_bit_extract(x, xy[gid].z)) { + value = (uint4)(1,0,0,1); + } else { + value = (uint4)(0,0,0,1); + } + } else { + if (sub_group_ballot_bit_extract(x, xy[gid].w)) { + value = (uint4)(1,0,0,2); + } else { + value = (uint4)(0,0,0,2); + } + } + out[gid] = value; +} +)"; template int run_non_uniform_broadcast_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_bcast_non_uniform", bcast_non_uniform_source); + "sub_group_non_uniform_broadcast"); return error; } @@ -932,9 +820,15 @@ int test_subgroup_functions_ballot(cl_device_id device, cl_context context, "skipping 
test.\n"); return TEST_SKIPPED_ITSELF; } + constexpr size_t global_work_size = 170; constexpr size_t local_work_size = 64; WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_ballot_mask_source); + test_params.save_kernel_source(sub_group_non_uniform_broadcast_source, + "sub_group_non_uniform_broadcast"); + test_params.save_kernel_source(sub_group_broadcast_first_source, + "sub_group_broadcast_first"); RunTestForType rft(device, context, queue, num_elements, test_params); // non uniform broadcast functions @@ -1018,76 +912,87 @@ int test_subgroup_functions_ballot(cl_device_id device, cl_context context, // broadcast first functions error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl< subgroups::cl_half, BC>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); // mask functions error |= rft.run_impl>( - "test_get_sub_group_eq_mask", get_subgroup_eq_mask_source); + "get_sub_group_eq_mask"); error |= rft.run_impl>( - "test_get_sub_group_ge_mask", 
get_subgroup_ge_mask_source); + "get_sub_group_ge_mask"); error |= rft.run_impl>( - "test_get_sub_group_gt_mask", get_subgroup_gt_mask_source); + "get_sub_group_gt_mask"); error |= rft.run_impl>( - "test_get_sub_group_le_mask", get_subgroup_le_mask_source); + "get_sub_group_le_mask"); error |= rft.run_impl>( - "test_get_sub_group_lt_mask", get_subgroup_lt_mask_source); + "get_sub_group_lt_mask"); // ballot functions - error |= rft.run_impl>("test_sub_group_ballot", - ballot_source); - error |= rft.run_impl>( - "test_sub_group_ballot_inverse", ballot_source_inverse); - error |= rft.run_impl< + WorkGroupParams test_params_ballot(global_work_size, local_work_size); + test_params_ballot.save_kernel_source( + sub_group_ballot_bit_scan_find_source); + test_params_ballot.save_kernel_source(sub_group_ballot_source, + "sub_group_ballot"); + test_params_ballot.save_kernel_source(sub_group_inverse_ballot_source, + "sub_group_inverse_ballot"); + test_params_ballot.save_kernel_source(sub_group_ballot_bit_extract_source, + "sub_group_ballot_bit_extract"); + RunTestForType rft_ballot(device, context, queue, num_elements, + test_params_ballot); + error |= rft_ballot.run_impl>("sub_group_ballot"); + error |= + rft_ballot.run_impl>( + "sub_group_inverse_ballot"); + error |= rft_ballot.run_impl< cl_uint4, BALLOT_BIT_EXTRACT>( - "test_sub_group_ballot_bit_extract", ballot_bit_extract_source); - error |= rft.run_impl< + "sub_group_ballot_bit_extract"); + error |= rft_ballot.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( - "test_sub_group_ballot_bit_count", ballot_bit_count_source); - error |= rft.run_impl< + "sub_group_ballot_bit_count"); + error |= rft_ballot.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( - "test_sub_group_ballot_inclusive_scan", ballot_inclusive_scan_source); - error |= rft.run_impl< + "sub_group_ballot_inclusive_scan"); + error |= rft_ballot.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( - "test_sub_group_ballot_exclusive_scan", ballot_exclusive_scan_source); - error |= 
rft.run_impl< + "sub_group_ballot_exclusive_scan"); + error |= rft_ballot.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( - "test_sub_group_ballot_find_lsb", ballot_find_lsb_source); - error |= rft.run_impl< + "sub_group_ballot_find_lsb"); + error |= rft_ballot.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( - "test_sub_group_ballot_find_msb", ballot_find_msb_source); + "sub_group_ballot_find_msb"); return error; } diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp index 87507e37..11fcebc4 100644 --- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -22,149 +22,17 @@ #define CLUSTER_SIZE_STR "4" namespace { -static const char *redadd_clustered_source = - "__kernel void test_redadd_clustered(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR - ");\n" - "}\n"; - -static const char *redmax_clustered_source = - "__kernel void test_redmax_clustered(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR - ");\n" - "}\n"; - -static const char *redmin_clustered_source = - "__kernel void test_redmin_clustered(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if 
(sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR - ");\n" - "}\n"; - -static const char *redmul_clustered_source = - "__kernel void test_redmul_clustered(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR - ");\n" - "}\n"; - -static const char *redand_clustered_source = - "__kernel void test_redand_clustered(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR - ");\n" - "}\n"; - -static const char *redor_clustered_source = - "__kernel void test_redor_clustered(const __global Type *in, __global int4 " - "*xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR - ");\n" - "}\n"; - -static const char *redxor_clustered_source = - "__kernel void test_redxor_clustered(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_xor(in[gid], " 
CLUSTER_SIZE_STR ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = sub_group_clustered_reduce_xor(in[gid], " CLUSTER_SIZE_STR - ");\n" - "}\n"; - -static const char *redand_clustered_logical_source = - "__kernel void test_redand_clustered_logical(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR - ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = " - "sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR ");\n" - "}\n"; - -static const char *redor_clustered_logical_source = - "__kernel void test_redor_clustered_logical(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR - ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = " - "sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR ");\n" - "}\n"; - -static const char *redxor_clustered_logical_source = - "__kernel void test_redxor_clustered_logical(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if ( sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR - ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = " - "sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR ");\n" - "}\n"; - +std::string sub_group_clustered_reduce_source = R"( +__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + xy[gid].w = 0; + if (sizeof(in[gid]) == sizeof(%s(in[gid], )" CLUSTER_SIZE_STR R"())) { + xy[gid].w = 
sizeof(in[gid]); + } + out[gid] = %s(in[gid], )" CLUSTER_SIZE_STR R"(); +} +)"; // DESCRIPTION: // Test for reduce cluster functions @@ -267,34 +135,34 @@ template int run_cluster_red_add_max_min_mul_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_redadd_clustered", redadd_clustered_source); + "sub_group_clustered_reduce_add"); error |= rft.run_impl>( - "test_redmax_clustered", redmax_clustered_source); + "sub_group_clustered_reduce_max"); error |= rft.run_impl>( - "test_redmin_clustered", redmin_clustered_source); + "sub_group_clustered_reduce_min"); error |= rft.run_impl>( - "test_redmul_clustered", redmul_clustered_source); + "sub_group_clustered_reduce_mul"); return error; } template int run_cluster_and_or_xor_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_redand_clustered", redand_clustered_source); + "sub_group_clustered_reduce_and"); error |= rft.run_impl>( - "test_redor_clustered", redor_clustered_source); + "sub_group_clustered_reduce_or"); error |= rft.run_impl>( - "test_redxor_clustered", redxor_clustered_source); + "sub_group_clustered_reduce_xor"); return error; } template int run_cluster_logical_and_or_xor_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_redand_clustered_logical", redand_clustered_logical_source); + "sub_group_clustered_reduce_logical_and"); error |= rft.run_impl>( - "test_redor_clustered_logical", redor_clustered_logical_source); + "sub_group_clustered_reduce_logical_or"); error |= rft.run_impl>( - "test_redxor_clustered_logical", redxor_clustered_logical_source); + "sub_group_clustered_reduce_logical_xor"); return error; } @@ -311,9 +179,11 @@ int test_subgroup_functions_clustered_reduce(cl_device_id device, "device, skipping test.\n"); return TEST_SKIPPED_ITSELF; } + constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_clustered_reduce_source); 
RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_cluster_red_add_max_min_mul_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_extended_types.cpp b/test_conformance/subgroups/test_subgroup_extended_types.cpp index b281f618..dbe24623 100644 --- a/test_conformance/subgroups/test_subgroup_extended_types.cpp +++ b/test_conformance/subgroups/test_subgroup_extended_types.cpp @@ -24,30 +24,30 @@ namespace { template int run_broadcast_for_extended_type(RunTestForType rft) { int error = rft.run_impl>( - "test_bcast", bcast_source); + "sub_group_broadcast"); return error; } template int run_scan_reduction_for_type(RunTestForType rft) { - int error = rft.run_impl>("test_redadd", - redadd_source); - error |= rft.run_impl>("test_redmax", - redmax_source); - error |= rft.run_impl>("test_redmin", - redmin_source); - error |= rft.run_impl>("test_scinadd", - scinadd_source); - error |= rft.run_impl>("test_scinmax", - scinmax_source); - error |= rft.run_impl>("test_scinmin", - scinmin_source); - error |= rft.run_impl>("test_scexadd", - scexadd_source); - error |= rft.run_impl>("test_scexmax", - scexmax_source); - error |= rft.run_impl>("test_scexmin", - scexmin_source); + int error = + rft.run_impl>("sub_group_reduce_add"); + error |= + rft.run_impl>("sub_group_reduce_max"); + error |= + rft.run_impl>("sub_group_reduce_min"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_add"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_max"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_min"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_add"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_max"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_min"); return error; } @@ -65,11 +65,15 @@ int test_subgroup_functions_extended_types(cl_device_id device, "device, skipping test.\n"); return TEST_SKIPPED_ITSELF; } + constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; 
WorkGroupParams test_params(global_work_size, local_work_size); - RunTestForType rft(device, context, queue, num_elements, test_params); + test_params.save_kernel_source(sub_group_reduction_scan_source); + test_params.save_kernel_source(sub_group_generic_source, + "sub_group_broadcast"); + RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp index 6c44249e..bb257bcd 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp @@ -17,336 +17,29 @@ #include "subhelpers.h" #include "harness/typeWrappers.h" #include "subgroup_common_templates.h" +#include namespace { -static const char *scinadd_non_uniform_source = R"( - __kernel void test_scinadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { +std::string sub_group_non_uniform_arithmetic_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { int gid = get_global_id(0); XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_add(in[gid]); - } - } -)"; - -static const char *scinmax_non_uniform_source = R"( - __kernel void test_scinmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_max(in[gid]); - } - } -)"; - -static const char *scinmin_non_uniform_source = R"( - __kernel void test_scinmin_non_uniform(const 
__global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_min(in[gid]); - } - } -)"; - -static const char *scinmul_non_uniform_source = R"( - __kernel void test_scinmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_mul(in[gid]); - } - } -)"; - -static const char *scinand_non_uniform_source = R"( - __kernel void test_scinand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_and(in[gid]); - } - } -)"; - -static const char *scinor_non_uniform_source = R"( - __kernel void test_scinor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_or(in[gid]); - } - } -)"; - -static const char *scinxor_non_uniform_source = R"( - __kernel void test_scinxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_xor(in[gid]); - } - } -)"; - -static const char *scinand_non_uniform_logical_source = R"( - __kernel void test_scinand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type 
*out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_logical_and(in[gid]); - } - } -)"; - -static const char *scinor_non_uniform_logical_source = R"( - __kernel void test_scinor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_logical_or(in[gid]); - } - } -)"; - -static const char *scinxor_non_uniform_logical_source = R"( - __kernel void test_scinxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_logical_xor(in[gid]); - } - } -)"; - -static const char *scexadd_non_uniform_source = R"( - __kernel void test_scexadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_add(in[gid]); - } - } -)"; - -static const char *scexmax_non_uniform_source = R"( - __kernel void test_scexmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_max(in[gid]); - } - } -)"; - -static const char *scexmin_non_uniform_source = R"( - __kernel void test_scexmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - 
int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_min(in[gid]); - } - } -)"; - -static const char *scexmul_non_uniform_source = R"( - __kernel void test_scexmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_mul(in[gid]); - } - } -)"; - -static const char *scexand_non_uniform_source = R"( - __kernel void test_scexand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_and(in[gid]); - } - } -)"; - -static const char *scexor_non_uniform_source = R"( - __kernel void test_scexor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_or(in[gid]); - } - } -)"; - -static const char *scexxor_non_uniform_source = R"( - __kernel void test_scexxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_xor(in[gid]); - } - } -)"; - -static const char *scexand_non_uniform_logical_source = R"( - __kernel void test_scexand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int 
elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_logical_and(in[gid]); - } - } -)"; - -static const char *scexor_non_uniform_logical_source = R"( - __kernel void test_scexor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_logical_or(in[gid]); - } - } -)"; - -static const char *scexxor_non_uniform_logical_source = R"( - __kernel void test_scexxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_logical_xor(in[gid]); - } - } -)"; - -static const char *redadd_non_uniform_source = R"( - __kernel void test_redadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_add(in[gid]); - } - } -)"; - -static const char *redmax_non_uniform_source = R"( - __kernel void test_redmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_max(in[gid]); - } - } -)"; - -static const char *redmin_non_uniform_source = R"( - __kernel void test_redmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << 
(get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_min(in[gid]); - } - } -)"; - -static const char *redmul_non_uniform_source = R"( - __kernel void test_redmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_mul(in[gid]); - } - } -)"; - -static const char *redand_non_uniform_source = R"( - __kernel void test_redand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_and(in[gid]); - } - } -)"; - -static const char *redor_non_uniform_source = R"( - __kernel void test_redor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_or(in[gid]); - } - } -)"; - -static const char *redxor_non_uniform_source = R"( - __kernel void test_redxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_xor(in[gid]); - } - } -)"; - -static const char *redand_non_uniform_logical_source = R"( - __kernel void test_redand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = 
sub_group_non_uniform_reduce_logical_and(in[gid]); - } - } -)"; - -static const char *redor_non_uniform_logical_source = R"( - __kernel void test_redor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_logical_or(in[gid]); - } - } -)"; - -static const char *redxor_non_uniform_logical_source = R"( - __kernel void test_redxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_logical_xor(in[gid]); - } + uint subgroup_local_id = get_sub_group_local_id(); + uint elect_work_item = 1 << (subgroup_local_id % 32); + uint work_item_mask; + if(subgroup_local_id < 32) { + work_item_mask = work_item_mask_vector.x; + } else if(subgroup_local_id < 64) { + work_item_mask = work_item_mask_vector.y; + } else if(subgroup_local_id < 96) { + work_item_mask = work_item_mask_vector.w; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.z; + } + if (elect_work_item & work_item_mask){ + out[gid] = %s(in[gid]); + } } )"; @@ -354,52 +47,52 @@ template int run_functions_add_mul_max_min_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_scinadd_non_uniform", scinadd_non_uniform_source); + "sub_group_non_uniform_scan_inclusive_add"); error |= rft.run_impl>( - "test_scinmul_non_uniform", scinmul_non_uniform_source); + "sub_group_non_uniform_scan_inclusive_mul"); error |= rft.run_impl>( - "test_scinmax_non_uniform", scinmax_non_uniform_source); + "sub_group_non_uniform_scan_inclusive_max"); error |= rft.run_impl>( - "test_scinmin_non_uniform", scinmin_non_uniform_source); + 
"sub_group_non_uniform_scan_inclusive_min"); error |= rft.run_impl>( - "test_scexadd_non_uniform", scexadd_non_uniform_source); + "sub_group_non_uniform_scan_exclusive_add"); error |= rft.run_impl>( - "test_scexmul_non_uniform", scexmul_non_uniform_source); + "sub_group_non_uniform_scan_exclusive_mul"); error |= rft.run_impl>( - "test_scexmax_non_uniform", scexmax_non_uniform_source); + "sub_group_non_uniform_scan_exclusive_max"); error |= rft.run_impl>( - "test_scexmin_non_uniform", scexmin_non_uniform_source); + "sub_group_non_uniform_scan_exclusive_min"); error |= rft.run_impl>( - "test_redadd_non_uniform", redadd_non_uniform_source); + "sub_group_non_uniform_reduce_add"); error |= rft.run_impl>( - "test_redmul_non_uniform", redmul_non_uniform_source); + "sub_group_non_uniform_reduce_mul"); error |= rft.run_impl>( - "test_redmax_non_uniform", redmax_non_uniform_source); + "sub_group_non_uniform_reduce_max"); error |= rft.run_impl>( - "test_redmin_non_uniform", redmin_non_uniform_source); + "sub_group_non_uniform_reduce_min"); return error; } template int run_functions_and_or_xor_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_scinand_non_uniform", scinand_non_uniform_source); + "sub_group_non_uniform_scan_inclusive_and"); error |= rft.run_impl>( - "test_scinor_non_uniform", scinor_non_uniform_source); + "sub_group_non_uniform_scan_inclusive_or"); error |= rft.run_impl>( - "test_scinxor_non_uniform", scinxor_non_uniform_source); + "sub_group_non_uniform_scan_inclusive_xor"); error |= rft.run_impl>( - "test_scexand_non_uniform", scexand_non_uniform_source); + "sub_group_non_uniform_scan_exclusive_and"); error |= rft.run_impl>( - "test_scexor_non_uniform", scexor_non_uniform_source); + "sub_group_non_uniform_scan_exclusive_or"); error |= rft.run_impl>( - "test_scexxor_non_uniform", scexxor_non_uniform_source); + "sub_group_non_uniform_scan_exclusive_xor"); error |= rft.run_impl>( - "test_redand_non_uniform", redand_non_uniform_source); + 
"sub_group_non_uniform_reduce_and"); error |= rft.run_impl>( - "test_redor_non_uniform", redor_non_uniform_source); + "sub_group_non_uniform_reduce_or"); error |= rft.run_impl>( - "test_redxor_non_uniform", redxor_non_uniform_source); + "sub_group_non_uniform_reduce_xor"); return error; } @@ -407,23 +100,23 @@ template int run_functions_logical_and_or_xor_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_scinand_non_uniform_logical", scinand_non_uniform_logical_source); + "sub_group_non_uniform_scan_inclusive_logical_and"); error |= rft.run_impl>( - "test_scinor_non_uniform_logical", scinor_non_uniform_logical_source); + "sub_group_non_uniform_scan_inclusive_logical_or"); error |= rft.run_impl>( - "test_scinxor_non_uniform_logical", scinxor_non_uniform_logical_source); + "sub_group_non_uniform_scan_inclusive_logical_xor"); error |= rft.run_impl>( - "test_scexand_non_uniform_logical", scexand_non_uniform_logical_source); + "sub_group_non_uniform_scan_exclusive_logical_and"); error |= rft.run_impl>( - "test_scexor_non_uniform_logical", scexor_non_uniform_logical_source); + "sub_group_non_uniform_scan_exclusive_logical_or"); error |= rft.run_impl>( - "test_scexxor_non_uniform_logical", scexxor_non_uniform_logical_source); + "sub_group_non_uniform_scan_exclusive_logical_xor"); error |= rft.run_impl>( - "test_redand_non_uniform_logical", redand_non_uniform_logical_source); + "sub_group_non_uniform_reduce_logical_and"); error |= rft.run_impl>( - "test_redor_non_uniform_logical", redor_non_uniform_logical_source); + "sub_group_non_uniform_reduce_logical_or"); error |= rft.run_impl>( - "test_redxor_non_uniform_logical", redxor_non_uniform_logical_source); + "sub_group_non_uniform_reduce_logical_xor"); return error; } @@ -441,13 +134,11 @@ int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device, "this device, skipping test.\n"); return TEST_SKIPPED_ITSELF; } - std::vector masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555, - 0x0f0ff0f0, 
0x0f0f0f0f, 0xff0000ff, 0xff00ff00, - 0x00ffff00, 0x80000000, 0xaaaaaaaa }; constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size, masks); + WorkGroupParams test_params(global_work_size, local_work_size, true); + test_params.save_kernel_source(sub_group_non_uniform_arithmetic_source); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_functions_add_mul_max_min_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp index 484e9b6b..f956960b 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -28,7 +28,6 @@ template struct VOTE int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; int nj = (nw + ns - 1) / ns; int non_uniform_size = ng % nw; ng = ng / nw; @@ -40,9 +39,11 @@ template struct VOTE operation_names(operation)); log_info(" test params: global size = %d local size = %d subgroups " - "size = %d work item mask = 0x%x data type (%s)\n", - test_params.global_workgroup_size, nw, ns, work_items_mask, + "size = %d data type (%s)\n", + test_params.global_workgroup_size, nw, ns, TypeManager::name()); + log_info(" work items mask: %s\n", + test_params.work_items_mask.to_string().c_str()); if (non_uniform_size) { log_info(" non uniform work group size mode ON\n"); @@ -99,7 +100,6 @@ template struct VOTE int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; int nj = (nw + ns - 1) / ns; cl_int tr, rr; int non_uniform_size = ng % nw; @@ -141,8 +141,7 @@ template struct VOTE std::set active_work_items; for (i 
= 0; i < n; ++i) { - uint32_t check_work_item = 1 << (i % 32); - if (work_items_mask & check_work_item) + if (test_params.work_items_mask.test(i)) { active_work_items.insert(i); switch (operation) @@ -215,46 +214,47 @@ template struct VOTE return TEST_PASS; } }; -static const char *elect_source = R"( - __kernel void test_elect(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - uint elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_elect(); - } - } -)"; - -static const char *non_uniform_any_source = R"( - __kernel void test_non_uniform_any(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - uint elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_any(in[gid]); - } - } -)"; -static const char *non_uniform_all_source = R"( - __kernel void test_non_uniform_all(const __global Type *in, __global int4 *xy, __global Type *out) { +std::string sub_group_elect_source = R"( + __kernel void test_sub_group_elect(const __global Type *in, __global int4 *xy, __global Type *out) { int gid = get_global_id(0); XY(xy,gid); - uint elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_all(in[gid]); - } + uint subgroup_local_id = get_sub_group_local_id(); + uint elect_work_item = 1 << (subgroup_local_id % 32); + uint work_item_mask; + if(subgroup_local_id < 32) { + work_item_mask = work_item_mask_vector.x; + } else if(subgroup_local_id < 64) { + work_item_mask = work_item_mask_vector.y; + } else if(subgroup_local_id < 96) { + work_item_mask = work_item_mask_vector.w; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.z; + } + if (elect_work_item & work_item_mask){ + out[gid] = sub_group_elect(); + } } )"; -static 
const char *non_uniform_all_equal_source = R"( - __kernel void test_non_uniform_all_equal(const __global Type *in, __global int4 *xy, __global Type *out) { +std::string sub_group_non_uniform_any_all_all_equal_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { int gid = get_global_id(0); XY(xy,gid); - uint elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_all_equal(in[gid]); + uint subgroup_local_id = get_sub_group_local_id(); + uint elect_work_item = 1 << (subgroup_local_id % 32); + uint work_item_mask; + if(subgroup_local_id < 32) { + work_item_mask = work_item_mask_vector.x; + } else if(subgroup_local_id < 64) { + work_item_mask = work_item_mask_vector.y; + } else if(subgroup_local_id < 96) { + work_item_mask = work_item_mask_vector.w; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.z; + } + if (elect_work_item & work_item_mask){ + out[gid] = %s(in[gid]); } } )"; @@ -262,7 +262,7 @@ static const char *non_uniform_all_equal_source = R"( template int run_vote_all_equal_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_non_uniform_all_equal", non_uniform_all_equal_source); + "sub_group_non_uniform_all_equal"); return error; } } @@ -278,12 +278,13 @@ int test_subgroup_functions_non_uniform_vote(cl_device_id device, "device, skipping test.\n"); return TEST_SKIPPED_ITSELF; } - std::vector masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555, - 0x0f0ff0f0, 0x0f0f0f0f, 0xff0000ff, 0xff00ff00, - 0x00ffff00, 0x80000000 }; + constexpr size_t global_work_size = 170; constexpr size_t local_work_size = 64; - WorkGroupParams test_params(global_work_size, local_work_size, masks); + WorkGroupParams test_params(global_work_size, local_work_size, true); + test_params.save_kernel_source( + sub_group_non_uniform_any_all_all_equal_source); + test_params.save_kernel_source(sub_group_elect_source, 
"sub_group_elect"); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_vote_all_equal_for_type(rft); @@ -295,10 +296,10 @@ int test_subgroup_functions_non_uniform_vote(cl_device_id device, error |= run_vote_all_equal_for_type(rft); error |= rft.run_impl>( - "test_non_uniform_all", non_uniform_all_source); + "sub_group_non_uniform_all"); error |= rft.run_impl>( - "test_elect", elect_source); + "sub_group_elect"); error |= rft.run_impl>( - "test_non_uniform_any", non_uniform_any_source); + "sub_group_non_uniform_any"); return error; } diff --git a/test_conformance/subgroups/test_subgroup_shuffle.cpp b/test_conformance/subgroups/test_subgroup_shuffle.cpp index 37b27ced..56231cbf 100644 --- a/test_conformance/subgroups/test_subgroup_shuffle.cpp +++ b/test_conformance/subgroups/test_subgroup_shuffle.cpp @@ -15,38 +15,19 @@ // #include "procs.h" #include "subhelpers.h" +#include "subgroup_common_kernels.h" #include "subgroup_common_templates.h" #include "harness/typeWrappers.h" #include namespace { -static const char* shuffle_xor_source = - "__kernel void test_sub_group_shuffle_xor(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " out[gid] = sub_group_shuffle_xor(x, xy[gid].z);" - "}\n"; - -static const char* shuffle_source = - "__kernel void test_sub_group_shuffle(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " out[gid] = sub_group_shuffle(x, xy[gid].z);" - "}\n"; - template int run_shuffle_for_type(RunTestForType rft) { - int error = rft.run_impl>( - "test_sub_group_shuffle", shuffle_source); + int error = + rft.run_impl>("sub_group_shuffle"); error |= rft.run_impl>( - "test_sub_group_shuffle_xor", shuffle_xor_source); + "sub_group_shuffle_xor"); return error; } @@ -61,9 +42,11 @@ int 
test_subgroup_functions_shuffle(cl_device_id device, cl_context context, "skipping test.\n"); return TEST_SKIPPED_ITSELF; } + constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_generic_source); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_shuffle_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp b/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp index 11401e80..caa1dccc 100644 --- a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp +++ b/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp @@ -15,37 +15,19 @@ // #include "procs.h" #include "subhelpers.h" +#include "subgroup_common_kernels.h" #include "subgroup_common_templates.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" namespace { -static const char* shuffle_down_source = - "__kernel void test_sub_group_shuffle_down(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " out[gid] = sub_group_shuffle_down(x, xy[gid].z);" - "}\n"; -static const char* shuffle_up_source = - "__kernel void test_sub_group_shuffle_up(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " out[gid] = sub_group_shuffle_up(x, xy[gid].z);" - "}\n"; - template int run_shuffle_relative_for_type(RunTestForType rft) { - int error = rft.run_impl>( - "test_sub_group_shuffle_up", shuffle_up_source); + int error = + rft.run_impl>("sub_group_shuffle_up"); error |= rft.run_impl>( - "test_sub_group_shuffle_down", shuffle_down_source); + "sub_group_shuffle_down"); return error; } @@ -62,9 +44,11 @@ int test_subgroup_functions_shuffle_relative(cl_device_id device, 
"device, skipping test.\n"); return TEST_SKIPPED_ITSELF; } + constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_generic_source); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_shuffle_relative_for_type(rft); -- cgit v1.2.3 From 7147d072c7bbed99e429cb8fe3e86139a12ef8bb Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Mon, 4 Oct 2021 15:42:44 +0200 Subject: Remove space character from extension name (#1336) --- test_common/gl/setup_x11.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/test_common/gl/setup_x11.cpp b/test_common/gl/setup_x11.cpp index c54ecdec..7efda3d2 100644 --- a/test_common/gl/setup_x11.cpp +++ b/test_common/gl/setup_x11.cpp @@ -90,10 +90,17 @@ public: } for (int i=0; i<(int)num_of_devices; i++) { - if (!is_extension_available(devices[i], "cl_khr_gl_sharing ")) { - log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); - } else { - log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); + if (!is_extension_available(devices[i], "cl_khr_gl_sharing")) + { + log_info("Device %d of %d does not support required extension " + "cl_khr_gl_sharing.\n", + i + 1, num_of_devices); + } + else + { + log_info("Device %d of %d supports required extension " + "cl_khr_gl_sharing.\n", + i + 1, num_of_devices); found_valid_device = 1; m_devices[m_device_count++] = devices[i]; } -- cgit v1.2.3 From 410f46f49fcec65d18d30b0df7a1d7ae0a4cd5db Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Wed, 3 Nov 2021 16:36:36 +0000 Subject: Add testing of sub_group_broadcast for (u)char and (u)short types (#1347) Signed-off-by: Stuart Brady --- test_conformance/subgroups/test_subgroup_extended_types.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/test_conformance/subgroups/test_subgroup_extended_types.cpp b/test_conformance/subgroups/test_subgroup_extended_types.cpp index dbe24623..c9e6bb61 100644 --- a/test_conformance/subgroups/test_subgroup_extended_types.cpp +++ b/test_conformance/subgroups/test_subgroup_extended_types.cpp @@ -108,22 +108,26 @@ int test_subgroup_functions_extended_types(cl_device_id device, error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); -- cgit v1.2.3 From e9cd9a446e1b36a02f6e8f959256d5f96eda21a4 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Mon, 8 Nov 2021 11:00:50 +0000 Subject: Remove excessive logging in subgroup tests (#1343) This also adds some missing data type logging to the subgroup_functions_non_uniform_vote tests. 
Signed-off-by: Stuart Brady --- .../subgroups/subgroup_common_templates.h | 37 -------------- .../subgroups/test_subgroup_ballot.cpp | 14 ------ .../subgroups/test_subgroup_non_uniform_vote.cpp | 58 ++++++++-------------- 3 files changed, 21 insertions(+), 88 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 5c5f9560..349f8100 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -80,7 +80,6 @@ template struct BC TypeManager::name()); if (non_uniform_size) { - log_info(" non uniform work group size mode ON\n"); ng++; } for (k = 0; k < ng; ++k) @@ -581,14 +580,6 @@ template struct SCEX_NU : func_name = "sub_group_scan_exclusive"; log_info(" %s_%s(%s)...\n", func_name.c_str(), operation_names(operation), TypeManager::name()); - log_info(" test params: global size = %d local size = %d subgroups " - "size = %d \n", - test_params.global_workgroup_size, nw, ns); - if (test_params.work_items_mask.any()) - { - log_info(" work items mask: %s\n", - test_params.work_items_mask.to_string().c_str()); - } genrand(x, t, m, ns, nw, ng); } @@ -637,16 +628,10 @@ template struct SCEX_NU } if (active_work_items.empty()) { - log_info(" No acitve workitems in workgroup id = %d " - "subgroup id = %d - no calculation\n", - k, j); continue; } else if (active_work_items.size() == 1) { - log_info(" One active workitem in workgroup id = %d " - "subgroup id = %d - no calculation\n", - k, j); continue; } else @@ -702,14 +687,6 @@ template struct SCIN_NU genrand(x, t, m, ns, nw, ng); log_info(" %s_%s(%s)...\n", func_name.c_str(), operation_names(operation), TypeManager::name()); - log_info(" test params: global size = %d local size = %d subgroups " - "size = %d \n", - test_params.global_workgroup_size, nw, ns); - if (test_params.work_items_mask.any()) - { - log_info(" work items mask: %s\n", - 
test_params.work_items_mask.to_string().c_str()); - } } static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, @@ -765,9 +742,6 @@ template struct SCIN_NU } if (active_work_items.empty()) { - log_info(" No acitve workitems in workgroup id = %d " - "subgroup id = %d - no calculation\n", - k, j); continue; } else @@ -828,14 +802,6 @@ template struct RED_NU : func_name = "sub_group_reduce"; log_info(" %s_%s(%s)...\n", func_name.c_str(), operation_names(operation), TypeManager::name()); - log_info(" test params: global size = %d local size = %d subgroups " - "size = %d \n", - test_params.global_workgroup_size, nw, ns); - if (test_params.work_items_mask.any()) - { - log_info(" work items mask: %s\n", - test_params.work_items_mask.to_string().c_str()); - } genrand(x, t, m, ns, nw, ng); } @@ -894,9 +860,6 @@ template struct RED_NU if (active_work_items.empty()) { - log_info(" No acitve workitems in workgroup id = %d " - "subgroup id = %d - no calculation\n", - k, j); continue; } diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 2bd54e43..ac90bad7 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -31,10 +31,6 @@ template struct BALLOT int sbs = test_params.subgroup_size; int non_uniform_size = gws % lws; log_info(" sub_group_ballot...\n"); - if (non_uniform_size) - { - log_info(" non uniform work group size mode ON\n"); - } } static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, @@ -117,11 +113,6 @@ template struct BALLOT_BIT_EXTRACT log_info(" sub_group_%s(%s)...\n", operation_names(operation), TypeManager::name()); - if (non_uniform_size) - { - log_info(" non uniform work group size mode ON\n"); - } - for (wg_id = 0; wg_id < wg_number; ++wg_id) { // for each work_group for (sb_id = 0; sb_id < sb_number; ++sb_id) @@ -275,10 +266,6 @@ template struct BALLOT_INVERSE int sbs = test_params.subgroup_size; int non_uniform_size 
= gws % lws; log_info(" sub_group_inverse_ballot...\n"); - if (non_uniform_size) - { - log_info(" non uniform work group size mode ON\n"); - } // no work here } @@ -379,7 +366,6 @@ template struct BALLOT_COUNT_SCAN_FIND TypeManager::name()); if (non_uniform_size) { - log_info(" non uniform work group size mode ON\n"); wg_number++; } int e; diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp index f956960b..835de25d 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -34,20 +34,10 @@ template struct VOTE int last_subgroup_size = 0; ii = 0; - log_info(" sub_group_%s%s... \n", + log_info(" sub_group_%s%s(%s)... \n", (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_", - operation_names(operation)); + operation_names(operation), TypeManager::name()); - log_info(" test params: global size = %d local size = %d subgroups " - "size = %d data type (%s)\n", - test_params.global_workgroup_size, nw, ns, - TypeManager::name()); - log_info(" work items mask: %s\n", - test_params.work_items_mask.to_string().c_str()); - if (non_uniform_size) - { - log_info(" non uniform work group size mode ON\n"); - } if (operation == NonUniformVoteOp::elect) return; for (k = 0; k < ng; ++k) @@ -171,34 +161,28 @@ template struct VOTE } if (active_work_items.empty()) { - log_info(" no one workitem acitve... in workgroup id = %d " - "subgroup id = %d\n", - k, j); + continue; } - else + auto lowest_active = active_work_items.begin(); + for (const int &active_work_item : active_work_items) { - auto lowest_active = active_work_items.begin(); - for (const int &active_work_item : active_work_items) + i = active_work_item; + if (operation == NonUniformVoteOp::elect) { - i = active_work_item; - if (operation == NonUniformVoteOp::elect) - { - i == *lowest_active ? tr = 1 : tr = 0; - } + i == *lowest_active ? 
tr = 1 : tr = 0; + } - // normalize device values on host, non zero set 1. - rr = compare_ordered(my[ii + i], 0) ? 0 : 1; + // normalize device values on host, non zero set 1. + rr = compare_ordered(my[ii + i], 0) ? 0 : 1; - if (rr != tr) - { - log_error("ERROR: sub_group_%s() \n", - operation_names(operation)); - log_error( - "mismatch for work item %d sub group %d in " - "work group %d. Expected: %d Obtained: %d\n", - i, j, k, tr, rr); - return TEST_FAIL; - } + if (rr != tr) + { + log_error("ERROR: sub_group_%s() \n", + operation_names(operation)); + log_error("mismatch for work item %d sub group %d in " + "work group %d. Expected: %d Obtained: %d\n", + i, j, k, tr, rr); + return TEST_FAIL; } } } @@ -208,9 +192,9 @@ template struct VOTE m += 4 * nw; } - log_info(" sub_group_%s%s... passed\n", + log_info(" sub_group_%s%s(%s)... passed\n", (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_", - operation_names(operation)); + operation_names(operation), TypeManager::name()); return TEST_PASS; } }; -- cgit v1.2.3 From 1116a71ba2994ecf761d2ab853de7de51448500d Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 16 Nov 2021 11:27:04 +0000 Subject: Improve error handling in subgroup tests (#1352) * MPGCOMP-14761 Improve error handling in subgroup tests Signed-off-by: Stuart Brady * Add missing newline --- test_common/harness/errorHelpers.h | 1 + .../subgroups/subgroup_common_templates.h | 20 ++++---- test_conformance/subgroups/subhelpers.h | 58 ++++++++++++++-------- test_conformance/subgroups/test_barrier.cpp | 10 ++-- test_conformance/subgroups/test_ifp.cpp | 12 ++--- test_conformance/subgroups/test_subgroup.cpp | 4 +- .../subgroups/test_subgroup_ballot.cpp | 20 ++++---- .../subgroups/test_subgroup_clustered_reduce.cpp | 4 +- .../subgroups/test_subgroup_non_uniform_vote.cpp | 4 +- 9 files changed, 74 insertions(+), 59 deletions(-) diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h index d59bc78d..80eb3b58 100644 --- 
a/test_common/harness/errorHelpers.h +++ b/test_common/harness/errorHelpers.h @@ -62,6 +62,7 @@ static int vlog_win32(const char *format, ...); return TEST_FAIL; \ } #define test_error(errCode, msg) test_error_ret(errCode, msg, errCode) +#define test_error_fail(errCode, msg) test_error_ret(errCode, msg, TEST_FAIL) #define test_error_ret(errCode, msg, retValue) \ { \ auto errCodeResult = errCode; \ diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 349f8100..cfe02c2f 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -168,8 +168,8 @@ template struct BC } } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int ii, i, j, k, l, n; int ng = test_params.global_workgroup_size; @@ -499,8 +499,8 @@ template struct SHF } } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int ii, i, j, k, l, n; int nw = test_params.local_workgroup_size; @@ -583,8 +583,8 @@ template struct SCEX_NU genrand(x, t, m, ns, nw, ng); } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int ii, i, j, k, n; int nw = test_params.local_workgroup_size; @@ -689,8 +689,8 @@ template struct SCIN_NU operation_names(operation), TypeManager::name()); } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int ii, i, j, k, n; int nw = 
test_params.local_workgroup_size; @@ -805,8 +805,8 @@ template struct RED_NU genrand(x, t, m, ns, nw, ng); } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int ii, i, j, k, n; int nw = test_params.local_workgroup_size; diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 6d32928a..bd4b6d61 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -1375,25 +1375,31 @@ static int run_kernel(cl_context context, cl_command_queue queue, // Driver for testing a single built in function template struct test { - static int mrun(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, const char *kname, - const char *src, WorkGroupParams test_params) + static test_status mrun(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + const char *kname, const char *src, + WorkGroupParams test_params) { - int error = TEST_PASS; + test_status combined_error = TEST_SKIPPED_ITSELF; for (auto &mask : test_params.all_work_item_masks) { test_params.work_items_mask = mask; - error |= run(device, context, queue, num_elements, kname, src, - test_params); + test_status error = run(device, context, queue, num_elements, kname, + src, test_params); + + if (error == TEST_FAIL + || (error == TEST_PASS && combined_error != TEST_FAIL)) + combined_error = error; } - return error; + return combined_error; }; - static int run(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, const char *kname, - const char *src, WorkGroupParams test_params) + static test_status run(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + const char *kname, const char *src, + WorkGroupParams test_params) { size_t tmp; - int error; + cl_int error; int 
subgroup_size, num_subgroups; size_t realSize; size_t global = test_params.global_workgroup_size; @@ -1434,7 +1440,7 @@ template struct test if (!TypeManager::type_supported(device)) { log_info("Data type not supported : %s\n", TypeManager::name()); - return 0; + return TEST_SKIPPED_ITSELF; } else { @@ -1450,7 +1456,7 @@ template struct test error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); - test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM"); + test_error_fail(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM"); if (test_params.use_core_subgroups) { kernel_sstr @@ -1465,12 +1471,12 @@ template struct test error = create_single_kernel_helper(context, &program, &kernel, 1, &kernel_src, kname); - if (error != 0) return error; + if (error != CL_SUCCESS) return TEST_FAIL; // Determine some local dimensions to use for the test. error = get_max_common_work_group_size( context, kernel, test_params.global_workgroup_size, &local); - test_error(error, "get_max_common_work_group_size failed"); + test_error_fail(error, "get_max_common_work_group_size failed"); // Limit it a bit so we have muliple work groups // Ideally this will still be large enough to give us multiple @@ -1543,7 +1549,7 @@ template struct test input_array_size * sizeof(Ty), sgmap.data(), global * sizeof(cl_int4), odata.data(), output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); - test_error(error, "Running kernel first time failed"); + test_error_fail(error, "Running kernel first time failed"); // Generate the desired input for the kernel @@ -1553,13 +1559,18 @@ template struct test input_array_size * sizeof(Ty), sgmap.data(), global * sizeof(cl_int4), odata.data(), output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); - test_error(error, "Running kernel second time failed"); + test_error_fail(error, "Running kernel second time failed"); // Check the result - error = Fns::chk(idata.data(), odata.data(), mapin.data(), - mapout.data(), 
sgmap.data(), test_params); - test_error(error, "Data verification failed"); - return TEST_PASS; + test_status status = Fns::chk(idata.data(), odata.data(), mapin.data(), + mapout.data(), sgmap.data(), test_params); + // Detailed failure and skip messages should be logged by Fns::gen + // and Fns::chk. + if (status == TEST_FAIL) + { + test_fail("Data verification failed\n"); + } + return status; } }; @@ -1625,7 +1636,10 @@ struct RunTestForType test_params_); } - return error; + // If we return TEST_SKIPPED_ITSELF here, then an entire suite may be + // reported as having been skipped even if some tests within it + // passed, as the status codes are erroneously ORed together: + return error == TEST_FAIL ? TEST_FAIL : TEST_PASS; } private: diff --git a/test_conformance/subgroups/test_barrier.cpp b/test_conformance/subgroups/test_barrier.cpp index 47e42f65..b570e922 100644 --- a/test_conformance/subgroups/test_barrier.cpp +++ b/test_conformance/subgroups/test_barrier.cpp @@ -92,8 +92,8 @@ template struct BAR } } - static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, + cl_int *m, const WorkGroupParams &test_params) { int ii, i, j, k, n; int nw = test_params.local_workgroup_size; @@ -133,7 +133,7 @@ template struct BAR "id %d in sub group %d in group %d expected " "%d got %d\n", i, j, k, tr, rr); - return -1; + return TEST_FAIL; } } } @@ -143,7 +143,7 @@ template struct BAR m += 2 * nw; } - return 0; + return TEST_PASS; } }; @@ -187,4 +187,4 @@ int test_barrier_functions_ext(cl_device_id device, cl_context context, } return test_barrier_functions(device, context, queue, num_elements, false); -} \ No newline at end of file +} diff --git a/test_conformance/subgroups/test_ifp.cpp b/test_conformance/subgroups/test_ifp.cpp index fccaa8c7..f6c5227d 100644 --- a/test_conformance/subgroups/test_ifp.cpp +++ b/test_conformance/subgroups/test_ifp.cpp @@ 
-245,8 +245,8 @@ struct IFP } } - static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, - const WorkGroupParams &test_params) + static test_status chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, + const WorkGroupParams &test_params) { int i, k; int nw = test_params.local_workgroup_size; @@ -255,8 +255,8 @@ struct IFP int nj = (nw + ns - 1) / ns; ng = ng / nw; - // We need at least 2 sub groups per group for this tes - if (nj == 1) return 0; + // We need at least 2 sub groups per group for this test + if (nj == 1) return TEST_SKIPPED_ITSELF; log_info(" independent forward progress...\n"); @@ -270,14 +270,14 @@ struct IFP log_error( "ERROR: mismatch at element %d in work group %d\n", i, k); - return -1; + return TEST_FAIL; } } x += nj * (NUM_LOC + 1); y += NUM_LOC; } - return 0; + return TEST_PASS; } }; diff --git a/test_conformance/subgroups/test_subgroup.cpp b/test_conformance/subgroups/test_subgroup.cpp index 63bfc453..eefca5f8 100644 --- a/test_conformance/subgroups/test_subgroup.cpp +++ b/test_conformance/subgroups/test_subgroup.cpp @@ -68,8 +68,8 @@ template struct AA } } - static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, + cl_int *m, const WorkGroupParams &test_params) { int ii, i, j, k, n; int ng = test_params.global_workgroup_size; diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index ac90bad7..0228e82c 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -33,8 +33,8 @@ template struct BALLOT log_info(" sub_group_ballot...\n"); } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int wi_id, wg_id, sb_id; int gws = 
test_params.global_workgroup_size; @@ -146,8 +146,8 @@ template struct BALLOT_BIT_EXTRACT } } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int wi_id, wg_id, l, sb_id; int gws = test_params.global_workgroup_size; @@ -269,8 +269,8 @@ template struct BALLOT_INVERSE // no work here } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int wi_id, wg_id, sb_id; int gws = test_params.global_workgroup_size; @@ -444,8 +444,8 @@ template struct BALLOT_COUNT_SCAN_FIND return mask; } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int wi_id, wg_id, sb_id; int gws = test_params.global_workgroup_size; @@ -617,8 +617,8 @@ template struct SMASK } } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int wi_id, wg_id, sb_id; int gws = test_params.global_workgroup_size; diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp index 11fcebc4..ad9e1ff2 100644 --- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -50,8 +50,8 @@ template struct RED_CLU genrand(x, t, m, ns, nw, ng); } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int nw = test_params.local_workgroup_size; 
int ns = test_params.subgroup_size; diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp index 835de25d..b21a9f7e 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -83,8 +83,8 @@ template struct VOTE } } - static int chk(T *x, T *y, T *mx, T *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(T *x, T *y, T *mx, T *my, cl_int *m, + const WorkGroupParams &test_params) { int ii, i, j, k, n; int nw = test_params.local_workgroup_size; -- cgit v1.2.3 From 1c6dbc23e74afeb5dcfdf2de2d69734c6b02a845 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 16 Nov 2021 14:03:06 +0000 Subject: Clean up logging in cl_khr_subgroup_ballot tests (#1351) The tests were logging scalar results as vectors padded with zeroes for no apparent benefit. Fix this. Signed-off-by: Stuart Brady --- .../subgroups/test_subgroup_ballot.cpp | 47 +++++++++------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 0228e82c..ee2c5e51 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -81,8 +81,8 @@ template struct BALLOT { log_error( "ERROR: sub_group_ballot mismatch for local id " - "%d in sub group %d in group %d obtained {%d}, " - "expected {%d} \n", + "%d in sub group %d in group %d obtained %d, " + "expected %d\n", wi_id, sb_id, wg_id, device_result, expected_result); return TEST_FAIL; @@ -455,7 +455,7 @@ template struct BALLOT_COUNT_SCAN_FIND int non_uniform_size = gws % lws; int wg_number = gws / lws; wg_number = non_uniform_size ? 
wg_number + 1 : wg_number; - cl_uint4 expected_result, device_result; + cl_uint expected_result, device_result; int last_subgroup_size = 0; int current_sbs = 0; @@ -487,7 +487,7 @@ template struct BALLOT_COUNT_SCAN_FIND current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs; } // Check result - expected_result = { 0, 0, 0, 0 }; + expected_result = 0; for (wi_id = 0; wi_id < current_sbs; ++wi_id) { // for subgroup element bs128 bs; @@ -497,23 +497,20 @@ template struct BALLOT_COUNT_SCAN_FIND | (bs128(mx[wg_offset + wi_id].s2) << 64) | (bs128(mx[wg_offset + wi_id].s3) << 96); bs &= getImportantBits(wi_id, current_sbs); - device_result = my[wg_offset + wi_id]; + device_result = my[wg_offset + wi_id].s0; if (operation == BallotOp::ballot_inclusive_scan || operation == BallotOp::ballot_exclusive_scan || operation == BallotOp::ballot_bit_count) { - expected_result.s0 = bs.count(); + expected_result = bs.count(); if (!compare(device_result, expected_result)) { log_error("ERROR: sub_group_%s " "mismatch for local id %d in sub group " - "%d in group %d obtained {%d, %d, %d, " - "%d}, expected {%d, %d, %d, %d}\n", + "%d in group %d obtained %d, " + "expected %d\n", operation_names(operation), wi_id, sb_id, - wg_id, device_result.s0, device_result.s1, - device_result.s2, device_result.s3, - expected_result.s0, expected_result.s1, - expected_result.s2, expected_result.s3); + wg_id, device_result, expected_result); return TEST_FAIL; } } @@ -523,7 +520,7 @@ template struct BALLOT_COUNT_SCAN_FIND { if (bs.test(id)) { - expected_result.s0 = id; + expected_result = id; break; } } @@ -531,13 +528,10 @@ template struct BALLOT_COUNT_SCAN_FIND { log_error("ERROR: sub_group_ballot_find_lsb " "mismatch for local id %d in sub group " - "%d in group %d obtained {%d, %d, %d, " - "%d}, expected {%d, %d, %d, %d}\n", - wi_id, sb_id, wg_id, device_result.s0, - device_result.s1, device_result.s2, - device_result.s3, expected_result.s0, - expected_result.s1, expected_result.s2, - 
expected_result.s3); + "%d in group %d obtained %d, " + "expected %d\n", + wi_id, sb_id, wg_id, device_result, + expected_result); return TEST_FAIL; } } @@ -547,7 +541,7 @@ template struct BALLOT_COUNT_SCAN_FIND { if (bs.test(id)) { - expected_result.s0 = id; + expected_result = id; break; } } @@ -555,13 +549,10 @@ template struct BALLOT_COUNT_SCAN_FIND { log_error("ERROR: sub_group_ballot_find_msb " "mismatch for local id %d in sub group " - "%d in group %d obtained {%d, %d, %d, " - "%d}, expected {%d, %d, %d, %d}\n", - wi_id, sb_id, wg_id, device_result.s0, - device_result.s1, device_result.s2, - device_result.s3, expected_result.s0, - expected_result.s1, expected_result.s2, - expected_result.s3); + "%d in group %d obtained %d, " + "expected %d\n", + wi_id, sb_id, wg_id, device_result, + expected_result); return TEST_FAIL; } } -- cgit v1.2.3 From 3cd906aa9b8b96ae0651269c47d6b8cc475c62f5 Mon Sep 17 00:00:00 2001 From: marcat03 <94451804+marcat03@users.noreply.github.com> Date: Tue, 16 Nov 2021 16:07:43 +0000 Subject: Fix missing cl_khr_semaphore extensions in compiler tests (#1357) * Added missing extensions related to cl_khr_semaphore Signed-off-by: Marco Cattani --- test_conformance/compiler/test_compiler_defines_for_extensions.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index de30e06b..2f29d39b 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -75,6 +75,9 @@ const char *known_extensions[] = { "cl_khr_pci_bus_info", "cl_khr_suggested_local_work_size", "cl_khr_spirv_linkonce_odr", + "cl_khr_semaphore", + "cl_khr_external_semaphore", + "cl_khr_external_semaphore_sync_fd", }; size_t num_known_extensions = sizeof(known_extensions) / sizeof(char *); -- cgit v1.2.3 From c25709f3964f1675a03c1a4f1315a09a4386c0bc Mon Sep 
17 00:00:00 2001 From: James Price Date: Tue, 23 Nov 2021 14:04:02 -0500 Subject: Fix stack-use-after-scope crash in conversions (#1358) The way that program sources were being constructed involved capturing pointers to strings that were allocated on the stack, and then trying to use them outside of that scope. This change uses a stringstream defined in the outer scope to build the program instead. --- test_conformance/conversions/test_conversions.cpp | 116 ++++++++++------------ 1 file changed, 54 insertions(+), 62 deletions(-) diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index e8e572e6..d489e28a 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -38,6 +38,7 @@ #include #endif +#include #include #include #include @@ -1559,84 +1560,40 @@ static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat, cl_program program; char testName[256]; int error = 0; - const char **strings; - size_t stringCount = 0; + + std::ostringstream source; + if (outType == kdouble || inType == kdouble) + source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; // Create the program. This is a bit complicated because we are trying to avoid byte and short stores. if (0 == vectorSize) { + // Create the type names. 
char inName[32]; char outName[32]; - const char *programSource[] = - { - "", // optional pragma - "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " dest[i] = src[i];\n" - "}\n" - }; - stringCount = sizeof(programSource) / sizeof(programSource[0]); - strings = programSource; - - if (outType == kdouble || inType == kdouble) - programSource[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - - //create the type name strncpy(inName, gTypeNames[inType], sizeof(inName)); strncpy(outName, gTypeNames[outType], sizeof(outName)); sprintf(testName, "test_implicit_%s_%s", outName, inName); - vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType], gTypeNames[outType]); + + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " dest[i] = src[i];\n"; + source << "}\n"; + + vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType], + gTypeNames[outType]); fflush(stdout); } else { int vectorSizetmp = vectorSizes[vectorSize]; + // Create the type names. 
char convertString[128]; char inName[32]; char outName[32]; - const char *programSource[] = - { - "", // optional pragma - "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " dest[i] = ", convertString, "( src[i] );\n" - "}\n" - }; - const char *programSourceV3[] = - { - "", // optional pragma - "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0))\n" - " vstore3( ", convertString, "( vload3( i, src)), i, dest );\n" - " else\n" - " {\n" - " ", inName, "3 in;\n" - " ", outName, "3 out;\n" - " if( 0 == (i & 1) )\n" - " in.y = src[3*i+1];\n" - " in.x = src[3*i];\n" - " out = ", convertString, "( in ); \n" - " dest[3*i] = out.x;\n" - " if( 0 == (i & 1) )\n" - " dest[3*i+1] = out.y;\n" - " }\n" - "}\n" - }; - stringCount = 3 == vectorSizetmp ? sizeof(programSourceV3) / sizeof(programSourceV3[0]) : - sizeof(programSource) / sizeof(programSource[0]); - strings = 3 == vectorSizetmp ? 
programSourceV3 : programSource; - - if (outType == kdouble || inType == kdouble) { - programSource[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - programSourceV3[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - } - - //create the type name switch (vectorSizetmp) { case 1: @@ -1661,8 +1618,40 @@ static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat, vlog("Building %s( %s ) test\n", convertString, inName); break; } - fflush(stdout); + + if (vectorSizetmp == 3) + { + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " if( i + 1 < get_global_size(0))\n"; + source << " vstore3( " << convertString + << "( vload3( i, src)), i, dest );\n"; + source << " else\n"; + source << " {\n"; + source << " " << inName << "3 in;\n"; + source << " " << outName << "3 out;\n"; + source << " if( 0 == (i & 1) )\n"; + source << " in.y = src[3*i+1];\n"; + source << " in.x = src[3*i];\n"; + source << " out = " << convertString << "( in ); \n"; + source << " dest[3*i] = out.x;\n"; + source << " if( 0 == (i & 1) )\n"; + source << " dest[3*i+1] = out.y;\n"; + source << " }\n"; + source << "}\n"; + } + else + { + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " dest[i] = " << convertString << "( src[i] );\n"; + source << "}\n"; + } } *outKernel = NULL; @@ -1671,7 +1660,10 @@ static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat, flags = "-cl-denorms-are-zero"; // build it - error = create_single_kernel_helper(gContext, &program, outKernel, (cl_uint)stringCount, strings, testName, flags); + std::string sourceString = source.str(); + const char *programSource = sourceString.c_str(); + error = create_single_kernel_helper(gContext, &program, 
outKernel, 1, + &programSource, testName, flags); if (error) { char buffer[2048] = ""; -- cgit v1.2.3 From 3eb0f50d85df0350af29f5f1dbbf5a3ddef906b3 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Thu, 25 Nov 2021 13:36:20 +0000 Subject: Use maximum subgroup size in sub_group_ballot tests (#1344) sub_group_ballot_bit_count() and sub_group_ballot_find_msb() mask their input according to a subgroup size, which is assumed to be the maximum subgroup size, and not the actual subgroup size excluding non-existent work-items in the "remainder" subgroup. Fix this as per the the clarification made to the OpenCL C specification in revision 3.0.9 for issue KhronosGroup/OpenCL-Docs#626 by pull request KhronosGroup/OpenCL-Docs#689. Signed-off-by: Stuart Brady --- test_conformance/subgroups/test_subgroup_ballot.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index ee2c5e51..f362a501 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -496,7 +496,7 @@ template struct BALLOT_COUNT_SCAN_FIND | (bs128(mx[wg_offset + wi_id].s1) << 32) | (bs128(mx[wg_offset + wi_id].s2) << 64) | (bs128(mx[wg_offset + wi_id].s3) << 96); - bs &= getImportantBits(wi_id, current_sbs); + bs &= getImportantBits(wi_id, sbs); device_result = my[wg_offset + wi_id].s0; if (operation == BallotOp::ballot_inclusive_scan || operation == BallotOp::ballot_exclusive_scan @@ -516,7 +516,7 @@ template struct BALLOT_COUNT_SCAN_FIND } else if (operation == BallotOp::ballot_find_lsb) { - for (int id = 0; id < current_sbs; ++id) + for (int id = 0; id < sbs; ++id) { if (bs.test(id)) { @@ -537,7 +537,7 @@ template struct BALLOT_COUNT_SCAN_FIND } else if (operation == BallotOp::ballot_find_msb) { - for (int id = current_sbs - 1; id >= 0; --id) + for (int id = sbs - 1; id >= 0; --id) { if (bs.test(id)) { -- cgit v1.2.3 From 
6dff4fdffadff59c42083bd2f685598613c30091 Mon Sep 17 00:00:00 2001 From: BKoscielak Date: Thu, 25 Nov 2021 14:40:19 +0100 Subject: Fix conversion data loss in test_api min_max_constant_args (#1355) --- test_conformance/api/test_api_min_max.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp index 9e981cd3..8d132fe6 100644 --- a/test_conformance/api/test_api_min_max.cpp +++ b/test_conformance/api/test_api_min_max.cpp @@ -1489,7 +1489,7 @@ int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_com error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); test_error( error, "Unable to get max constant buffer size" ); - individualBufferSize = ((int)maxSize/2)/maxArgs; + individualBufferSize = (maxSize / 2) / maxArgs; log_info("Reported max constant arg count of %d and max constant buffer size of %d. Test will attempt to allocate half of that, or %d buffers of size %d.\n", (int)maxArgs, (int)maxSize, (int)maxArgs, (int)individualBufferSize); -- cgit v1.2.3 From 6f50623ba867ee5a847464e15937b1a9bda3506c Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Thu, 25 Nov 2021 14:41:06 +0100 Subject: Subgroups tests - sub_group_non_uniform_scan_exclusive function fixes (#1350) * Fix - comparing results will never happen. 
* No special action needed for one work item in the subgroup --- test_conformance/subgroups/subgroup_common_templates.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index cfe02c2f..64b4b971 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -630,19 +630,12 @@ template struct SCEX_NU { continue; } - else if (active_work_items.size() == 1) - { - continue; - } else { tr = TypeManager::identify_limits(operation); - int idx = 0; for (const int &active_work_item : active_work_items) { rr = my[ii + active_work_item]; - if (idx == 0) continue; - if (!compare_ordered(rr, tr)) { log_error( @@ -655,7 +648,6 @@ template struct SCEX_NU } tr = calculate(tr, mx[ii + active_work_item], operation); - idx++; } } } -- cgit v1.2.3 From 7625011b666c1a7c1fee5818309e9ed3d658a899 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Fri, 26 Nov 2021 15:30:23 +0000 Subject: Remove unused inclusion of (#1362) Signed-off-by: Stuart Brady --- test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp index bb257bcd..5ab45222 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp @@ -17,7 +17,6 @@ #include "subhelpers.h" #include "harness/typeWrappers.h" #include "subgroup_common_templates.h" -#include namespace { -- cgit v1.2.3 From f8ec235d3c1555fbfaa7eea6bf5f3b588de1b03f Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Thu, 2 Dec 2021 15:27:30 +0000 Subject: Tidy up code to determine bit mask for ballot scans (#1363) It seems more intuitive to set only the bits that are required, rather than to set one 
more bit than is required, only to clear it again. Signed-off-by: Stuart Brady --- test_conformance/subgroups/test_subgroup_ballot.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index f362a501..e742aa3b 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -437,9 +437,9 @@ template struct BALLOT_COUNT_SCAN_FIND else if (operation == BallotOp::ballot_inclusive_scan || operation == BallotOp::ballot_exclusive_scan) { - for (cl_uint i = 0; i <= sub_group_local_id; ++i) mask.set(i); - if (operation == BallotOp::ballot_exclusive_scan) - mask.reset(sub_group_local_id); + for (cl_uint i = 0; i < sub_group_local_id; ++i) mask.set(i); + if (operation == BallotOp::ballot_inclusive_scan) + mask.set(sub_group_local_id); } return mask; } -- cgit v1.2.3 From e106be14f9d21a13d485c8256da6cccb933850cd Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Sat, 4 Dec 2021 18:55:17 +0100 Subject: Test api min max - fix printing cl_ulong data type (#1212) * test api - fix code formatting only * Fix printing cl_ulong type to avoid overloading. * Fix printing size_t data type * Fix printing size_t data type - set unsinged * Fix formatting for maxArgs (uint) and numberOfInts (size_t) --- test_conformance/api/test_api_min_max.cpp | 1746 ++++++++++++++++++----------- 1 file changed, 1087 insertions(+), 659 deletions(-) diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp index 8d132fe6..28ca8237 100644 --- a/test_conformance/api/test_api_min_max.cpp +++ b/test_conformance/api/test_api_min_max.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -24,7 +24,8 @@ const char *sample_single_param_kernel[] = { "{\n" " int tid = get_global_id(0);\n" "\n" - "}\n" }; + "}\n" +}; const char *sample_single_param_write_kernel[] = { "__kernel void sample_test(__global int *src)\n" @@ -32,23 +33,29 @@ const char *sample_single_param_write_kernel[] = { " int tid = get_global_id(0);\n" " src[tid] = tid;\n" "\n" - "}\n" }; + "}\n" +}; const char *sample_read_image_kernel_pattern[] = { - "__kernel void sample_test( __global float *result, ", " )\n" + "__kernel void sample_test( __global float *result, ", + " )\n" "{\n" - " sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n" + " sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | " + "CLK_FILTER_NEAREST;\n" " int tid = get_global_id(0);\n" " result[0] = 0.0f;\n", "\n" - "}\n" }; + "}\n" +}; const char *sample_write_image_kernel_pattern[] = { - "__kernel void sample_test( ", " )\n" + "__kernel void sample_test( ", + " )\n" "{\n" " int tid = get_global_id(0);\n", "\n" - "}\n" }; + "}\n" +}; const char *sample_large_parmam_kernel_pattern[] = { @@ -57,7 +64,8 @@ const char *sample_large_parmam_kernel_pattern[] = { "result[0] = 0;\n" "%s" "\n" - "}\n" }; + "}\n" +}; const char *sample_large_int_parmam_kernel_pattern[] = { "__kernel void sample_test(%s, __global int *result)\n" @@ -65,15 +73,19 @@ const char *sample_large_int_parmam_kernel_pattern[] = { "result[0] = 0;\n" "%s" "\n" - "}\n" }; + "}\n" +}; const char *sample_sampler_kernel_pattern[] = { - "__kernel void sample_test( read_only image2d_t src, __global int4 *dst", ", sampler_t sampler%d", ")\n" + "__kernel void sample_test( read_only image2d_t src, __global int4 *dst", + ", sampler_t sampler%d", + ")\n" "{\n" " int tid = get_global_id(0);\n", " dst[ 0 ] = read_imagei( src, sampler%d, (int2)( 0, 0 ) );\n", "\n" - "}\n" }; + "}\n" +}; const char *sample_const_arg_kernel[] = { "__kernel void sample_test(__constant int 
*src1, __global int *dst)\n" @@ -82,10 +94,12 @@ const char *sample_const_arg_kernel[] = { "\n" " dst[tid] = src1[tid];\n" "\n" - "}\n" }; + "}\n" +}; const char *sample_local_arg_kernel[] = { - "__kernel void sample_test(__local int *src1, __global int *global_src, __global int *dst)\n" + "__kernel void sample_test(__local int *src1, __global int *global_src, " + "__global int *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" @@ -93,19 +107,21 @@ const char *sample_local_arg_kernel[] = { " barrier(CLK_GLOBAL_MEM_FENCE);\n" " dst[tid] = src1[tid];\n" "\n" - "}\n" }; + "}\n" +}; const char *sample_const_max_arg_kernel_pattern = -"__kernel void sample_test(__constant int *src1 %s, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = src1[tid];\n" -"%s" -"\n" -"}\n"; - -int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) + "__kernel void sample_test(__constant int *src1 %s, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src1[tid];\n" + "%s" + "\n" + "}\n"; + +int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error, retVal; unsigned int maxThreadDim, threadDim, i; @@ -118,19 +134,24 @@ int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl /* Get the max thread dimensions */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( maxThreadDim ), &maxThreadDim, NULL ); - test_error( error, "Unable to get max work item dimensions from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(maxThreadDim), &maxThreadDim, NULL); + test_error(error, "Unable to get max work item dimensions from device"); - if( maxThreadDim < 3 ) + if (maxThreadDim < 3) { - log_error( "ERROR: Reported max work item dimensions is less than required! 
(%d)\n", maxThreadDim ); + log_error("ERROR: Reported max work item dimensions is less than " + "required! (%d)\n", + maxThreadDim); return -1; } log_info("Reported max thread dimensions of %d.\n", maxThreadDim); /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_param_kernel, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + sample_single_param_kernel, "sample_test") + != 0) { return -1; } @@ -138,105 +159,122 @@ int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl /* Create some I/O streams */ streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * 100, NULL, &error); - if( streams[0] == NULL ) + if (streams[0] == NULL) { log_error("ERROR: Creating test array failed!\n"); return -1; } /* Set the arguments */ - error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set kernel arguments"); retVal = 0; /* Now try running the kernel with up to that many threads */ - for (threadDim=1; threadDim <= maxThreadDim; threadDim++) + for (threadDim = 1; threadDim <= maxThreadDim; threadDim++) { - threads = (size_t *)malloc( sizeof( size_t ) * maxThreadDim ); - localThreads = (size_t *)malloc( sizeof( size_t ) * maxThreadDim ); - for( i = 0; i < maxThreadDim; i++ ) + threads = (size_t *)malloc(sizeof(size_t) * maxThreadDim); + localThreads = (size_t *)malloc(sizeof(size_t) * maxThreadDim); + for (i = 0; i < maxThreadDim; i++) { - threads[ i ] = 1; + threads[i] = 1; localThreads[i] = 1; } - error = clEnqueueNDRangeKernel( queue, kernel, maxThreadDim, NULL, threads, localThreads, 0, NULL, &event ); - test_error( error, "Failed clEnqueueNDRangeKernel"); + error = clEnqueueNDRangeKernel(queue, kernel, maxThreadDim, NULL, + threads, localThreads, 0, NULL, &event); 
+ test_error(error, "Failed clEnqueueNDRangeKernel"); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error( + error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); /* All done */ - free( threads ); - free( localThreads ); + free(threads); + free(localThreads); } return retVal; } -int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t *deviceMaxWorkItemSize; unsigned int maxWorkItemDim; /* Get the max work item dimensions */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( maxWorkItemDim ), &maxWorkItemDim, NULL ); - test_error( error, "Unable to get max work item dimensions from device" ); - - log_info("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS returned %d\n", maxWorkItemDim); - deviceMaxWorkItemSize = (size_t*)malloc(sizeof(size_t)*maxWorkItemDim); - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*maxWorkItemDim, deviceMaxWorkItemSize, NULL ); - test_error( error, "clDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(maxWorkItemDim), &maxWorkItemDim, NULL); + test_error(error, "Unable to get max work item dimensions from device"); + + 
log_info("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS returned %d\n", + maxWorkItemDim); + deviceMaxWorkItemSize = (size_t *)malloc(sizeof(size_t) * maxWorkItemDim); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, + sizeof(size_t) * maxWorkItemDim, + deviceMaxWorkItemSize, NULL); + test_error(error, "clDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed"); unsigned int i; int errors = 0; - for(i=0; i= 128 && maxParameterSize == 1024) { - error = clGetDeviceInfo( deviceID, CL_DEVICE_TYPE, sizeof( deviceType ), &deviceType, NULL ); - test_error( error, "Unable to get device type from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(deviceType), + &deviceType, NULL); + test_error(error, "Unable to get device type from device"); - if(deviceType != CL_DEVICE_TYPE_CUSTOM) + if (deviceType != CL_DEVICE_TYPE_CUSTOM) { maxReadImages = 127; } @@ -295,85 +340,107 @@ int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_c maxParameterSize -= deviceAddressSize; // Calculate the number we can use - if (maxParameterSize/deviceAddressSize < maxReadImages) { - log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/deviceAddressSize)); - maxReadImages = (unsigned int)(maxParameterSize/deviceAddressSize); + if (maxParameterSize / deviceAddressSize < maxReadImages) + { + log_info("WARNING: Max parameter size of %d bytes limits test to %d " + "max image arguments.\n", + (int)maxParameterSize, + (int)(maxParameterSize / deviceAddressSize)); + maxReadImages = (unsigned int)(maxParameterSize / deviceAddressSize); } /* Create a program with that many read args */ - programSrc = (char *)malloc( strlen( sample_read_image_kernel_pattern[ 0 ] ) + ( strlen( readArgPattern ) + 6 ) * ( maxReadImages ) + - strlen( sample_read_image_kernel_pattern[ 1 ] ) + 1 + 40240); + programSrc = (char *)malloc(strlen(sample_read_image_kernel_pattern[0]) + + 
(strlen(readArgPattern) + 6) * (maxReadImages) + + strlen(sample_read_image_kernel_pattern[1]) + + 1 + 40240); - strcpy( programSrc, sample_read_image_kernel_pattern[ 0 ] ); - strcat( programSrc, "read_only image2d_t srcimg0" ); - for( i = 0; i < maxReadImages-1; i++ ) + strcpy(programSrc, sample_read_image_kernel_pattern[0]); + strcat(programSrc, "read_only image2d_t srcimg0"); + for (i = 0; i < maxReadImages - 1; i++) { - sprintf( readArgLine, readArgPattern, i+1 ); - strcat( programSrc, readArgLine ); + sprintf(readArgLine, readArgPattern, i + 1); + strcat(programSrc, readArgLine); } - strcat( programSrc, sample_read_image_kernel_pattern[ 1 ] ); - for ( i = 0; i < maxReadImages; i++) { - sprintf( readArgLine, "\tresult[0] += read_imagef( srcimg%d, sampler, (int2)(0,0)).x;\n", i); - strcat( programSrc, readArgLine ); + strcat(programSrc, sample_read_image_kernel_pattern[1]); + for (i = 0; i < maxReadImages; i++) + { + sprintf( + readArgLine, + "\tresult[0] += read_imagef( srcimg%d, sampler, (int2)(0,0)).x;\n", + i); + strcat(programSrc, readArgLine); } - strcat( programSrc, sample_read_image_kernel_pattern[ 2 ] ); + strcat(programSrc, sample_read_image_kernel_pattern[2]); - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test"); - test_error( error, "Failed to create the program and kernel."); - free( programSrc ); + error = + create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programSrc, "sample_test"); + test_error(error, "Failed to create the program and kernel."); + free(programSrc); result = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float), NULL, &error); - test_error( error, "clCreateBufer failed"); + test_error(error, "clCreateBufer failed"); /* Create some I/O streams */ streams = new clMemWrapper[maxReadImages + 1]; - for( i = 0; i < maxReadImages; i++ ) + for (i = 0; i < maxReadImages; i++) { - image_data[0]=i; - image_result+= image_data[0]; - streams[i] = 
create_image_2d( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &image_format_desc, 4, 4, 0, image_data, &error ); - test_error( error, "Unable to allocate test image" ); + image_data[0] = i; + image_result += image_data[0]; + streams[i] = + create_image_2d(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + &image_format_desc, 4, 4, 0, image_data, &error); + test_error(error, "Unable to allocate test image"); } - error = clSetKernelArg( kernel, 0, sizeof( result ), &result ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(result), &result); + test_error(error, "Unable to set kernel arguments"); /* Set the arguments */ - for( i = 1; i < maxReadImages+1; i++ ) + for (i = 1; i < maxReadImages + 1; i++) { - error = clSetKernelArg( kernel, i, sizeof( streams[i-1] ), &streams[i-1] ); - test_error( error, "Unable to set kernel arguments" ); + error = + clSetKernelArg(kernel, i, sizeof(streams[i - 1]), &streams[i - 1]); + test_error(error, "Unable to set kernel arguments"); } /* Now try running the kernel */ threads[0] = threads[1] = 1; - error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &event ); - test_error( error, "clEnqueueNDRangeKernel failed"); + error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, + NULL, &event); + test_error(error, "clEnqueueNDRangeKernel failed"); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error(error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); 
clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); - error = clEnqueueReadBuffer(queue, result, CL_TRUE, 0, sizeof(cl_float), &actual_image_result, 0, NULL, NULL); + error = clEnqueueReadBuffer(queue, result, CL_TRUE, 0, sizeof(cl_float), + &actual_image_result, 0, NULL, NULL); test_error(error, "clEnqueueReadBuffer failed"); delete[] streams; - if (actual_image_result != image_result) { - log_error("Result failed to verify. Got %g, expected %g.\n", actual_image_result, image_result); + if (actual_image_result != image_result) + { + log_error("Result failed to verify. Got %g, expected %g.\n", + actual_image_result, image_result); return 1; } return 0; } -int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; unsigned int maxWriteImages, i; @@ -381,94 +448,117 @@ int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_ char writeArgLine[128], *programSrc; const char *writeArgPattern = ", write_only image2d_t dstimg%d"; clKernelWrapper kernel; - clMemWrapper *streams; + clMemWrapper *streams; size_t threads[2]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; size_t maxParameterSize; cl_event event; cl_int event_status; cl_uint minRequiredWriteImages = gIsEmbedded ? 
1 : 8; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) image_format_desc.image_channel_order = CL_RGBA; image_format_desc.image_channel_data_type = CL_UNORM_INT8; /* Get the max read image arg count */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof( maxWriteImages ), &maxWriteImages, NULL ); - test_error( error, "Unable to get max write image arg count from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, + sizeof(maxWriteImages), &maxWriteImages, NULL); + test_error(error, "Unable to get max write image arg count from device"); - if( maxWriteImages == 0 ) + if (maxWriteImages == 0) { - log_info( "WARNING: Device reports 0 for a max write image arg count (write image arguments unsupported). Skipping test (implicitly passes). This is only valid if the number of image formats is also 0.\n" ); + log_info( + "WARNING: Device reports 0 for a max write image arg count (write " + "image arguments unsupported). Skipping test (implicitly passes). " + "This is only valid if the number of image formats is also 0.\n"); return 0; } - if( maxWriteImages < minRequiredWriteImages ) + if (maxWriteImages < minRequiredWriteImages) { - log_error( "ERROR: Reported max write image arg count is less than required! (%d)\n", maxWriteImages ); + log_error("ERROR: Reported max write image arg count is less than " + "required! 
(%d)\n", + maxWriteImages); return -1; } log_info("Reported %d max write image args.\n", maxWriteImages); - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); - test_error( error, "Unable to get max parameter size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, + sizeof(maxParameterSize), &maxParameterSize, NULL); + test_error(error, "Unable to get max parameter size from device"); // Calculate the number we can use - if (maxParameterSize/sizeof(cl_mem) < maxWriteImages) { - log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem))); - maxWriteImages = (unsigned int)(maxParameterSize/sizeof(cl_mem)); + if (maxParameterSize / sizeof(cl_mem) < maxWriteImages) + { + log_info("WARNING: Max parameter size of %d bytes limits test to %d " + "max image arguments.\n", + (int)maxParameterSize, + (int)(maxParameterSize / sizeof(cl_mem))); + maxWriteImages = (unsigned int)(maxParameterSize / sizeof(cl_mem)); } /* Create a program with that many write args + 1 */ - programSrc = (char *)malloc( strlen( sample_write_image_kernel_pattern[ 0 ] ) + ( strlen( writeArgPattern ) + 6 ) * ( maxWriteImages + 1 ) + - strlen( sample_write_image_kernel_pattern[ 1 ] ) + 1 + 40240 ); + programSrc = (char *)malloc( + strlen(sample_write_image_kernel_pattern[0]) + + (strlen(writeArgPattern) + 6) * (maxWriteImages + 1) + + strlen(sample_write_image_kernel_pattern[1]) + 1 + 40240); - strcpy( programSrc, sample_write_image_kernel_pattern[ 0 ] ); - strcat( programSrc, "write_only image2d_t dstimg0" ); - for( i = 1; i < maxWriteImages; i++ ) + strcpy(programSrc, sample_write_image_kernel_pattern[0]); + strcat(programSrc, "write_only image2d_t dstimg0"); + for (i = 1; i < maxWriteImages; i++) { - sprintf( writeArgLine, writeArgPattern, i ); - strcat( programSrc, writeArgLine ); + sprintf(writeArgLine, 
writeArgPattern, i); + strcat(programSrc, writeArgLine); } - strcat( programSrc, sample_write_image_kernel_pattern[ 1 ] ); - for ( i = 0; i < maxWriteImages; i++) { - sprintf( writeArgLine, "\twrite_imagef( dstimg%d, (int2)(0,0), (float4)(0,0,0,0));\n", i); - strcat( programSrc, writeArgLine ); + strcat(programSrc, sample_write_image_kernel_pattern[1]); + for (i = 0; i < maxWriteImages; i++) + { + sprintf(writeArgLine, + "\twrite_imagef( dstimg%d, (int2)(0,0), (float4)(0,0,0,0));\n", + i); + strcat(programSrc, writeArgLine); } - strcat( programSrc, sample_write_image_kernel_pattern[ 2 ] ); + strcat(programSrc, sample_write_image_kernel_pattern[2]); - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test"); - test_error( error, "Failed to create the program and kernel."); - free( programSrc ); + error = + create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programSrc, "sample_test"); + test_error(error, "Failed to create the program and kernel."); + free(programSrc); /* Create some I/O streams */ streams = new clMemWrapper[maxWriteImages + 1]; - for( i = 0; i < maxWriteImages; i++ ) + for (i = 0; i < maxWriteImages; i++) { - streams[i] = create_image_2d( context, CL_MEM_READ_WRITE, &image_format_desc, 16, 16, 0, NULL, &error ); - test_error( error, "Unable to allocate test image" ); + streams[i] = + create_image_2d(context, CL_MEM_READ_WRITE, &image_format_desc, 16, + 16, 0, NULL, &error); + test_error(error, "Unable to allocate test image"); } /* Set the arguments */ - for( i = 0; i < maxWriteImages; i++ ) + for (i = 0; i < maxWriteImages; i++) { - error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); + test_error(error, "Unable to set kernel arguments"); } /* Now try running the kernel */ threads[0] = threads[1] = 16; - error = 
clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &event ); - test_error( error, "clEnqueueNDRangeKernel failed."); + error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, + NULL, &event); + test_error(error, "clEnqueueNDRangeKernel failed."); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error(error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); @@ -478,7 +568,8 @@ int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_ return 0; } -int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_ulong maxAllocSize, memSize, minSizeToTry; @@ -492,61 +583,89 @@ int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_co requiredAllocSize = 128 * 1024 * 1024; /* Get the max mem alloc size */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get max mem alloc size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get max mem alloc size from device"); - error = clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, 
sizeof( memSize ), &memSize, NULL ); - test_error( error, "Unable to get global memory size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, + sizeof(memSize), &memSize, NULL); + test_error(error, "Unable to get global memory size from device"); - if (memSize > (cl_ulong)SIZE_MAX) { - memSize = (cl_ulong)SIZE_MAX; + if (memSize > (cl_ulong)SIZE_MAX) + { + memSize = (cl_ulong)SIZE_MAX; } - if( maxAllocSize < requiredAllocSize) + if (maxAllocSize < requiredAllocSize) { - log_error( "ERROR: Reported max allocation size is less than required %lldMB! (%llu or %lluMB, from a total mem size of %lldMB)\n", (requiredAllocSize / 1024) / 1024, maxAllocSize, (maxAllocSize / 1024)/1024, (memSize / 1024)/1024 ); + log_error("ERROR: Reported max allocation size is less than required " + "%lldMB! (%llu or %lluMB, from a total mem size of %lldMB)\n", + (requiredAllocSize / 1024) / 1024, maxAllocSize, + (maxAllocSize / 1024) / 1024, (memSize / 1024) / 1024); return -1; } - requiredAllocSize = ((memSize / 4) > (1024 * 1024 * 1024)) ? 1024 * 1024 * 1024 : memSize / 4; + requiredAllocSize = ((memSize / 4) > (1024 * 1024 * 1024)) + ? 1024 * 1024 * 1024 + : memSize / 4; if (gIsEmbedded) - requiredAllocSize = (requiredAllocSize < 1 * 1024 * 1024) ? 1 * 1024 * 1024 : requiredAllocSize; + requiredAllocSize = (requiredAllocSize < 1 * 1024 * 1024) + ? 1 * 1024 * 1024 + : requiredAllocSize; else - requiredAllocSize = (requiredAllocSize < 128 * 1024 * 1024) ? 128 * 1024 * 1024 : requiredAllocSize; + requiredAllocSize = (requiredAllocSize < 128 * 1024 * 1024) + ? 128 * 1024 * 1024 + : requiredAllocSize; - if( maxAllocSize < requiredAllocSize ) + if (maxAllocSize < requiredAllocSize) { - log_error( "ERROR: Reported max allocation size is less than required of total memory! 
(%llu or %lluMB, from a total mem size of %lluMB)\n", maxAllocSize, (maxAllocSize / 1024)/1024, (requiredAllocSize / 1024)/1024 ); + log_error( + "ERROR: Reported max allocation size is less than required of " + "total memory! (%llu or %lluMB, from a total mem size of %lluMB)\n", + maxAllocSize, (maxAllocSize / 1024) / 1024, + (requiredAllocSize / 1024) / 1024); return -1; } - log_info("Reported max allocation size of %lld bytes (%gMB) and global mem size of %lld bytes (%gMB).\n", - maxAllocSize, maxAllocSize/(1024.0*1024.0), requiredAllocSize, requiredAllocSize/(1024.0*1024.0)); + log_info("Reported max allocation size of %lld bytes (%gMB) and global mem " + "size of %lld bytes (%gMB).\n", + maxAllocSize, maxAllocSize / (1024.0 * 1024.0), requiredAllocSize, + requiredAllocSize / (1024.0 * 1024.0)); - if ( memSize < maxAllocSize ) { - log_info("Global memory size is less than max allocation size, using that.\n"); + if (memSize < maxAllocSize) + { + log_info("Global memory size is less than max allocation size, using " + "that.\n"); maxAllocSize = memSize; } - minSizeToTry = maxAllocSize/16; - while (maxAllocSize > (maxAllocSize/4)) { + minSizeToTry = maxAllocSize / 16; + while (maxAllocSize > (maxAllocSize / 4)) + { - log_info("Trying to create a buffer of size of %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); - memHdl = clCreateBuffer( context, CL_MEM_READ_ONLY, (size_t)maxAllocSize, NULL, &error ); - if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE || error == CL_OUT_OF_RESOURCES || error == CL_OUT_OF_HOST_MEMORY) { - log_info("\tAllocation failed at size of %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); + log_info("Trying to create a buffer of size of %lld bytes (%gMB).\n", + maxAllocSize, (double)maxAllocSize / (1024.0 * 1024.0)); + memHdl = clCreateBuffer(context, CL_MEM_READ_ONLY, (size_t)maxAllocSize, + NULL, &error); + if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE + || error == CL_OUT_OF_RESOURCES || 
error == CL_OUT_OF_HOST_MEMORY) + { + log_info("\tAllocation failed at size of %lld bytes (%gMB).\n", + maxAllocSize, (double)maxAllocSize / (1024.0 * 1024.0)); maxAllocSize -= minSizeToTry; continue; } - test_error( error, "clCreateBuffer failed for maximum sized buffer."); + test_error(error, "clCreateBuffer failed for maximum sized buffer."); return 0; } - log_error("Failed to allocate even %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); + log_error("Failed to allocate even %lld bytes (%gMB).\n", maxAllocSize, + (double)maxAllocSize / (1024.0 * 1024.0)); return -1; } -int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimension; @@ -557,7 +676,7 @@ int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_co size_t length; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) auto version = get_device_cl_version(deviceID); if (version == Version(1, 0)) @@ -571,16 +690,20 @@ int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_co /* Just get any ol format to test with */ - error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &image_format_desc ); - test_error( error, "Unable to obtain suitable image format to test with!" 
); + error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_READ_WRITE, 0, &image_format_desc); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 2d width from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE2D_MAX_WIDTH, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 2d width from device"); - if( maxDimension < minRequiredDimension ) + if (maxDimension < minRequiredDimension) { - log_error( "ERROR: Reported max image 2d width is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 2d width is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported width is %ld.\n", maxDimension); @@ -588,34 +711,42 @@ int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_co /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." 
); - if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*1*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 1 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size %d x 1 = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size %d x 1 = %gMB.\n", + (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_2d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimension, 1, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &image_format_desc, + maxDimension, 1, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 2D creation failed for maximum width" ); + print_error(error, "Image 2D creation failed for maximum width"); return -1; } return 0; } -int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimension; @@ -625,7 +756,7 @@ int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_c cl_uint minRequiredDimension; size_t length; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) auto version = get_device_cl_version(deviceID); if 
(version == Version(1, 0)) @@ -638,16 +769,20 @@ int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_c } /* Just get any ol format to test with */ - error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &image_format_desc ); - test_error( error, "Unable to obtain suitable image format to test with!" ); + error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_READ_WRITE, 0, &image_format_desc); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 2d height from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE2D_MAX_HEIGHT, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 2d height from device"); - if( maxDimension < minRequiredDimension ) + if (maxDimension < minRequiredDimension) { - log_error( "ERROR: Reported max image 2d height is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 2d height is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported height is %ld.\n", maxDimension); @@ -655,56 +790,67 @@ int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_c /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*1*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 1 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size 1 x %d = %gMB.\n", + (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_2d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, maxDimension, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &image_format_desc, + 1, maxDimension, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 2D creation failed for maximum height" ); + print_error(error, "Image 2D creation failed for maximum height"); return -1; } return 0; } -int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimension; clMemWrapper 
streams[1]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; cl_ulong maxAllocSize; - PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID) /* Just get any ol format to test with */ error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_ONLY, 0, &image_format_desc); - test_error( error, "Unable to obtain suitable image format to test with!" ); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 3d width from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_WIDTH, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 3d width from device"); - if( maxDimension < 2048 ) + if (maxDimension < 2048) { - log_error( "ERROR: Reported max image 3d width is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 3d width is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported width is %ld.\n", maxDimension); @@ -712,56 +858,68 @@ int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_co /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*2*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*2*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 2 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 2 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size %d x 1 x 2 = %gMB.\n", (int)maxDimension, (2*(float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size %d x 1 x 2 = %gMB.\n", + (int)maxDimension, + (2 * (float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimension, 1, 2, 0, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc, + maxDimension, 1, 2, 0, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 3D creation failed for maximum width" ); + print_error(error, "Image 3D creation failed for maximum width"); return -1; } return 0; } -int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t 
maxDimension; clMemWrapper streams[1]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; cl_ulong maxAllocSize; - PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID) /* Just get any ol format to test with */ error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_ONLY, 0, &image_format_desc); - test_error( error, "Unable to obtain suitable image format to test with!" ); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 3d height from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_HEIGHT, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 3d height from device"); - if( maxDimension < 2048 ) + if (maxDimension < 2048) { - log_error( "ERROR: Reported max image 3d height is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 3d height is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported height is %ld.\n", maxDimension); @@ -769,27 +927,35 @@ int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_c /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*2*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*2*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 2 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 2 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size 1 x %d x 2 = %gMB.\n", (int)maxDimension, (2*(float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size 1 x %d x 2 = %gMB.\n", + (int)maxDimension, + (2 * (float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, maxDimension, 2, 0, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc, + 1, maxDimension, 2, 0, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 3D creation failed for maximum height" ); + print_error(error, "Image 3D creation failed for maximum height"); return -1; } @@ -797,29 +963,33 @@ int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_c } -int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_3d_depth(cl_device_id 
deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimension; clMemWrapper streams[1]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; cl_ulong maxAllocSize; - PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID) /* Just get any ol format to test with */ error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_ONLY, 0, &image_format_desc); - test_error( error, "Unable to obtain suitable image format to test with!" ); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 3d depth from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_DEPTH, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 3d depth from device"); - if( maxDimension < 2048 ) + if (maxDimension < 2048) { - log_error( "ERROR: Reported max image 3d depth is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 3d depth is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported depth is %ld.\n", maxDimension); @@ -827,55 +997,67 @@ int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_co /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*1*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 1 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", + (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, 1, maxDimension, 0, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc, + 1, 1, maxDimension, 0, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 3D creation failed for maximum depth" ); + print_error(error, "Image 3D creation failed for maximum depth"); return -1; } return 0; } -int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t 
maxDimension; clMemWrapper streams[1]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; cl_ulong maxAllocSize; size_t minRequiredDimension = gIsEmbedded ? 256 : 2048; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ); + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID); /* Just get any ol format to test with */ - error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_READ_WRITE, 0, &image_format_desc ); - test_error( error, "Unable to obtain suitable image format to test with!" ); + error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D_ARRAY, + CL_MEM_READ_WRITE, 0, &image_format_desc); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max image array width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image array size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image array size from device"); - if( maxDimension < minRequiredDimension ) + if (maxDimension < minRequiredDimension) { - log_error( "ERROR: Reported max image array size is less than required! (%d)\n", (int)maxDimension ); + log_error("ERROR: Reported max image array size is less than required! 
" + "(%d)\n", + (int)maxDimension); return -1; } log_info("Max reported image array size is %ld.\n", maxDimension); @@ -883,96 +1065,127 @@ int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_ /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D_ARRAY, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE2D_ARRAY, + &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*1*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 1 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", + (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_2d_array( context, CL_MEM_READ_ONLY, &image_format_desc, 1, 1, maxDimension, 0, 0, NULL, &error ); - if( ( 
streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = + create_image_2d_array(context, CL_MEM_READ_ONLY, &image_format_desc, 1, + 1, maxDimension, 0, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "2D Image Array creation failed for maximum array size" ); + print_error(error, + "2D Image Array creation failed for maximum array size"); return -1; } return 0; } -int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimensionPixels; clMemWrapper streams[2]; - cl_image_format image_format_desc = {0}; + cl_image_format image_format_desc = { 0 }; cl_ulong maxAllocSize; size_t minRequiredDimension = gIsEmbedded ? 2048 : 65536; unsigned int i = 0; size_t pixelBytes = 0; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ); + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID); /* Get the max memory allocation size */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." 
); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); /* Get the max image array width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof( maxDimensionPixels ), &maxDimensionPixels, NULL ); - test_error( error, "Unable to get max image buffer size from device" ); + error = + clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, + sizeof(maxDimensionPixels), &maxDimensionPixels, NULL); + test_error(error, "Unable to get max image buffer size from device"); - if( maxDimensionPixels < minRequiredDimension ) + if (maxDimensionPixels < minRequiredDimension) { - log_error( "ERROR: Reported max image buffer size is less than required! (%d)\n", (int)maxDimensionPixels ); + log_error("ERROR: Reported max image buffer size is less than " + "required! (%d)\n", + (int)maxDimensionPixels); return -1; } - log_info("Max reported image buffer size is %ld pixels.\n", maxDimensionPixels); + log_info("Max reported image buffer size is %ld pixels.\n", + maxDimensionPixels); pixelBytes = maxAllocSize / maxDimensionPixels; - if ( pixelBytes == 0 ) + if (pixelBytes == 0) { - log_error( "Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image of maximum size!\n" ); + log_error("Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than " + "CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image " + "of maximum size!\n"); return -1; } error = -1; - for ( i = pixelBytes; i > 0; --i ) + for (i = pixelBytes; i > 0; --i) { - error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE1D, CL_MEM_READ_ONLY, i, &image_format_desc ); - if ( error == CL_SUCCESS ) + error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE1D, + CL_MEM_READ_ONLY, i, &image_format_desc); + if (error == CL_SUCCESS) { pixelBytes = i; break; } } - test_error( error, "Device does not support format 
to be used to allocate image of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE\n" ); + test_error(error, + "Device does not support format to be used to allocate image of " + "CL_DEVICE_IMAGE_MAX_BUFFER_SIZE\n"); - log_info("Attempting to create an 1D image with channel order %s from buffer of size %d = %gMB.\n", - GetChannelOrderName( image_format_desc.image_channel_order ), (int)maxDimensionPixels, ((float)maxDimensionPixels*pixelBytes/1024.0/1024.0)); + log_info("Attempting to create an 1D image with channel order %s from " + "buffer of size %d = %gMB.\n", + GetChannelOrderName(image_format_desc.image_channel_order), + (int)maxDimensionPixels, + ((float)maxDimensionPixels * pixelBytes / 1024.0 / 1024.0)); /* Try to allocate a buffer */ - streams[0] = clCreateBuffer( context, CL_MEM_READ_ONLY, maxDimensionPixels*pixelBytes, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, + maxDimensionPixels * pixelBytes, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Buffer creation failed for maximum image buffer size" ); + print_error(error, + "Buffer creation failed for maximum image buffer size"); return -1; } /* Try to allocate a 1D image array from buffer */ - streams[1] = create_image_1d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimensionPixels, 0, NULL, streams[0], &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) - { - print_error( error, "1D Image from buffer creation failed for maximum image buffer size" ); + streams[1] = + create_image_1d(context, CL_MEM_READ_ONLY, &image_format_desc, + maxDimensionPixels, 0, NULL, streams[0], &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) + { + print_error(error, + "1D Image from buffer creation failed for maximum image " + "buffer size"); return -1; } @@ -980,8 +1193,8 @@ int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl } - -int 
test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error, retVal, i; size_t maxSize; @@ -1000,62 +1213,78 @@ int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_co /* Get the max param size */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxSize ), &maxSize, NULL ); - test_error( error, "Unable to get max parameter size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, + sizeof(maxSize), &maxSize, NULL); + test_error(error, "Unable to get max parameter size from device"); - if( ((!gIsEmbedded) && (maxSize < 1024)) || ((gIsEmbedded) && (maxSize < 256)) ) + if (((!gIsEmbedded) && (maxSize < 1024)) + || ((gIsEmbedded) && (maxSize < 256))) { - log_error( "ERROR: Reported max parameter size is less than required! (%d)\n", (int)maxSize ); + log_error( + "ERROR: Reported max parameter size is less than required! 
(%d)\n", + (int)maxSize); return -1; } /* The embedded profile without cles_khr_int64 extension does not require * longs, so use ints */ if (embeddedNoLong) - numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_int); + numberOfIntParametersToTry = numberExpected = + (maxSize - sizeof(cl_mem)) / sizeof(cl_int); else - numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_long); + numberOfIntParametersToTry = numberExpected = + (maxSize - sizeof(cl_mem)) / sizeof(cl_long); - decrement = (size_t)(numberOfIntParametersToTry/8); - if (decrement < 1) - decrement = 1; + decrement = (size_t)(numberOfIntParametersToTry / 8); + if (decrement < 1) decrement = 1; log_info("Reported max parameter size of %d bytes.\n", (int)maxSize); - while (numberOfIntParametersToTry > 0) { - // These need to be inside to be deallocated automatically on each loop iteration. + while (numberOfIntParametersToTry > 0) + { + // These need to be inside to be deallocated automatically on each loop + // iteration. 
clProgramWrapper program; clMemWrapper mem; clKernelWrapper kernel; if (embeddedNoLong) { - log_info("Trying a kernel with %ld int arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n", - numberOfIntParametersToTry, sizeof(cl_int)*numberOfIntParametersToTry, sizeof(cl_mem), - sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_int)); + log_info( + "Trying a kernel with %ld int arguments (%ld bytes) and one " + "cl_mem (%ld bytes) for %ld bytes total.\n", + numberOfIntParametersToTry, + sizeof(cl_int) * numberOfIntParametersToTry, sizeof(cl_mem), + sizeof(cl_mem) + numberOfIntParametersToTry * sizeof(cl_int)); } else { - log_info("Trying a kernel with %ld long arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n", - numberOfIntParametersToTry, sizeof(cl_long)*numberOfIntParametersToTry, sizeof(cl_mem), - sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_long)); + log_info( + "Trying a kernel with %ld long arguments (%ld bytes) and one " + "cl_mem (%ld bytes) for %ld bytes total.\n", + numberOfIntParametersToTry, + sizeof(cl_long) * numberOfIntParametersToTry, sizeof(cl_mem), + sizeof(cl_mem) + numberOfIntParametersToTry * sizeof(cl_long)); } // Allocate memory for the program storage - data = malloc(sizeof(cl_long)*numberOfIntParametersToTry); - - argumentLine = (char*)malloc(sizeof(char)*numberOfIntParametersToTry*32); - codeLines = (char*)malloc(sizeof(char)*numberOfIntParametersToTry*32); - programSrc = (char*)malloc(sizeof(char)*(numberOfIntParametersToTry*64+1024)); + data = malloc(sizeof(cl_long) * numberOfIntParametersToTry); + + argumentLine = + (char *)malloc(sizeof(char) * numberOfIntParametersToTry * 32); + codeLines = + (char *)malloc(sizeof(char) * numberOfIntParametersToTry * 32); + programSrc = (char *)malloc(sizeof(char) + * (numberOfIntParametersToTry * 64 + 1024)); argumentLine[0] = '\0'; codeLines[0] = '\0'; programSrc[0] = '\0'; // Generate our results expectedResult = 0; - for (i=0; 
i<(int)numberOfIntParametersToTry; i++) - { - if( gHasLong ) + for (i = 0; i < (int)numberOfIntParametersToTry; i++) + { + if (gHasLong) { ((cl_long *)data)[i] = i; expectedResult += i; @@ -1068,30 +1297,35 @@ int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_co } // Build the program - if( gHasLong) + if (gHasLong) sprintf(argumentLine, "%s", "long arg0"); else sprintf(argumentLine, "%s", "int arg0"); sprintf(codeLines, "%s", "result[0] += arg0;"); - for (i=1; i<(int)numberOfIntParametersToTry; i++) + for (i = 1; i < (int)numberOfIntParametersToTry; i++) { - if( gHasLong) - sprintf(argumentLine + strlen( argumentLine), ", long arg%d", i); + if (gHasLong) + sprintf(argumentLine + strlen(argumentLine), ", long arg%d", i); else - sprintf(argumentLine + strlen( argumentLine), ", int arg%d", i); + sprintf(argumentLine + strlen(argumentLine), ", int arg%d", i); - sprintf(codeLines + strlen( codeLines), "\nresult[0] += arg%d;", i); + sprintf(codeLines + strlen(codeLines), "\nresult[0] += arg%d;", i); } /* Create a kernel to test with */ - sprintf( programSrc, gHasLong ? sample_large_parmam_kernel_pattern[0]: - sample_large_int_parmam_kernel_pattern[0], argumentLine, codeLines); + sprintf(programSrc, + gHasLong ? 
sample_large_parmam_kernel_pattern[0] + : sample_large_int_parmam_kernel_pattern[0], + argumentLine, codeLines); ptr = programSrc; - if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&ptr, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&ptr, "sample_test") + != 0) { - log_info("Create program failed, decrementing number of parameters to try.\n"); + log_info("Create program failed, decrementing number of parameters " + "to try.\n"); numberOfIntParametersToTry -= decrement; continue; } @@ -1103,88 +1337,119 @@ int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_co &error); test_error(error, "clCreateBuffer failed"); - for (i=0; i<(int)numberOfIntParametersToTry; i++) { - if(gHasLong) - error = clSetKernelArg(kernel, i, sizeof(cl_long), &(((cl_long*)data)[i])); + for (i = 0; i < (int)numberOfIntParametersToTry; i++) + { + if (gHasLong) + error = clSetKernelArg(kernel, i, sizeof(cl_long), + &(((cl_long *)data)[i])); else - error = clSetKernelArg(kernel, i, sizeof(cl_int), &(((cl_int*)data)[i])); + error = clSetKernelArg(kernel, i, sizeof(cl_int), + &(((cl_int *)data)[i])); - if (error != CL_SUCCESS) { - log_info( "clSetKernelArg failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + if (error != CL_SUCCESS) + { + log_info("clSetKernelArg failed (%s), decrementing number of " + "parameters to try.\n", + IGetErrorString(error)); numberOfIntParametersToTry -= decrement; break; } } - if (error != CL_SUCCESS) - continue; + if (error != CL_SUCCESS) continue; error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mem); - if (error != CL_SUCCESS) { - log_info( "clSetKernelArg failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + if (error != CL_SUCCESS) + { + log_info("clSetKernelArg failed (%s), decrementing number of " + "parameters to try.\n", + IGetErrorString(error)); numberOfIntParametersToTry -= 
decrement; continue; } - size_t globalDim[3]={1,1,1}, localDim[3]={1,1,1}; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, 0, NULL, &event); - if (error != CL_SUCCESS) { - log_info( "clEnqueueNDRangeKernel failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + size_t globalDim[3] = { 1, 1, 1 }, localDim[3] = { 1, 1, 1 }; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, + localDim, 0, NULL, &event); + if (error != CL_SUCCESS) + { + log_info("clEnqueueNDRangeKernel failed (%s), decrementing number " + "of parameters to try.\n", + IGetErrorString(error)); numberOfIntParametersToTry -= decrement; continue; } // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error( + error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); - if(gHasLong) - error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_long), &long_result, 0, NULL, NULL); + if (gHasLong) + error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_long), + &long_result, 0, NULL, NULL); else - error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_int), &int_result, 0, NULL, NULL); + error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_int), + &int_result, 0, NULL, NULL); test_error(error, "clEnqueueReadBuffer failed") - free(data); + free(data); free(argumentLine); free(codeLines); free(programSrc); - if(gHasLong) + if (gHasLong) { - 
if (long_result != expectedResult) { - log_error("Expected result (%lld) does not equal actual result (%lld).\n", expectedResult, long_result); + if (long_result != expectedResult) + { + log_error("Expected result (%lld) does not equal actual result " + "(%lld).\n", + expectedResult, long_result); numberOfIntParametersToTry -= decrement; continue; - } else { - log_info("Results verified at %ld bytes of arguments.\n", sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_long)); + } + else + { + log_info("Results verified at %ld bytes of arguments.\n", + sizeof(cl_mem) + + numberOfIntParametersToTry * sizeof(cl_long)); break; } } else { - if (int_result != expectedResult) { - log_error("Expected result (%lld) does not equal actual result (%d).\n", expectedResult, int_result); + if (int_result != expectedResult) + { + log_error("Expected result (%lld) does not equal actual result " + "(%d).\n", + expectedResult, int_result); numberOfIntParametersToTry -= decrement; continue; - } else { - log_info("Results verified at %ld bytes of arguments.\n", sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_int)); + } + else + { + log_info("Results verified at %ld bytes of arguments.\n", + sizeof(cl_mem) + + numberOfIntParametersToTry * sizeof(cl_int)); break; } } } - if (numberOfIntParametersToTry == (long)numberExpected) - return 0; + if (numberOfIntParametersToTry == (long)numberExpected) return 0; return -1; } -int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_samplers(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_uint maxSamplers, i; @@ -1197,104 +1462,124 @@ int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_ cl_uint minRequiredSamplers = gIsEmbedded ? 
8 : 16; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) /* Get the max value */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_SAMPLERS, sizeof( maxSamplers ), &maxSamplers, NULL ); - test_error( error, "Unable to get max sampler count from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_SAMPLERS, + sizeof(maxSamplers), &maxSamplers, NULL); + test_error(error, "Unable to get max sampler count from device"); - if( maxSamplers < minRequiredSamplers ) + if (maxSamplers < minRequiredSamplers) { - log_error( "ERROR: Reported max sampler count is less than required! (%d)\n", (int)maxSamplers ); + log_error( + "ERROR: Reported max sampler count is less than required! (%d)\n", + (int)maxSamplers); return -1; } log_info("Reported max %d samplers.\n", maxSamplers); - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); - test_error( error, "Unable to get max parameter size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, + sizeof(maxParameterSize), &maxParameterSize, NULL); + test_error(error, "Unable to get max parameter size from device"); // Subtract the size of the result - maxParameterSize -= 2*sizeof(cl_mem); + maxParameterSize -= 2 * sizeof(cl_mem); // Calculate the number we can use - if (maxParameterSize/sizeof(cl_sampler) < maxSamplers) { - log_info("WARNING: Max parameter size of %d bytes limits test to %d max sampler arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_sampler))); - maxSamplers = (unsigned int)(maxParameterSize/sizeof(cl_sampler)); + if (maxParameterSize / sizeof(cl_sampler) < maxSamplers) + { + log_info("WARNING: Max parameter size of %d bytes limits test to %d " + "max sampler arguments.\n", + (int)maxParameterSize, + (int)(maxParameterSize / sizeof(cl_sampler))); + maxSamplers = (unsigned int)(maxParameterSize / sizeof(cl_sampler)); } /* Create a kernel to test with */ - 
programSrc = (char *)malloc( ( strlen( sample_sampler_kernel_pattern[ 1 ] ) + 8 ) * ( maxSamplers ) + - strlen( sample_sampler_kernel_pattern[ 0 ] ) + strlen( sample_sampler_kernel_pattern[ 2 ] ) + - ( strlen( sample_sampler_kernel_pattern[ 3 ] ) + 8 ) * maxSamplers + - strlen( sample_sampler_kernel_pattern[ 4 ] ) ); - strcpy( programSrc, sample_sampler_kernel_pattern[ 0 ] ); - for( i = 0; i < maxSamplers; i++ ) + programSrc = (char *)malloc( + (strlen(sample_sampler_kernel_pattern[1]) + 8) * (maxSamplers) + + strlen(sample_sampler_kernel_pattern[0]) + + strlen(sample_sampler_kernel_pattern[2]) + + (strlen(sample_sampler_kernel_pattern[3]) + 8) * maxSamplers + + strlen(sample_sampler_kernel_pattern[4])); + strcpy(programSrc, sample_sampler_kernel_pattern[0]); + for (i = 0; i < maxSamplers; i++) { - sprintf( samplerLine, sample_sampler_kernel_pattern[ 1 ], i ); - strcat( programSrc, samplerLine ); + sprintf(samplerLine, sample_sampler_kernel_pattern[1], i); + strcat(programSrc, samplerLine); } - strcat( programSrc, sample_sampler_kernel_pattern[ 2 ] ); - for( i = 0; i < maxSamplers; i++ ) + strcat(programSrc, sample_sampler_kernel_pattern[2]); + for (i = 0; i < maxSamplers; i++) { - sprintf( samplerLine, sample_sampler_kernel_pattern[ 3 ], i ); - strcat( programSrc, samplerLine ); + sprintf(samplerLine, sample_sampler_kernel_pattern[3], i); + strcat(programSrc, samplerLine); } - strcat( programSrc, sample_sampler_kernel_pattern[ 4 ] ); + strcat(programSrc, sample_sampler_kernel_pattern[4]); - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test"); - test_error( error, "Failed to create the program and kernel."); + error = + create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programSrc, "sample_test"); + test_error(error, "Failed to create the program and kernel."); // We have to set up some fake parameters so it'll work clSamplerWrapper *samplers = new clSamplerWrapper[maxSamplers]; 
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - clMemWrapper image = create_image_2d( context, CL_MEM_READ_WRITE, &format, 16, 16, 0, NULL, &error ); - test_error( error, "Unable to create a test image" ); + clMemWrapper image = create_image_2d(context, CL_MEM_READ_WRITE, &format, + 16, 16, 0, NULL, &error); + test_error(error, "Unable to create a test image"); clMemWrapper stream = clCreateBuffer(context, CL_MEM_READ_WRITE, 16, NULL, &error); - test_error( error, "Unable to create test buffer" ); + test_error(error, "Unable to create test buffer"); - error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &image ); - error |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), &stream ); - test_error( error, "Unable to set kernel arguments" ); - for( i = 0; i < maxSamplers; i++ ) + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &image); + error |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &stream); + test_error(error, "Unable to set kernel arguments"); + for (i = 0; i < maxSamplers; i++) { - samplers[ i ] = clCreateSampler( context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); - test_error( error, "Unable to create sampler" ); + samplers[i] = clCreateSampler(context, CL_FALSE, CL_ADDRESS_NONE, + CL_FILTER_NEAREST, &error); + test_error(error, "Unable to create sampler"); - error = clSetKernelArg( kernel, 2 + i, sizeof( cl_sampler ), &samplers[ i ] ); - test_error( error, "Unable to set sampler argument" ); + error = clSetKernelArg(kernel, 2 + i, sizeof(cl_sampler), &samplers[i]); + test_error(error, "Unable to set sampler argument"); } - size_t globalDim[3]={1,1,1}, localDim[3]={1,1,1}; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, 0, NULL, &event); - test_error(error, "clEnqueueNDRangeKernel failed with maximum number of samplers."); + size_t globalDim[3] = { 1, 1, 1 }, localDim[3] = { 1, 1, 1 }; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, + 0, NULL, &event); + test_error( + error, + 
"clEnqueueNDRangeKernel failed with maximum number of samplers."); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error(error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); - free( programSrc ); + free(programSrc); delete[] samplers; return 0; } #define PASSING_FRACTION 4 -int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; clProgramWrapper program; clKernelWrapper kernel; - size_t threads[1], localThreads[1]; + size_t threads[1], localThreads[1]; cl_int *constantData, *resultData; cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize; int i; @@ -1303,48 +1588,56 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, MTdata d; /* Verify our test buffer won't be bigger than allowed */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); - test_error( error, "Unable to get max constant buffer size" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + sizeof(maxSize), &maxSize, 0); + test_error(error, "Unable to get max constant buffer size"); - if( ( 0 == gIsEmbedded && maxSize < 64L * 1024L ) || maxSize < 1L * 1024L ) + if ((0 == gIsEmbedded && maxSize < 64L * 
1024L) || maxSize < 1L * 1024L) { - log_error( "ERROR: Reported max constant buffer size less than required by OpenCL 1.0 (reported %d KB)\n", (int)( maxSize / 1024L ) ); + log_error("ERROR: Reported max constant buffer size less than required " + "by OpenCL 1.0 (reported %d KB)\n", + (int)(maxSize / 1024L)); return -1; } log_info("Reported max constant buffer size of %lld bytes.\n", maxSize); // Limit test buffer size to 1/8 of CL_DEVICE_GLOBAL_MEM_SIZE - error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0); + error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, + sizeof(maxGlobalSize), &maxGlobalSize, 0); test_error(error, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE"); - if (maxSize > maxGlobalSize / 8) - maxSize = maxGlobalSize / 8; + if (maxSize > maxGlobalSize / 8) maxSize = maxGlobalSize / 8; - error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, 0); test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE "); - - if (maxSize > maxAllocSize) - maxSize = maxAllocSize; - + + if (maxSize > maxAllocSize) maxSize = maxAllocSize; + /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_arg_kernel, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + sample_const_arg_kernel, "sample_test") + != 0) { return -1; } /* Try the returned max size and decrease it until we get one that works. 
*/ - stepSize = maxSize/16; + stepSize = maxSize / 16; currentSize = maxSize; int allocPassed = 0; - d = init_genrand( gRandomSeed ); - while (!allocPassed && currentSize >= maxSize/PASSING_FRACTION) { - log_info("Attempting to allocate constant buffer of size %lld bytes\n", maxSize); + d = init_genrand(gRandomSeed); + while (!allocPassed && currentSize >= maxSize / PASSING_FRACTION) + { + log_info("Attempting to allocate constant buffer of size %lld bytes\n", + maxSize); /* Create some I/O streams */ - size_t sizeToAllocate = ((size_t)currentSize/sizeof( cl_int ))*sizeof(cl_int); - size_t numberOfInts = sizeToAllocate/sizeof(cl_int); - constantData = (cl_int *)malloc( sizeToAllocate); + size_t sizeToAllocate = + ((size_t)currentSize / sizeof(cl_int)) * sizeof(cl_int); + size_t numberOfInts = sizeToAllocate / sizeof(cl_int); + constantData = (cl_int *)malloc(sizeToAllocate); if (constantData == NULL) { log_error("Failed to allocate memory for constantData!\n"); @@ -1352,53 +1645,74 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, return EXIT_FAILURE; } - for(i=0; i<(int)(numberOfInts); i++) + for (i = 0; i < (int)(numberOfInts); i++) constantData[i] = (int)genrand_int32(d); clMemWrapper streams[3]; streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate, constantData, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); /* Set the arguments */ - error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); - test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), 
&streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); /* Test running the kernel and verifying it */ threads[0] = numberOfInts; localThreads[0] = 1; - log_info("Filling constant buffer with %d cl_ints (%d bytes).\n", (int)threads[0], (int)(threads[0]*sizeof(cl_int))); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &event ); - /* If we failed due to a resource issue, reduce the size and try again. */ - if ((error == CL_OUT_OF_RESOURCES) || (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) || (error == CL_OUT_OF_HOST_MEMORY)) { - log_info("Kernel enqueue failed at size %lld, trying at a reduced size.\n", currentSize); + log_info("Filling constant buffer with %d cl_ints (%d bytes).\n", + (int)threads[0], (int)(threads[0] * sizeof(cl_int))); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event); + /* If we failed due to a resource issue, reduce the size and try again. 
+ */ + if ((error == CL_OUT_OF_RESOURCES) + || (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) + || (error == CL_OUT_OF_HOST_MEMORY)) + { + log_info("Kernel enqueue failed at size %lld, trying at a reduced " + "size.\n", + currentSize); currentSize -= stepSize; free(constantData); continue; } - test_error( error, "clEnqueueNDRangeKernel with maximum constant buffer size failed."); + test_error( + error, + "clEnqueueNDRangeKernel with maximum constant buffer size failed."); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error( + error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); - if (event_status < 0) { - if ((event_status == CL_OUT_OF_RESOURCES) || (event_status == CL_MEM_OBJECT_ALLOCATION_FAILURE) || (event_status == CL_OUT_OF_HOST_MEMORY)) { - log_info("Kernel event indicates failure at size %lld, trying at a reduced size.\n", currentSize); + if (event_status < 0) + { + if ((event_status == CL_OUT_OF_RESOURCES) + || (event_status == CL_MEM_OBJECT_ALLOCATION_FAILURE) + || (event_status == CL_OUT_OF_HOST_MEMORY)) + { + log_info("Kernel event indicates failure at size %lld, trying " + "at a reduced size.\n", + currentSize); currentSize -= stepSize; free(constantData); continue; - } else { + } + else + { test_error(error, "Kernel execution event returned error"); } } @@ -1415,30 +1729,41 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, return EXIT_FAILURE; } - error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, 
resultData, 0, NULL, NULL); - test_error( error, "clEnqueueReadBuffer failed"); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, + sizeToAllocate, resultData, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); - for(i=0; i<(int)(numberOfInts); i++) - if (constantData[i] != resultData[i]) { - log_error("Data failed to verify: constantData[%d]=%d != resultData[%d]=%d\n", + for (i = 0; i < (int)(numberOfInts); i++) + if (constantData[i] != resultData[i]) + { + log_error("Data failed to verify: constantData[%d]=%d != " + "resultData[%d]=%d\n", i, constantData[i], i, resultData[i]); - free( constantData ); + free(constantData); free(resultData); - free_mtdata(d); d = NULL; + free_mtdata(d); + d = NULL; return -1; } - free( constantData ); + free(constantData); free(resultData); } - free_mtdata(d); d = NULL; + free_mtdata(d); + d = NULL; - if (allocPassed) { - if (currentSize < maxSize/PASSING_FRACTION) { - log_error("Failed to allocate at least 1/8 of the reported constant size.\n"); + if (allocPassed) + { + if (currentSize < maxSize / PASSING_FRACTION) + { + log_error("Failed to allocate at least 1/8 of the reported " + "constant size.\n"); return -1; - } else if (currentSize != maxSize) { - log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize); + } + else if (currentSize != maxSize) + { + log_info("Passed at reduced size. 
(%lld of %lld bytes)\n", + currentSize, maxSize); return 0; } return 0; @@ -1446,13 +1771,14 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, return -1; } -int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_constant_args(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; clProgramWrapper program; clKernelWrapper kernel; - clMemWrapper *streams; - size_t threads[1], localThreads[1]; + clMemWrapper *streams; + size_t threads[1], localThreads[1]; cl_uint i, maxArgs; cl_ulong maxSize; cl_ulong maxParameterSize; @@ -1465,119 +1791,145 @@ int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_com /* Verify our test buffer won't be bigger than allowed */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof( maxArgs ), &maxArgs, 0 ); - test_error( error, "Unable to get max constant arg count" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_CONSTANT_ARGS, + sizeof(maxArgs), &maxArgs, 0); + test_error(error, "Unable to get max constant arg count"); - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); - test_error( error, "Unable to get max parameter size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, + sizeof(maxParameterSize), &maxParameterSize, NULL); + test_error(error, "Unable to get max parameter size from device"); // Subtract the size of the result maxParameterSize -= sizeof(cl_mem); // Calculate the number we can use - if (maxParameterSize/sizeof(cl_mem) < maxArgs) { - log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem))); - maxArgs = (unsigned int)(maxParameterSize/sizeof(cl_mem)); + if (maxParameterSize / sizeof(cl_mem) < maxArgs) + { + 
log_info("WARNING: Max parameter size of %d bytes limits test to %d " + "max image arguments.\n", + (int)maxParameterSize, + (int)(maxParameterSize / sizeof(cl_mem))); + maxArgs = (unsigned int)(maxParameterSize / sizeof(cl_mem)); } - if( maxArgs < (gIsEmbedded ? 4 : 8) ) + if (maxArgs < (gIsEmbedded ? 4 : 8)) { - log_error( "ERROR: Reported max constant arg count less than required by OpenCL 1.0 (reported %d)\n", (int)maxArgs ); + log_error("ERROR: Reported max constant arg count less than required " + "by OpenCL 1.0 (reported %d)\n", + (int)maxArgs); return -1; } - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); - test_error( error, "Unable to get max constant buffer size" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + sizeof(maxSize), &maxSize, 0); + test_error(error, "Unable to get max constant buffer size"); individualBufferSize = (maxSize / 2) / maxArgs; - log_info("Reported max constant arg count of %d and max constant buffer size of %d. Test will attempt to allocate half of that, or %d buffers of size %d.\n", - (int)maxArgs, (int)maxSize, (int)maxArgs, (int)individualBufferSize); + log_info( + "Reported max constant arg count of %u and max constant buffer " + "size of %llu. 
Test will attempt to allocate half of that, or %llu " + "buffers of size %zu.\n", + maxArgs, maxSize, maxArgs, individualBufferSize); - str2 = (char*)malloc(sizeof(char)*32*(maxArgs+2)); - constArgs = (char*)malloc(sizeof(char)*32*(maxArgs+2)); - programSrc = (char*)malloc(sizeof(char)*32*2*(maxArgs+2)+1024); + str2 = (char *)malloc(sizeof(char) * 32 * (maxArgs + 2)); + constArgs = (char *)malloc(sizeof(char) * 32 * (maxArgs + 2)); + programSrc = (char *)malloc(sizeof(char) * 32 * 2 * (maxArgs + 2) + 1024); /* Create a test program */ constArgs[0] = 0; str2[0] = 0; - for( i = 0; i < maxArgs-1; i++ ) - { - sprintf( str, ", __constant int *src%d", (int)( i + 2 ) ); - strcat( constArgs, str ); - sprintf( str2 + strlen( str2), "\tdst[tid] += src%d[tid];\n", (int)(i+2)); - if (strlen(str2) > (sizeof(char)*32*(maxArgs+2)-32) || strlen(constArgs) > (sizeof(char)*32*(maxArgs+2)-32)) { - log_info("Limiting number of arguments tested to %d due to test program allocation size.\n", i); + for (i = 0; i < maxArgs - 1; i++) + { + sprintf(str, ", __constant int *src%d", (int)(i + 2)); + strcat(constArgs, str); + sprintf(str2 + strlen(str2), "\tdst[tid] += src%d[tid];\n", + (int)(i + 2)); + if (strlen(str2) > (sizeof(char) * 32 * (maxArgs + 2) - 32) + || strlen(constArgs) > (sizeof(char) * 32 * (maxArgs + 2) - 32)) + { + log_info("Limiting number of arguments tested to %d due to test " + "program allocation size.\n", + i); break; } } - sprintf( programSrc, sample_const_max_arg_kernel_pattern, constArgs, str2 ); + sprintf(programSrc, sample_const_max_arg_kernel_pattern, constArgs, str2); /* Create a kernel to test with */ ptr = programSrc; - if( create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_test") + != 0) { return -1; } /* Create some I/O streams */ - streams = new clMemWrapper[ maxArgs + 1 ]; - for( i = 0; i < maxArgs + 1; i++ ) + streams = new 
clMemWrapper[maxArgs + 1]; + for (i = 0; i < maxArgs + 1; i++) { streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, individualBufferSize, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); } /* Set the arguments */ - for( i = 0; i < maxArgs + 1; i++ ) + for (i = 0; i < maxArgs + 1; i++) { - error = clSetKernelArg(kernel, i, sizeof( streams[i] ), &streams[i]); - test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); + test_error(error, "Unable to set kernel argument"); } /* Test running the kernel and verifying it */ threads[0] = (size_t)10; - while (threads[0]*sizeof(cl_int) > individualBufferSize) - threads[0]--; + while (threads[0] * sizeof(cl_int) > individualBufferSize) threads[0]--; - error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); - test_error( error, "Unable to get work group size to use" ); + error = get_max_common_work_group_size(context, kernel, threads[0], + &localThreads[0]); + test_error(error, "Unable to get work group size to use"); - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &event ); - test_error( error, "clEnqueueNDRangeKernel failed"); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event); + test_error(error, "clEnqueueNDRangeKernel failed"); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error(error, + "clGetEventInfo 
for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); error = clFinish(queue); - test_error( error, "clFinish failed."); + test_error(error, "clFinish failed."); - delete [] streams; + delete[] streams; free(str2); free(constArgs); free(programSrc); return 0; } -int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_compute_units(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_uint value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get compute unit count" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, + sizeof(value), &value, 0); + test_error(error, "Unable to get compute unit count"); - if( value < 1 ) + if (value < 1) { - log_error( "ERROR: Reported compute unit count less than required by OpenCL 1.0 (reported %d)\n", (int)value ); + log_error("ERROR: Reported compute unit count less than required by " + "OpenCL 1.0 (reported %d)\n", + (int)value); return -1; } @@ -1586,18 +1938,22 @@ int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_com return 0; } -int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_address_bits(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_uint value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_ADDRESS_BITS, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get address bit count" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_ADDRESS_BITS, sizeof(value), + &value, 0); + test_error(error, "Unable to get address bit count"); - if( value != 32 && value != 64 ) + if (value != 32 && value != 64) { - log_error( "ERROR: Reported address bit 
count not valid by OpenCL 1.0 (reported %d)\n", (int)value ); + log_error("ERROR: Reported address bit count not valid by OpenCL 1.0 " + "(reported %d)\n", + (int)value); return -1; } @@ -1606,68 +1962,84 @@ int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_comm return 0; } -int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_device_fp_config value; char profile[128] = ""; - error = clGetDeviceInfo( deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get device single fp config" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(value), + &value, 0); + test_error(error, "Unable to get device single fp config"); - //Check to see if we are an embedded profile device - if((error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ))) + // Check to see if we are an embedded profile device + if ((error = clGetDeviceInfo(deviceID, CL_DEVICE_PROFILE, sizeof(profile), + profile, NULL))) { - log_error( "FAILURE: Unable to get CL_DEVICE_PROFILE: error %d\n", error ); + log_error("FAILURE: Unable to get CL_DEVICE_PROFILE: error %d\n", + error); return error; } - if( 0 == strcmp( profile, "EMBEDDED_PROFILE" )) + if (0 == strcmp(profile, "EMBEDDED_PROFILE")) { // embedded device - if( 0 == (value & (CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO))) + if (0 == (value & (CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO))) { - log_error( "FAILURE: embedded device supports neither CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n" ); + log_error("FAILURE: embedded device supports neither " + "CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n"); return -1; } } else { // Full profile - if( ( value & ( CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN )) != ( CL_FP_ROUND_TO_NEAREST 
| CL_FP_INF_NAN ) ) + if ((value & (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN)) + != (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN)) { - log_error( "ERROR: Reported single fp config doesn't meet minimum set by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + log_error("ERROR: Reported single fp config doesn't meet minimum " + "set by OpenCL 1.0 (reported 0x%08x)\n", + (int)value); return -1; } } return 0; } -int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_device_fp_config value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get device double fp config" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(value), + &value, 0); + test_error(error, "Unable to get device double fp config"); - if (value == 0) - return 0; + if (value == 0) return 0; - if( ( value & (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)) != ( CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM) ) + if ((value + & (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO + | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)) + != (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO + | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)) { - log_error( "ERROR: Reported double fp config doesn't meet minimum set by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + log_error("ERROR: Reported double fp config doesn't meet minimum set " + "by OpenCL 1.0 (reported 0x%08x)\n", + (int)value); return -1; } return 0; } -int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_local_mem_size(cl_device_id 
deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; clProgramWrapper program; clKernelWrapper kernel; - clMemWrapper streams[3]; - size_t threads[1], localThreads[1]; + clMemWrapper streams[3]; + size_t threads[1], localThreads[1]; cl_int *localData, *resultData; cl_ulong maxSize, kernelLocalUsage, min_max_local_mem_size; Version device_version; @@ -1676,8 +2048,9 @@ int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_co MTdata d; /* Verify our test buffer won't be bigger than allowed */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( maxSize ), &maxSize, 0 ); - test_error( error, "Unable to get max local buffer size" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(maxSize), + &maxSize, 0); + test_error(error, "Unable to get max local buffer size"); try { @@ -1709,65 +2082,80 @@ int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_co return -1; } - log_info("Reported max local buffer size for device: %lld bytes.\n", maxSize); + log_info("Reported max local buffer size for device: %lld bytes.\n", + maxSize); /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_local_arg_kernel, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + sample_local_arg_kernel, "sample_test") + != 0) { return -1; } - error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernelLocalUsage), &kernelLocalUsage, NULL); - test_error(error, "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); + error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(kernelLocalUsage), + &kernelLocalUsage, NULL); + test_error(error, + "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); - log_info("Reported local buffer usage for kernel (CL_KERNEL_LOCAL_MEM_SIZE): %lld bytes.\n", kernelLocalUsage); + 
log_info("Reported local buffer usage for kernel " + "(CL_KERNEL_LOCAL_MEM_SIZE): %lld bytes.\n", + kernelLocalUsage); /* Create some I/O streams */ - size_t sizeToAllocate = ((size_t)(maxSize-kernelLocalUsage)/sizeof( cl_int ))*sizeof(cl_int); - size_t numberOfInts = sizeToAllocate/sizeof(cl_int); + size_t sizeToAllocate = + ((size_t)(maxSize - kernelLocalUsage) / sizeof(cl_int)) + * sizeof(cl_int); + size_t numberOfInts = sizeToAllocate / sizeof(cl_int); - log_info("Attempting to use %lld bytes of local memory.\n", (cl_ulong)sizeToAllocate); + log_info("Attempting to use %zu bytes of local memory.\n", sizeToAllocate); - localData = (cl_int *)malloc( sizeToAllocate ); - d = init_genrand( gRandomSeed ); - for(i=0; i<(int)(numberOfInts); i++) + localData = (cl_int *)malloc(sizeToAllocate); + d = init_genrand(gRandomSeed); + for (i = 0; i < (int)(numberOfInts); i++) localData[i] = (int)genrand_int32(d); - free_mtdata(d); d = NULL; + free_mtdata(d); + d = NULL; streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate, localData, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); /* Set the arguments */ error = clSetKernelArg(kernel, 0, sizeToAllocate, NULL); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 1, sizeof( streams[0] ), &streams[0]); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 2, sizeof( streams[1] ), &streams[1]); - test_error( error, "Unable to set indexed kernel arguments" ); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 1, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = 
clSetKernelArg(kernel, 2, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); /* Test running the kernel and verifying it */ threads[0] = numberOfInts; localThreads[0] = 1; - log_info("Creating local buffer with %d cl_ints (%d bytes).\n", (int)numberOfInts, (int)sizeToAllocate); + log_info("Creating local buffer with %zu cl_ints (%zu bytes).\n", + numberOfInts, sizeToAllocate); cl_event evt; - cl_int evt_err; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &evt ); + cl_int evt_err; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &evt); test_error(error, "clEnqueueNDRangeKernel failed"); error = clFinish(queue); - test_error( error, "clFinish failed"); + test_error(error, "clFinish failed"); - error = clGetEventInfo(evt, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof evt_err, &evt_err, NULL); - test_error( error, "clGetEventInfo with maximum local buffer size failed."); + error = clGetEventInfo(evt, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof evt_err, &evt_err, NULL); + test_error(error, "clGetEventInfo with maximum local buffer size failed."); - if (evt_err != CL_COMPLETE) { + if (evt_err != CL_COMPLETE) + { print_error(evt_err, "Kernel event returned error"); clReleaseEvent(evt); return -1; @@ -1775,95 +2163,118 @@ int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_co resultData = (cl_int *)malloc(sizeToAllocate); - error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, resultData, 0, NULL, NULL); - test_error( error, "clEnqueueReadBuffer failed"); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, + resultData, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); - for(i=0; i<(int)(numberOfInts); i++) - if (localData[i] != resultData[i]) { + for (i = 0; i < (int)(numberOfInts); i++) + if (localData[i] != resultData[i]) + { clReleaseEvent(evt); - free( 
localData ); + free(localData); free(resultData); log_error("Results failed to verify.\n"); return -1; } clReleaseEvent(evt); - free( localData ); + free(localData); free(resultData); return err; } -int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_kernel_preferred_work_group_size_multiple( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) { - int err; + int err; clProgramWrapper program; clKernelWrapper kernel; size_t max_local_workgroup_size[3]; size_t max_workgroup_size = 0, preferred_workgroup_size = 0; - err = create_single_kernel_helper(context, &program, &kernel, 1, sample_local_arg_kernel, "sample_test" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + sample_local_arg_kernel, "sample_test"); test_error(err, "Failed to build kernel/program."); err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, - sizeof(max_workgroup_size), &max_workgroup_size, NULL); + sizeof(max_workgroup_size), + &max_workgroup_size, NULL); test_error(err, "clGetKernelWorkgroupInfo failed."); - err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, - sizeof(preferred_workgroup_size), &preferred_workgroup_size, NULL); + err = clGetKernelWorkGroupInfo( + kernel, deviceID, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, + sizeof(preferred_workgroup_size), &preferred_workgroup_size, NULL); test_error(err, "clGetKernelWorkgroupInfo failed."); - err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); + err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, + sizeof(max_local_workgroup_size), + max_local_workgroup_size, NULL); test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); - // Since the preferred size is only a performance hint, we can only really check 
that we get a sane value - // back - log_info( "size: %ld preferred: %ld max: %ld\n", max_workgroup_size, preferred_workgroup_size, max_local_workgroup_size[0] ); + // Since the preferred size is only a performance hint, we can only really + // check that we get a sane value back + log_info("size: %ld preferred: %ld max: %ld\n", max_workgroup_size, + preferred_workgroup_size, max_local_workgroup_size[0]); - if( preferred_workgroup_size > max_workgroup_size ) + if (preferred_workgroup_size > max_workgroup_size) { - log_error( "ERROR: Reported preferred workgroup multiple larger than max workgroup size (preferred %ld, max %ld)\n", preferred_workgroup_size, max_workgroup_size ); + log_error("ERROR: Reported preferred workgroup multiple larger than " + "max workgroup size (preferred %ld, max %ld)\n", + preferred_workgroup_size, max_workgroup_size); return -1; } return 0; } -int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_execution_capabilities(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_device_exec_capabilities value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get execution capabilities" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_EXECUTION_CAPABILITIES, + sizeof(value), &value, 0); + test_error(error, "Unable to get execution capabilities"); - if( ( value & CL_EXEC_KERNEL ) != CL_EXEC_KERNEL ) + if ((value & CL_EXEC_KERNEL) != CL_EXEC_KERNEL) { - log_error( "ERROR: Reported execution capabilities less than required by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + log_error("ERROR: Reported execution capabilities less than required " + "by OpenCL 1.0 (reported 0x%08x)\n", + (int)value); return -1; } return 0; } -int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int 
num_elements) +int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_command_queue_properties value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get queue properties" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, + sizeof(value), &value, 0); + test_error(error, "Unable to get queue properties"); - if( ( value & CL_QUEUE_PROFILING_ENABLE ) != CL_QUEUE_PROFILING_ENABLE ) + if ((value & CL_QUEUE_PROFILING_ENABLE) != CL_QUEUE_PROFILING_ENABLE) { - log_error( "ERROR: Reported queue properties less than required by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + log_error("ERROR: Reported queue properties less than required by " + "OpenCL 1.0 (reported 0x%08x)\n", + (int)value); return -1; } return 0; } -int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_device_version(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { // Query for the device version. 
Version device_cl_version = get_device_cl_version(deviceID); @@ -1959,84 +2370,101 @@ int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_co return 0; } -int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_language_version(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int error; - cl_char buffer[ 4098 ]; + cl_char buffer[4098]; size_t length; // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" - error = clGetDeviceInfo( deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof( buffer ), buffer, &length ); - test_error( error, "Unable to get device opencl c version string" ); - if( memcmp( buffer, "OpenCL C ", strlen( "OpenCL C " ) ) != 0 ) - { - log_error( "ERROR: Initial part of device language version string does not match required format! (returned: \"%s\")\n", (char *)buffer ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_OPENCL_C_VERSION, + sizeof(buffer), buffer, &length); + test_error(error, "Unable to get device opencl c version string"); + if (memcmp(buffer, "OpenCL C ", strlen("OpenCL C ")) != 0) + { + log_error("ERROR: Initial part of device language version string does " + "not match required format! (returned: \"%s\")\n", + (char *)buffer); return -1; } log_info("Returned version \"%s\".\n", buffer); - char *p1 = (char *)buffer + strlen( "OpenCL C " ); - while( *p1 == ' ' ) - p1++; + char *p1 = (char *)buffer + strlen("OpenCL C "); + while (*p1 == ' ') p1++; char *p2 = p1; - if( ! isdigit(*p2) ) + if (!isdigit(*p2)) { - log_error( "ERROR: Major revision number must follow space behind OpenCL C! (returned %s)\n", (char*) buffer ); + log_error("ERROR: Major revision number must follow space behind " + "OpenCL C! (returned %s)\n", + (char *)buffer); return -1; } - while( isdigit( *p2 ) ) - p2++; - if( *p2 != '.' 
) + while (isdigit(*p2)) p2++; + if (*p2 != '.') { - log_error( "ERROR: Version number must contain a decimal point! (returned: %s)\n", (char *)buffer ); + log_error("ERROR: Version number must contain a decimal point! " + "(returned: %s)\n", + (char *)buffer); return -1; } char *p3 = p2 + 1; - if( ! isdigit(*p3) ) + if (!isdigit(*p3)) { - log_error( "ERROR: Minor revision number is missing or does not abut the decimal point! (returned %s)\n", (char*) buffer ); + log_error("ERROR: Minor revision number is missing or does not abut " + "the decimal point! (returned %s)\n", + (char *)buffer); return -1; } - while( isdigit( *p3 ) ) - p3++; - if( *p3 != ' ' ) + while (isdigit(*p3)) p3++; + if (*p3 != ' ') { - log_error( "ERROR: A space must appear after the minor version! (returned: %s)\n", (char *)buffer ); + log_error("ERROR: A space must appear after the minor version! " + "(returned: %s)\n", + (char *)buffer); return -1; } *p2 = ' '; // Put in a space for atoi below. p2++; - int major = atoi( p1 ); - int minor = atoi( p2 ); + int major = atoi(p1); + int minor = atoi(p2); int minor_revision = 2; - if( major * 10 + minor < 10 + minor_revision ) + if (major * 10 + minor < 10 + minor_revision) { - // If the language version did not match, check to see if OPENCL_1_0_DEVICE is set. - if( getenv("OPENCL_1_0_DEVICE")) + // If the language version did not match, check to see if + // OPENCL_1_0_DEVICE is set. + if (getenv("OPENCL_1_0_DEVICE")) { - log_info( "WARNING: This test was run with OPENCL_1_0_DEVICE defined! This is not a OpenCL 1.1 or OpenCL 1.2 compatible device!!!\n" ); + log_info("WARNING: This test was run with OPENCL_1_0_DEVICE " + "defined! This is not a OpenCL 1.1 or OpenCL 1.2 " + "compatible device!!!\n"); } - else if( getenv("OPENCL_1_1_DEVICE")) + else if (getenv("OPENCL_1_1_DEVICE")) { - log_info( "WARNING: This test was run with OPENCL_1_1_DEVICE defined! 
This is not a OpenCL 1.2 compatible device!!!\n" ); + log_info( + "WARNING: This test was run with OPENCL_1_1_DEVICE defined! " + "This is not a OpenCL 1.2 compatible device!!!\n"); } else { - log_error( "ERROR: OpenCL device language version returned is less than 1.%d! (Returned: %s)\n", minor_revision, (char *)buffer ); - return -1; + log_error("ERROR: OpenCL device language version returned is less " + "than 1.%d! (Returned: %s)\n", + minor_revision, (char *)buffer); + return -1; } } // Sanity checks on the returned values - if( length != (strlen( (char *)buffer ) + 1 )) + if (length != (strlen((char *)buffer) + 1)) { - log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( (char *)buffer ), (int)length ); + log_error("ERROR: Returned length of version string does not match " + "actual length (actual: %d, returned: %d)\n", + (int)strlen((char *)buffer), (int)length); return -1; } return 0; } - -- cgit v1.2.3 From 8ffecf27c28d28296180cde282e5665bc2cb2c00 Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Wed, 8 Dec 2021 17:07:42 +0100 Subject: Fix build, glext should not be used with GLEW (#1337) * Fix build, glext should not be used with GLEW * Remove additional define GL_GLEXT_PROTOTYPES * Remove includes which already defined in setup.h --- test_common/gl/setup_win32.cpp | 3 --- test_common/gl/setup_x11.cpp | 5 ----- 2 files changed, 8 deletions(-) diff --git a/test_common/gl/setup_win32.cpp b/test_common/gl/setup_win32.cpp index b120a36d..708e681d 100644 --- a/test_common/gl/setup_win32.cpp +++ b/test_common/gl/setup_win32.cpp @@ -13,14 +13,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#define GL_GLEXT_PROTOTYPES #include "setup.h" #include "testBase.h" #include "harness/errorHelpers.h" -#include -#include #include typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( diff --git a/test_common/gl/setup_x11.cpp b/test_common/gl/setup_x11.cpp index 7efda3d2..abc065c9 100644 --- a/test_common/gl/setup_x11.cpp +++ b/test_common/gl/setup_x11.cpp @@ -13,16 +13,11 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#define GL_GLEXT_PROTOTYPES #include "setup.h" #include "testBase.h" #include "harness/errorHelpers.h" -#include -#include -#include -#include #include #include -- cgit v1.2.3 From 73d71b6a76ce9697c5224a0933157355302d5002 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 8 Dec 2021 16:08:15 +0000 Subject: Add cl_khr_command_buffer to list of extensions (#1365) cl_khr_command_buffer is now public as a provisional khr extension which implementations may report. --- test_conformance/compiler/test_compiler_defines_for_extensions.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 2f29d39b..1519779a 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -78,6 +78,7 @@ const char *known_extensions[] = { "cl_khr_semaphore", "cl_khr_external_semaphore", "cl_khr_external_semaphore_sync_fd", + "cl_khr_command_buffer", }; size_t num_known_extensions = sizeof(known_extensions) / sizeof(char *); -- cgit v1.2.3 From 1161d788dd5d71885ca19783210f18c305715a7f Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 14 Dec 2021 17:52:44 +0000 Subject: Refactor logging of subgroup test start/pass messages (#1361) Note that this also corrects the start messages logged for the sub_group_ballot_bit_count/find_msb/find_lsb tests. 
Signed-off-by: Stuart Brady --- .../subgroups/subgroup_common_templates.h | 101 +++++++++++---------- test_conformance/subgroups/subhelpers.h | 50 +++++++--- test_conformance/subgroups/test_barrier.cpp | 16 +++- test_conformance/subgroups/test_ifp.cpp | 8 +- test_conformance/subgroups/test_subgroup.cpp | 9 +- .../subgroups/test_subgroup_ballot.cpp | 48 +++++++--- .../subgroups/test_subgroup_clustered_reduce.cpp | 14 +-- .../subgroups/test_subgroup_non_uniform_vote.cpp | 16 ++-- 8 files changed, 164 insertions(+), 98 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 64b4b971..fc0b03b5 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -63,6 +63,13 @@ static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id, // only 4 work_items from subgroup enter the code (are active) template struct BC { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s(%s)...%s\n", operation_names(operation), + TypeManager::name(), extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int i, ii, j, k, n; @@ -76,8 +83,6 @@ template struct BC int last_subgroup_size = 0; ii = 0; - log_info(" sub_group_%s(%s)...\n", operation_names(operation), - TypeManager::name()); if (non_uniform_size) { ng++; @@ -286,8 +291,6 @@ template struct BC y += nw; m += 4 * nw; } - log_info(" sub_group_%s(%s)... 
passed\n", operation_names(operation), - TypeManager::name()); return TEST_PASS; } }; @@ -437,6 +440,13 @@ void genrand(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) template struct SHF { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s(%s)...%s\n", operation_names(operation), + TypeManager::name(), extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int i, ii, j, k, l, n, delta; @@ -447,8 +457,6 @@ template struct SHF int d = ns > 100 ? 100 : ns; ii = 0; ng = ng / nw; - log_info(" sub_group_%s(%s)...\n", operation_names(operation), - TypeManager::name()); for (k = 0; k < ng; ++k) { // for each work_group for (j = 0; j < nj; ++j) @@ -560,26 +568,29 @@ template struct SHF y += nw; m += 4 * nw; } - log_info(" sub_group_%s(%s)... passed\n", operation_names(operation), - TypeManager::name()); return TEST_PASS; } }; template struct SCEX_NU { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? "sub_group_non_uniform_scan_exclusive" + : "sub_group_scan_exclusive"); + log_info(" %s_%s(%s)...%s\n", func_name.c_str(), + operation_names(operation), TypeManager::name(), + extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - std::string func_name; - test_params.work_items_mask.any() - ? func_name = "sub_group_non_uniform_scan_exclusive" - : func_name = "sub_group_scan_exclusive"; - log_info(" %s_%s(%s)...\n", func_name.c_str(), - operation_names(operation), TypeManager::name()); genrand(x, t, m, ns, nw, ng); } @@ -595,11 +606,9 @@ template struct SCEX_NU Ty tr, rr; ng = ng / nw; - std::string func_name; - test_params.work_items_mask.any() - ? 
func_name = "sub_group_non_uniform_scan_exclusive" - : func_name = "sub_group_scan_exclusive"; - + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? "sub_group_non_uniform_scan_exclusive" + : "sub_group_scan_exclusive"); // for uniform case take into consideration all workitems if (!work_items_mask.any()) @@ -656,8 +665,6 @@ template struct SCEX_NU m += 4 * nw; } - log_info(" %s_%s(%s)... passed\n", func_name.c_str(), - operation_names(operation), TypeManager::name()); return TEST_PASS; } }; @@ -665,20 +672,24 @@ template struct SCEX_NU // Test for scan inclusive non uniform functions template struct SCIN_NU { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? "sub_group_non_uniform_scan_inclusive" + : "sub_group_scan_inclusive"); + log_info(" %s_%s(%s)...%s\n", func_name.c_str(), + operation_names(operation), TypeManager::name(), + extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - std::string func_name; - test_params.work_items_mask.any() - ? func_name = "sub_group_non_uniform_scan_inclusive" - : func_name = "sub_group_scan_inclusive"; - genrand(x, t, m, ns, nw, ng); - log_info(" %s_%s(%s)...\n", func_name.c_str(), - operation_names(operation), TypeManager::name()); } static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, @@ -694,10 +705,9 @@ template struct SCIN_NU Ty tr, rr; ng = ng / nw; - std::string func_name; - work_items_mask.any() - ? func_name = "sub_group_non_uniform_scan_inclusive" - : func_name = "sub_group_scan_inclusive"; + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? 
"sub_group_non_uniform_scan_inclusive" + : "sub_group_scan_inclusive"); // for uniform case take into consideration all workitems if (!work_items_mask.any()) @@ -771,8 +781,6 @@ template struct SCIN_NU m += 4 * nw; } - log_info(" %s_%s(%s)... passed\n", func_name.c_str(), - operation_names(operation), TypeManager::name()); return TEST_PASS; } }; @@ -780,6 +788,16 @@ template struct SCIN_NU // Test for reduce non uniform functions template struct RED_NU { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? "sub_group_non_uniform_reduce" + : "sub_group_reduce"); + log_info(" %s_%s(%s)...%s\n", func_name.c_str(), + operation_names(operation), TypeManager::name(), + extra_text); + } static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { @@ -787,13 +805,6 @@ template struct RED_NU int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - std::string func_name; - - test_params.work_items_mask.any() - ? func_name = "sub_group_non_uniform_reduce" - : func_name = "sub_group_reduce"; - log_info(" %s_%s(%s)...\n", func_name.c_str(), - operation_names(operation), TypeManager::name()); genrand(x, t, m, ns, nw, ng); } @@ -809,9 +820,9 @@ template struct RED_NU ng = ng / nw; Ty tr, rr; - std::string func_name; - work_items_mask.any() ? func_name = "sub_group_non_uniform_reduce" - : func_name = "sub_group_reduce"; + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? "sub_group_non_uniform_reduce" + : "sub_group_reduce"); for (k = 0; k < ng; ++k) { @@ -875,8 +886,6 @@ template struct RED_NU m += 4 * nw; } - log_info(" %s_%s(%s)... 
passed\n", func_name.c_str(), - operation_names(operation), TypeManager::name()); return TEST_PASS; } }; diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index bd4b6d61..30105a57 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -1380,23 +1380,45 @@ template struct test const char *kname, const char *src, WorkGroupParams test_params) { + Fns::log_test(test_params, ""); + test_status combined_error = TEST_SKIPPED_ITSELF; for (auto &mask : test_params.all_work_item_masks) { test_params.work_items_mask = mask; - test_status error = run(device, context, queue, num_elements, kname, - src, test_params); + test_status error = do_run(device, context, queue, num_elements, + kname, src, test_params); if (error == TEST_FAIL || (error == TEST_PASS && combined_error != TEST_FAIL)) combined_error = error; } + + if (combined_error == TEST_PASS) + { + Fns::log_test(test_params, " passed"); + } return combined_error; }; - static test_status run(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, - const char *kname, const char *src, - WorkGroupParams test_params) + static int run(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, const char *kname, + const char *src, WorkGroupParams test_params) + { + Fns::log_test(test_params, ""); + + int error = do_run(device, context, queue, num_elements, kname, src, + test_params); + + if (error == TEST_PASS) + { + Fns::log_test(test_params, " passed"); + } + return error; + }; + static test_status do_run(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + const char *kname, const char *src, + WorkGroupParams test_params) { size_t tmp; cl_int error; @@ -1442,16 +1464,14 @@ template struct test log_info("Data type not supported : %s\n", TypeManager::name()); return TEST_SKIPPED_ITSELF; } - else + + if (strstr(TypeManager::name(), "double")) + { + 
kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n"; + } + else if (strstr(TypeManager::name(), "half")) { - if (strstr(TypeManager::name(), "double")) - { - kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n"; - } - else if (strstr(TypeManager::name(), "half")) - { - kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp16: enable\n"; - } + kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp16: enable\n"; } error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), diff --git a/test_conformance/subgroups/test_barrier.cpp b/test_conformance/subgroups/test_barrier.cpp index b570e922..d415eefb 100644 --- a/test_conformance/subgroups/test_barrier.cpp +++ b/test_conformance/subgroups/test_barrier.cpp @@ -59,6 +59,17 @@ static const char *gbar_source = // barrier test functions template struct BAR { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + if (Which == 0) + log_info(" sub_group_barrier(CLK_LOCAL_MEM_FENCE)...%s\n", + extra_text); + else + log_info(" sub_group_barrier(CLK_GLOBAL_MEM_FENCE)...%s\n", + extra_text); + } + static void gen(cl_int *x, cl_int *t, cl_int *m, const WorkGroupParams &test_params) { @@ -103,11 +114,6 @@ template struct BAR ng = ng / nw; cl_int tr, rr; - if (Which == 0) - log_info(" sub_group_barrier(CLK_LOCAL_MEM_FENCE)...\n"); - else - log_info(" sub_group_barrier(CLK_GLOBAL_MEM_FENCE)...\n"); - for (k = 0; k < ng; ++k) { // Map to array indexed to array indexed by local ID and sub group diff --git a/test_conformance/subgroups/test_ifp.cpp b/test_conformance/subgroups/test_ifp.cpp index f6c5227d..f2bd5b92 100644 --- a/test_conformance/subgroups/test_ifp.cpp +++ b/test_conformance/subgroups/test_ifp.cpp @@ -225,6 +225,12 @@ void run_insts(cl_int *x, cl_int *p, int n) struct IFP { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" independent forward progress...%s\n", extra_text); + } + static void gen(cl_int *x, cl_int 
*t, cl_int *, const WorkGroupParams &test_params) { @@ -258,8 +264,6 @@ struct IFP // We need at least 2 sub groups per group for this test if (nj == 1) return TEST_SKIPPED_ITSELF; - log_info(" independent forward progress...\n"); - for (k = 0; k < ng; ++k) { run_insts(x, t, nj); diff --git a/test_conformance/subgroups/test_subgroup.cpp b/test_conformance/subgroups/test_subgroup.cpp index eefca5f8..aa9b32cb 100644 --- a/test_conformance/subgroups/test_subgroup.cpp +++ b/test_conformance/subgroups/test_subgroup.cpp @@ -24,6 +24,13 @@ namespace { // Any/All test functions template struct AA { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s...%s\n", operation_names(operation), + extra_text); + } + static void gen(cl_int *x, cl_int *t, cl_int *m, const WorkGroupParams &test_params) { @@ -35,7 +42,6 @@ template struct AA int e; ng = ng / nw; ii = 0; - log_info(" sub_group_%s...\n", operation_names(operation)); for (k = 0; k < ng; ++k) { for (j = 0; j < nj; ++j) @@ -124,7 +130,6 @@ template struct AA y += nw; m += 4 * nw; } - log_info(" sub_group_%s... 
passed\n", operation_names(operation)); return TEST_PASS; } }; diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index e742aa3b..837988ea 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -23,6 +23,12 @@ namespace { // Test for ballot functions template struct BALLOT { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_ballot...%s\n", extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { // no work here @@ -30,7 +36,6 @@ template struct BALLOT int lws = test_params.local_workgroup_size; int sbs = test_params.subgroup_size; int non_uniform_size = gws % lws; - log_info(" sub_group_ballot...\n"); } static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, @@ -92,7 +97,6 @@ template struct BALLOT y += lws; m += 4 * lws; } - log_info(" sub_group_ballot... passed\n"); return TEST_PASS; } }; @@ -100,6 +104,13 @@ template struct BALLOT // Test for bit extract ballot functions template struct BALLOT_BIT_EXTRACT { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_ballot_%s(%s)...%s\n", operation_names(operation), + TypeManager::name(), extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int wi_id, sb_id, wg_id, l; @@ -110,8 +121,6 @@ template struct BALLOT_BIT_EXTRACT int wg_number = gws / lws; int limit_sbs = sbs > 100 ? 100 : sbs; int non_uniform_size = gws % lws; - log_info(" sub_group_%s(%s)...\n", operation_names(operation), - TypeManager::name()); for (wg_id = 0; wg_id < wg_number; ++wg_id) { // for each work_group @@ -251,21 +260,24 @@ template struct BALLOT_BIT_EXTRACT y += lws; m += 4 * lws; } - log_info(" sub_group_%s(%s)... 
passed\n", operation_names(operation), - TypeManager::name()); return TEST_PASS; } }; template struct BALLOT_INVERSE { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_inverse_ballot...%s\n", extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int gws = test_params.global_workgroup_size; int lws = test_params.local_workgroup_size; int sbs = test_params.subgroup_size; int non_uniform_size = gws % lws; - log_info(" sub_group_inverse_ballot...\n"); // no work here } @@ -341,7 +353,6 @@ template struct BALLOT_INVERSE m += 4 * lws; } - log_info(" sub_group_inverse_ballot... passed\n"); return TEST_PASS; } }; @@ -350,6 +361,13 @@ template struct BALLOT_INVERSE // Test for bit count/inclusive and exclusive scan/ find lsb msb ballot function template struct BALLOT_COUNT_SCAN_FIND { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s(%s)...%s\n", operation_names(operation), + TypeManager::name(), extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int wi_id, wg_id, sb_id; @@ -362,8 +380,6 @@ template struct BALLOT_COUNT_SCAN_FIND int last_subgroup_size = 0; int current_sbs = 0; - log_info(" sub_group_%s(%s)...\n", operation_names(operation), - TypeManager::name()); if (non_uniform_size) { wg_number++; @@ -562,8 +578,6 @@ template struct BALLOT_COUNT_SCAN_FIND y += lws; m += 4 * lws; } - log_info(" sub_group_ballot_%s(%s)... 
passed\n", - operation_names(operation), TypeManager::name()); return TEST_PASS; } }; @@ -571,6 +585,13 @@ template struct BALLOT_COUNT_SCAN_FIND // test mask functions template struct SMASK { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" get_sub_group_%s_mask...%s\n", operation_names(operation), + extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int wi_id, wg_id, l, sb_id; @@ -579,7 +600,6 @@ template struct SMASK int sbs = test_params.subgroup_size; int sb_number = (lws + sbs - 1) / sbs; int wg_number = gws / lws; - log_info(" get_sub_group_%s_mask...\n", operation_names(operation)); for (wg_id = 0; wg_id < wg_number; ++wg_id) { // for each work_group for (sb_id = 0; sb_id < sb_number; ++sb_id) @@ -655,8 +675,6 @@ template struct SMASK y += lws; m += 4 * lws; } - log_info(" get_sub_group_%s_mask... passed\n", - operation_names(operation)); return TEST_PASS; } }; diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp index ad9e1ff2..f5872006 100644 --- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -38,15 +38,20 @@ __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type // Test for reduce cluster functions template struct RED_CLU { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_clustered_reduce_%s(%s, %d bytes) ...%s\n", + operation_names(operation), TypeManager::name(), + sizeof(Ty), extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - log_info(" sub_group_clustered_reduce_%s(%s, %d bytes) ...\n", - 
operation_names(operation), TypeManager::name(), - sizeof(Ty)); genrand(x, t, m, ns, nw, ng); } @@ -124,9 +129,6 @@ template struct RED_CLU y += nw; m += 4 * nw; } - log_info(" sub_group_clustered_reduce_%s(%s, %d bytes) ... passed\n", - operation_names(operation), TypeManager::name(), - sizeof(Ty)); return TEST_PASS; } }; diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp index b21a9f7e..3f0985e2 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -22,6 +22,15 @@ namespace { template struct VOTE { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s%s(%s)...%s\n", + (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_", + operation_names(operation), TypeManager::name(), + extra_text); + } + static void gen(T *x, T *t, cl_int *m, const WorkGroupParams &test_params) { int i, ii, j, k, n; @@ -34,10 +43,6 @@ template struct VOTE int last_subgroup_size = 0; ii = 0; - log_info(" sub_group_%s%s(%s)... \n", - (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_", - operation_names(operation), TypeManager::name()); - if (operation == NonUniformVoteOp::elect) return; for (k = 0; k < ng; ++k) @@ -192,9 +197,6 @@ template struct VOTE m += 4 * nw; } - log_info(" sub_group_%s%s(%s)... passed\n", - (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_", - operation_names(operation), TypeManager::name()); return TEST_PASS; } }; -- cgit v1.2.3 From c2facedfa0a0e07f7602cfecae90392419c0e159 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Wed, 5 Jan 2022 08:43:50 -0700 Subject: Remove dead threading code (#1339) Remove unused code that hasn't been used for the last three years and isn't included in makefiles. 
Co-authored-by: oramirez --- test_common/harness/threadTesting.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 test_common/harness/threadTesting.cpp diff --git a/test_common/harness/threadTesting.cpp b/test_common/harness/threadTesting.cpp deleted file mode 100644 index e69de29b..00000000 -- cgit v1.2.3 From b71c2047943a44a2e99c367e406e680caa160bfe Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Wed, 5 Jan 2022 17:08:52 +0100 Subject: test_subgroups - Set safe input values for half type and mul, add operations (#1346) * Set safe input values for half type and mul, add operations * Set safe values for all data types * Typo fix * Set constant seed for shuffle * Change function name to more specific * set_value takes an integer value, not a bit pattern --- .../subgroups/subgroup_common_templates.h | 48 ++++++++++++++++++---- .../subgroups/test_subgroup_clustered_reduce.cpp | 2 +- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index fc0b03b5..641c1875 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -20,6 +20,8 @@ #include "CL/cl_half.h" #include "subhelpers.h" #include +#include +#include static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id, const std::string &mask_type, @@ -391,11 +393,44 @@ template bool is_floating_point() || std::is_same::value; } +// limit possible input values to avoid arithmetic rounding/overflow issues. 
+// for each subgroup values defined different values +// for rest of workitems set 1 +// shuffle values +static void fill_and_shuffle_safe_values(std::vector &safe_values, + int sb_size) +{ + // max product is 720, cl_half has enough precision for it + const std::vector non_one_values{ 2, 3, 4, 5, 6 }; + + if (sb_size <= non_one_values.size()) + { + safe_values.assign(non_one_values.begin(), + non_one_values.begin() + sb_size); + } + else + { + safe_values.assign(sb_size, 1); + std::copy(non_one_values.begin(), non_one_values.end(), + safe_values.begin()); + } + + std::mt19937 mersenne_twister_engine(10000); + std::shuffle(safe_values.begin(), safe_values.end(), + mersenne_twister_engine); +}; + template -void genrand(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) +void generate_inputs(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) { int nj = (nw + ns - 1) / ns; + std::vector safe_values; + if (operation == ArithmeticOp::mul_ || operation == ArithmeticOp::add_) + { + fill_and_shuffle_safe_values(safe_values, ns); + } + for (int k = 0; k < ng; ++k) { for (int j = 0; j < nj; ++j) @@ -406,13 +441,10 @@ void genrand(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) for (int i = 0; i < n; ++i) { cl_ulong out_value; - double y; if (operation == ArithmeticOp::mul_ || operation == ArithmeticOp::add_) { - // work around to avoid overflow, do not use 0 for - // multiplication - out_value = (genrand_int32(gMTdata) % 4) + 1; + out_value = safe_values[i]; } else { @@ -591,7 +623,7 @@ template struct SCEX_NU int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - genrand(x, t, m, ns, nw, ng); + generate_inputs(x, t, m, ns, nw, ng); } static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, @@ -689,7 +721,7 @@ template struct SCIN_NU int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - genrand(x, t, m, ns, nw, ng); + generate_inputs(x, t, m, ns, nw, ng); } static test_status chk(Ty *x, 
Ty *y, Ty *mx, Ty *my, cl_int *m, @@ -805,7 +837,7 @@ template struct RED_NU int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - genrand(x, t, m, ns, nw, ng); + generate_inputs(x, t, m, ns, nw, ng); } static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp index f5872006..527be5ad 100644 --- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -52,7 +52,7 @@ template struct RED_CLU int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - genrand(x, t, m, ns, nw, ng); + generate_inputs(x, t, m, ns, nw, ng); } static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, -- cgit v1.2.3 From f91daf3d062d7d085bd9e9154869d2179655685f Mon Sep 17 00:00:00 2001 From: Jim Lewis Date: Thu, 6 Jan 2022 04:23:07 -0600 Subject: Remove invalid negative_get_platform_info testcase (#1374) * Remove invalid negative_get_platform_info testcase * Implementations are only required to do null checks * Fixes #1318 * Fix formatting --- test_conformance/api/negative_platform.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/test_conformance/api/negative_platform.cpp b/test_conformance/api/negative_platform.cpp index 7d9de5df..861d4748 100644 --- a/test_conformance/api/negative_platform.cpp +++ b/test_conformance/api/negative_platform.cpp @@ -42,18 +42,9 @@ int test_negative_get_platform_info(cl_device_id deviceID, cl_context context, { cl_platform_id platform = getPlatformFromDevice(deviceID); - cl_int err = - clGetPlatformInfo(reinterpret_cast(deviceID), - CL_PLATFORM_VERSION, sizeof(char*), nullptr, nullptr); - test_failure_error_ret( - err, CL_INVALID_PLATFORM, - "clGetPlatformInfo should return CL_INVALID_PLATFORM when: \"platform " - "is not a 
valid platform\" using a valid object which is NOT a " - "platform", - TEST_FAIL); - constexpr cl_platform_info INVALID_PARAM_VALUE = 0; - err = clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr); + cl_int err = + clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr); test_failure_error_ret( err, CL_INVALID_VALUE, "clGetPlatformInfo should return CL_INVALID_VALUE when: \"param_name " -- cgit v1.2.3 From 51c6d97d2f9d62e5bdcbc1f4cbec2d5be2bedf0a Mon Sep 17 00:00:00 2001 From: Jim Lewis Date: Thu, 6 Jan 2022 04:26:20 -0600 Subject: Fix test_api get_command_queue_info (#1324) * Fix test_api get_command_queue_info Decouple host and device out-of-order test enabling * Rename property sets more generically * Refactor to use std::vector to accumulate test permutations --- test_conformance/api/test_queries.cpp | 127 +++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 57 deletions(-) diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp index 469a1934..30b5706f 100644 --- a/test_conformance/api/test_queries.cpp +++ b/test_conformance/api/test_queries.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -19,6 +19,7 @@ #include #include #include +#include int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { @@ -345,87 +346,100 @@ int command_queue_param_test(cl_command_queue queue, return 0; } -#define MIN_NUM_COMMAND_QUEUE_PROPERTIES 2 -#define OOO_NUM_COMMAND_QUEUE_PROPERTIES 4 -static cl_command_queue_properties property_options[] = { - 0, - - CL_QUEUE_PROFILING_ENABLE, - - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, - - CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, - - CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, - - CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE - | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, - - CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT - | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, - - CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT - | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE -}; - int check_get_command_queue_info_params(cl_device_id deviceID, cl_context context, bool is_compatibility) { - int error; - size_t size; + const cl_command_queue_properties host_optional[] = { + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE + }; + + const cl_command_queue_properties device_required[] = { + CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE + | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT + | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE + | CL_QUEUE_ON_DEVICE_DEFAULT + | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE + }; + + const size_t host_optional_size = ARRAY_SIZE(host_optional); + const size_t device_required_size = ARRAY_SIZE(device_required); + + Version version = get_device_cl_version(deviceID); - cl_queue_properties host_queue_props, device_queue_props; - cl_queue_properties 
queue_props[] = { CL_QUEUE_PROPERTIES, 0, 0 }; + const cl_device_info host_queue_query = version >= Version(2, 0) + ? CL_DEVICE_QUEUE_ON_HOST_PROPERTIES + : CL_DEVICE_QUEUE_PROPERTIES; - clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, - sizeof(host_queue_props), &host_queue_props, NULL); - log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n", - (int)host_queue_props); - clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, - sizeof(device_queue_props), &device_queue_props, NULL); - log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n", - (int)device_queue_props); + cl_queue_properties host_queue_props = 0; + int error = + clGetDeviceInfo(deviceID, host_queue_query, sizeof(host_queue_props), + &host_queue_props, NULL); + test_error(error, "clGetDeviceInfo failed"); + log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n", host_queue_props); - auto version = get_device_cl_version(deviceID); + cl_queue_properties device_queue_props = 0; + if (version >= Version(2, 0)) + { + error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, + sizeof(device_queue_props), &device_queue_props, + NULL); + test_error(error, "clGetDeviceInfo failed"); + log_info("CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES is %d\n", + device_queue_props); + } + + bool out_of_order_supported = + host_queue_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; - // Are on device queues supported bool on_device_supported = (version >= Version(2, 0) && version < Version(3, 0)) || (version >= Version(3, 0) && device_queue_props != 0); - int num_test_options = MIN_NUM_COMMAND_QUEUE_PROPERTIES; - if (host_queue_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) + // test device queues if the device and the API under test support it + bool test_on_device = on_device_supported && !is_compatibility; + + std::vector queue_props{ 0, + CL_QUEUE_PROFILING_ENABLE }; + + if (out_of_order_supported) { - // Test out-of-order queues properties if supported - num_test_options = 
OOO_NUM_COMMAND_QUEUE_PROPERTIES; - } - if (on_device_supported && !is_compatibility) + queue_props.insert(queue_props.end(), &host_optional[0], + &host_optional[host_optional_size]); + }; + + cl_queue_properties queue_props_arg[] = { CL_QUEUE_PROPERTIES, 0, 0 }; + + if (test_on_device) { - // Test queue on device if supported (in this case out-of-order must - // also be supported) - num_test_options = ARRAY_SIZE(property_options); - } + queue_props.insert(queue_props.end(), &device_required[0], + &device_required[device_required_size]); + }; - for (int i = 0; i < num_test_options; i++) + for (cl_queue_properties props : queue_props) { - queue_props[1] = property_options[i]; - clCommandQueueWrapper queue; + queue_props_arg[1] = props; + + clCommandQueueWrapper queue; if (is_compatibility) { - queue = - clCreateCommandQueue(context, deviceID, queue_props[1], &error); + queue = clCreateCommandQueue(context, deviceID, props, &error); test_error(error, "Unable to create command queue to test with"); } else { queue = clCreateCommandQueueWithProperties(context, deviceID, - &queue_props[0], &error); + queue_props_arg, &error); test_error(error, "Unable to create command queue to test with"); } cl_uint refCount; + size_t size; error = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT, sizeof(refCount), &refCount, &size); test_error(error, "Unable to get command queue reference count"); @@ -442,11 +456,12 @@ int check_get_command_queue_info_params(cl_device_id deviceID, test_error(error, "param checking failed"); error = command_queue_param_test(queue, CL_QUEUE_PROPERTIES, - queue_props[1], "properties"); + queue_props_arg[1], "properties"); test_error(error, "param checking failed"); } return 0; } + int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements) { @@ -824,5 +839,3 @@ int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, c return 0; } - - -- cgit v1.2.3 From 
06415f8b79c38bb08279c8267d38b41101f32760 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 11 Jan 2022 09:52:11 -0700 Subject: Fix memory leaks (#1378) * Fix memory leaks Fixed memory leaks in: buffers, basic, and vectors * Formatting fixes Co-authored-by: oramirez --- test_conformance/basic/test_vector_swizzle.cpp | 58 ++++++++++++++++---------- test_conformance/buffers/test_buffer_fill.cpp | 4 +- test_conformance/buffers/test_buffer_read.cpp | 4 +- test_conformance/vectors/test_step.cpp | 2 + 4 files changed, 41 insertions(+), 27 deletions(-) diff --git a/test_conformance/basic/test_vector_swizzle.cpp b/test_conformance/basic/test_vector_swizzle.cpp index 5ab3ea4f..884bcf36 100644 --- a/test_conformance/basic/test_vector_swizzle.cpp +++ b/test_conformance/basic/test_vector_swizzle.cpp @@ -610,9 +610,6 @@ static int test_vectype(const char* type_name, cl_device_id device, cl_int error = CL_SUCCESS; int result = TEST_PASS; - clProgramWrapper program; - clKernelWrapper kernel; - std::string buildOptions{ "-DTYPE=" }; buildOptions += type_name; buildOptions += std::to_string(N); @@ -628,35 +625,50 @@ static int test_vectype(const char* type_name, cl_device_id device, makeReference(reference); // XYZW swizzles: + { + clProgramWrapper program; + clKernelWrapper kernel; - const char* xyzw_source = TestInfo::kernel_source_xyzw; - error = create_single_kernel_helper( - context, &program, &kernel, 1, &xyzw_source, "test_vector_swizzle_xyzw", - buildOptions.c_str()); - test_error(error, "Unable to create xyzw test kernel"); + const char* xyzw_source = TestInfo::kernel_source_xyzw; + error = create_single_kernel_helper( + context, &program, &kernel, 1, &xyzw_source, + "test_vector_swizzle_xyzw", buildOptions.c_str()); + test_error(error, "Unable to create xyzw test kernel"); - result |= test_vectype_case(value, reference, context, kernel, queue); + result |= test_vectype_case(value, reference, context, kernel, queue); + } // sN swizzles: - const char* 
sN_source = TestInfo::kernel_source_sN; - error = create_single_kernel_helper(context, &program, &kernel, 1, - &sN_source, "test_vector_swizzle_sN", - buildOptions.c_str()); - test_error(error, "Unable to create sN test kernel"); + { + clProgramWrapper program; + clKernelWrapper kernel; + + const char* sN_source = TestInfo::kernel_source_sN; + error = create_single_kernel_helper( + context, &program, &kernel, 1, &sN_source, "test_vector_swizzle_sN", + buildOptions.c_str()); + test_error(error, "Unable to create sN test kernel"); - result |= test_vectype_case(value, reference, context, kernel, queue); + result |= test_vectype_case(value, reference, context, kernel, queue); + } // RGBA swizzles for OpenCL 3.0 and newer: - const Version device_version = get_device_cl_version(device); - if (device_version >= Version(3, 0)) { - const char* rgba_source = TestInfo::kernel_source_rgba; - error = create_single_kernel_helper( - context, &program, &kernel, 1, &rgba_source, - "test_vector_swizzle_rgba", buildOptions.c_str()); - test_error(error, "Unable to create rgba test kernel"); + clProgramWrapper program; + clKernelWrapper kernel; - result |= test_vectype_case(value, reference, context, kernel, queue); + const Version device_version = get_device_cl_version(device); + if (device_version >= Version(3, 0)) + { + const char* rgba_source = TestInfo::kernel_source_rgba; + error = create_single_kernel_helper( + context, &program, &kernel, 1, &rgba_source, + "test_vector_swizzle_rgba", buildOptions.c_str()); + test_error(error, "Unable to create rgba test kernel"); + + result |= + test_vectype_case(value, reference, context, kernel, queue); + } } return result; diff --git a/test_conformance/buffers/test_buffer_fill.cpp b/test_conformance/buffers/test_buffer_fill.cpp index 9c9c7d17..92079794 100644 --- a/test_conformance/buffers/test_buffer_fill.cpp +++ b/test_conformance/buffers/test_buffer_fill.cpp @@ -703,8 +703,6 @@ int test_buffer_fill( cl_device_id deviceID, cl_context 
context, cl_command_queu int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) { TestStruct pattern; - clProgramWrapper program; - clKernelWrapper kernel; size_t ptrSize = sizeof( TestStruct ); size_t global_work_size[3]; int n, err; @@ -720,6 +718,8 @@ int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_comma for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { + clProgramWrapper program; + clKernelWrapper kernel; log_info("Testing with cl_mem_flags: %s\n", flag_set_names[src_flag_id]); diff --git a/test_conformance/buffers/test_buffer_read.cpp b/test_conformance/buffers/test_buffer_read.cpp index 39cf3297..49a57f92 100644 --- a/test_conformance/buffers/test_buffer_read.cpp +++ b/test_conformance/buffers/test_buffer_read.cpp @@ -763,7 +763,6 @@ int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_comman { clProgramWrapper program[5]; clKernelWrapper kernel[5]; - clEventWrapper event; void *outptr[5]; void *inptr[5]; size_t global_work_size[3]; @@ -805,6 +804,7 @@ int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_comman for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { clMemWrapper buffer; + clEventWrapper event; outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment); if ( ! 
outptr[i] ){ log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) ); @@ -900,7 +900,6 @@ int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, c { clProgramWrapper program[5]; clKernelWrapper kernel[5]; - clEventWrapper event; void *outptr[5], *inptr[5]; size_t global_work_size[3]; cl_int err; @@ -941,6 +940,7 @@ int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, c for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { clMemWrapper buffer; + clEventWrapper event; outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment); if ( ! outptr[i] ){ log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) ); diff --git a/test_conformance/vectors/test_step.cpp b/test_conformance/vectors/test_step.cpp index 2f6ad187..089bad2f 100644 --- a/test_conformance/vectors/test_step.cpp +++ b/test_conformance/vectors/test_step.cpp @@ -172,6 +172,8 @@ int test_step_internal(cl_device_id deviceID, cl_context context, destroyClState(pClState); return -1; } + + clStateDestroyProgramAndKernel(pClState); } } -- cgit v1.2.3 From 656886030b294225b92379ef14306b2e5b9a3f04 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Wed, 19 Jan 2022 14:17:54 +0000 Subject: Refactor divergence mask handling in subgroup tests (#1379) This changes compilation of subgroup test kernels so that a separate compilation is no longer performed for each divergence mask value. The divergence mask is now passed as a kernel argument. This also fixes all subgroup_functions_non_uniform_arithmetic testing and the sub_group_elect and sub_group_any/all_equal subtests of the subgroup_functions_non_uniform_vote test to use the correct order of vector components for GPUs with a subgroup size greater than 64. The conversion of divergence mask bitsets to uint4 vectors has been corrected to match code comments in WorkGroupParams::load_masks() in test_conformance/subgroups/subhelpers.h. 
Signed-off-by: Stuart Brady --- test_conformance/subgroups/subhelpers.h | 172 ++++++++++----------- .../test_subgroup_non_uniform_arithmetic.cpp | 8 +- .../subgroups/test_subgroup_non_uniform_vote.cpp | 14 +- 3 files changed, 96 insertions(+), 98 deletions(-) diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 30105a57..aa4abc96 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -34,12 +34,24 @@ extern MTdata gMTdata; typedef std::bitset<128> bs128; extern cl_half_rounding_mode g_rounding_mode; +static cl_uint4 bs128_to_cl_uint4(bs128 v) +{ + bs128 bs128_ffffffff = 0xffffffffU; + + cl_uint4 r; + r.s0 = ((v >> 0) & bs128_ffffffff).to_ulong(); + r.s1 = ((v >> 32) & bs128_ffffffff).to_ulong(); + r.s2 = ((v >> 64) & bs128_ffffffff).to_ulong(); + r.s3 = ((v >> 96) & bs128_ffffffff).to_ulong(); + + return r; +} + struct WorkGroupParams { - WorkGroupParams(size_t gws, size_t lws, - bool use_mask = false) + WorkGroupParams(size_t gws, size_t lws, int dm_arg = -1) : global_workgroup_size(gws), local_workgroup_size(lws), - use_masks(use_mask) + divergence_mask_arg(dm_arg) { subgroup_size = 0; work_items_mask = 0; @@ -54,7 +66,7 @@ struct WorkGroupParams int dynsc; bool use_core_subgroups; std::vector all_work_item_masks; - bool use_masks; + int divergence_mask_arg; void save_kernel_source(const std::string &source, std::string name = "") { if (name == "") @@ -84,7 +96,7 @@ private: std::map kernel_function_name; void load_masks() { - if (use_masks) + if (divergence_mask_arg != -1) { // 1 in string will be set 1, 0 will be set 0 bs128 mask_0xf0f0f0f0("11110000111100001111000011110000" @@ -1375,50 +1387,10 @@ static int run_kernel(cl_context context, cl_command_queue queue, // Driver for testing a single built in function template struct test { - static test_status mrun(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, - const char *kname, 
const char *src, - WorkGroupParams test_params) - { - Fns::log_test(test_params, ""); - - test_status combined_error = TEST_SKIPPED_ITSELF; - for (auto &mask : test_params.all_work_item_masks) - { - test_params.work_items_mask = mask; - test_status error = do_run(device, context, queue, num_elements, - kname, src, test_params); - - if (error == TEST_FAIL - || (error == TEST_PASS && combined_error != TEST_FAIL)) - combined_error = error; - } - - if (combined_error == TEST_PASS) - { - Fns::log_test(test_params, " passed"); - } - return combined_error; - }; - static int run(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, const char *kname, - const char *src, WorkGroupParams test_params) - { - Fns::log_test(test_params, ""); - - int error = do_run(device, context, queue, num_elements, kname, src, - test_params); - - if (error == TEST_PASS) - { - Fns::log_test(test_params, " passed"); - } - return error; - }; - static test_status do_run(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, - const char *kname, const char *src, - WorkGroupParams test_params) + static test_status run(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + const char *kname, const char *src, + WorkGroupParams test_params) { size_t tmp; cl_int error; @@ -1436,25 +1408,8 @@ template struct test std::vector mapout; mapout.resize(local); std::stringstream kernel_sstr; - if (test_params.use_masks) - { - // Prapare uint4 type to store bitmask on kernel OpenCL C side - // To keep order the first characet in string is the lowest bit - // there was a need to give such offset to bitset constructor - // (first highest offset = 96) - std::bitset<32> bits_1_32(test_params.work_items_mask.to_string(), - 96, 32); - std::bitset<32> bits_33_64(test_params.work_items_mask.to_string(), - 64, 32); - std::bitset<32> bits_65_96(test_params.work_items_mask.to_string(), - 32, 32); - std::bitset<32> 
bits_97_128(test_params.work_items_mask.to_string(), - 0, 32); - kernel_sstr << "global uint4 work_item_mask_vector = (uint4)(0b" - << bits_1_32 << ",0b" << bits_33_64 << ",0b" - << bits_65_96 << ",0b" << bits_97_128 << ");\n"; - } + Fns::log_test(test_params, ""); kernel_sstr << "#define NR_OF_ACTIVE_WORK_ITEMS "; kernel_sstr << NR_OF_ACTIVE_WORK_ITEMS << "\n"; @@ -1563,6 +1518,18 @@ template struct test idata.resize(input_array_size); odata.resize(output_array_size); + if (test_params.divergence_mask_arg != -1) + { + cl_uint4 mask_vector; + mask_vector.x = 0xffffffffU; + mask_vector.y = 0xffffffffU; + mask_vector.z = 0xffffffffU; + mask_vector.w = 0xffffffffU; + error = clSetKernelArg(kernel, test_params.divergence_mask_arg, + sizeof(cl_uint4), &mask_vector); + test_error_fail(error, "Unable to set divergence mask argument"); + } + // Run the kernel once on zeroes to get the map memset(idata.data(), 0, input_array_size * sizeof(Ty)); error = run_kernel(context, queue, kernel, global, local, idata.data(), @@ -1572,25 +1539,65 @@ template struct test test_error_fail(error, "Running kernel first time failed"); // Generate the desired input for the kernel - test_params.subgroup_size = subgroup_size; Fns::gen(idata.data(), mapin.data(), sgmap.data(), test_params); - error = run_kernel(context, queue, kernel, global, local, idata.data(), + + test_status combined_status; + + if (test_params.divergence_mask_arg != -1) + { + combined_status = TEST_SKIPPED_ITSELF; + + for (auto &mask : test_params.all_work_item_masks) + { + test_params.work_items_mask = mask; + cl_uint4 mask_vector = bs128_to_cl_uint4(mask); + clSetKernelArg(kernel, test_params.divergence_mask_arg, + sizeof(cl_uint4), &mask_vector); + error = run_kernel(context, queue, kernel, global, local, + idata.data(), input_array_size * sizeof(Ty), + sgmap.data(), global * sizeof(cl_int4), + odata.data(), output_array_size * sizeof(Ty), + TSIZE * sizeof(Ty)); + test_error_fail(error, "Running kernel second time 
failed"); + + // Check the result + test_status status = + Fns::chk(idata.data(), odata.data(), mapin.data(), + mapout.data(), sgmap.data(), test_params); + + if (status == TEST_FAIL + || (status == TEST_PASS && combined_status != TEST_FAIL)) + combined_status = status; + + if (status == TEST_FAIL) break; + } + } + else + { + error = + run_kernel(context, queue, kernel, global, local, idata.data(), input_array_size * sizeof(Ty), sgmap.data(), global * sizeof(cl_int4), odata.data(), output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); - test_error_fail(error, "Running kernel second time failed"); + test_error_fail(error, "Running kernel second time failed"); - // Check the result - test_status status = Fns::chk(idata.data(), odata.data(), mapin.data(), - mapout.data(), sgmap.data(), test_params); + // Check the result + combined_status = + Fns::chk(idata.data(), odata.data(), mapin.data(), + mapout.data(), sgmap.data(), test_params); + } // Detailed failure and skip messages should be logged by Fns::gen // and Fns::chk. 
- if (status == TEST_FAIL) + if (combined_status == TEST_PASS) + { + Fns::log_test(test_params, " passed"); + } + else if (combined_status == TEST_FAIL) { test_fail("Data verification failed\n"); } - return status; + return combined_status; } }; @@ -1643,18 +1650,9 @@ struct RunTestForType std::regex_replace(test_params_.get_kernel_source(function_name), std::regex("\\%s"), function_name); std::string kernel_name = "test_" + function_name; - if (test_params_.all_work_item_masks.size() > 0) - { - error = test::mrun(device_, context_, queue_, num_elements_, - kernel_name.c_str(), source.c_str(), - test_params_); - } - else - { - error = test::run(device_, context_, queue_, num_elements_, - kernel_name.c_str(), source.c_str(), - test_params_); - } + error = + test::run(device_, context_, queue_, num_elements_, + kernel_name.c_str(), source.c_str(), test_params_); // If we return TEST_SKIPPED_ITSELF here, then an entire suite may be // reported as having been skipped even if some tests within it diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp index 5ab45222..02fc507b 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp @@ -21,7 +21,7 @@ namespace { std::string sub_group_non_uniform_arithmetic_source = R"( - __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) { int gid = get_global_id(0); XY(xy,gid); uint subgroup_local_id = get_sub_group_local_id(); @@ -32,9 +32,9 @@ std::string sub_group_non_uniform_arithmetic_source = R"( } else if(subgroup_local_id < 64) { work_item_mask = work_item_mask_vector.y; } else if(subgroup_local_id < 96) { - work_item_mask = work_item_mask_vector.w; - } else if(subgroup_local_id < 128) { 
work_item_mask = work_item_mask_vector.z; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.w; } if (elect_work_item & work_item_mask){ out[gid] = %s(in[gid]); @@ -136,7 +136,7 @@ int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device, constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size, true); + WorkGroupParams test_params(global_work_size, local_work_size, 3); test_params.save_kernel_source(sub_group_non_uniform_arithmetic_source); RunTestForType rft(device, context, queue, num_elements, test_params); diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp index 3f0985e2..3be1ba30 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -202,7 +202,7 @@ template struct VOTE }; std::string sub_group_elect_source = R"( - __kernel void test_sub_group_elect(const __global Type *in, __global int4 *xy, __global Type *out) { + __kernel void test_sub_group_elect(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) { int gid = get_global_id(0); XY(xy,gid); uint subgroup_local_id = get_sub_group_local_id(); @@ -213,9 +213,9 @@ std::string sub_group_elect_source = R"( } else if(subgroup_local_id < 64) { work_item_mask = work_item_mask_vector.y; } else if(subgroup_local_id < 96) { - work_item_mask = work_item_mask_vector.w; - } else if(subgroup_local_id < 128) { work_item_mask = work_item_mask_vector.z; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.w; } if (elect_work_item & work_item_mask){ out[gid] = sub_group_elect(); @@ -224,7 +224,7 @@ std::string sub_group_elect_source = R"( )"; std::string sub_group_non_uniform_any_all_all_equal_source = R"( - __kernel void test_%s(const __global Type *in, 
__global int4 *xy, __global Type *out) { + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) { int gid = get_global_id(0); XY(xy,gid); uint subgroup_local_id = get_sub_group_local_id(); @@ -235,9 +235,9 @@ std::string sub_group_non_uniform_any_all_all_equal_source = R"( } else if(subgroup_local_id < 64) { work_item_mask = work_item_mask_vector.y; } else if(subgroup_local_id < 96) { - work_item_mask = work_item_mask_vector.w; - } else if(subgroup_local_id < 128) { work_item_mask = work_item_mask_vector.z; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.w; } if (elect_work_item & work_item_mask){ out[gid] = %s(in[gid]); @@ -267,7 +267,7 @@ int test_subgroup_functions_non_uniform_vote(cl_device_id device, constexpr size_t global_work_size = 170; constexpr size_t local_work_size = 64; - WorkGroupParams test_params(global_work_size, local_work_size, true); + WorkGroupParams test_params(global_work_size, local_work_size, 3); test_params.save_kernel_source( sub_group_non_uniform_any_all_all_equal_source); test_params.save_kernel_source(sub_group_elect_source, "sub_group_elect"); -- cgit v1.2.3 From 60471a520804fbd6611acd1c48f35549bb512deb Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Fri, 28 Jan 2022 09:15:44 +0000 Subject: Improve testing of sub_group_ballot (#1382) Signed-off-by: Stuart Brady --- test_common/harness/mt19937.cpp | 2 + test_common/harness/mt19937.h | 3 + test_conformance/subgroups/subhelpers.h | 6 + .../subgroups/test_subgroup_ballot.cpp | 191 +++++++++++++++------ 4 files changed, 147 insertions(+), 55 deletions(-) diff --git a/test_common/harness/mt19937.cpp b/test_common/harness/mt19937.cpp index c32d9bac..f5665deb 100644 --- a/test_common/harness/mt19937.cpp +++ b/test_common/harness/mt19937.cpp @@ -277,3 +277,5 @@ double genrand_res53(MTdata d) unsigned long a = genrand_int32(d) >> 5, b = genrand_int32(d) >> 6; return (a * 67108864.0 + b) * (1.0 / 
9007199254740992.0); } + +bool genrand_bool(MTdata d) { return ((cl_uint)genrand_int32(d) & 1); } diff --git a/test_common/harness/mt19937.h b/test_common/harness/mt19937.h index 35c84933..98eec843 100644 --- a/test_common/harness/mt19937.h +++ b/test_common/harness/mt19937.h @@ -90,6 +90,9 @@ double genrand_res53(MTdata /*data*/); #ifdef __cplusplus +/* generates a random boolean */ +bool genrand_bool(MTdata /*data*/); + #include struct MTdataHolder diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index aa4abc96..153045d0 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -34,6 +34,12 @@ extern MTdata gMTdata; typedef std::bitset<128> bs128; extern cl_half_rounding_mode g_rounding_mode; +static bs128 cl_uint4_to_bs128(cl_uint4 v) +{ + return bs128(v.s0) | (bs128(v.s1) << 32) | (bs128(v.s2) << 64) + | (bs128(v.s3) << 96); +} + static cl_uint4 bs128_to_cl_uint4(bs128 v) { bs128 bs128_ffffffff = 0xffffffffU; diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 837988ea..4148707e 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -31,45 +31,93 @@ template struct BALLOT static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { - // no work here int gws = test_params.global_workgroup_size; int lws = test_params.local_workgroup_size; int sbs = test_params.subgroup_size; + int sb_number = (lws + sbs - 1) / sbs; int non_uniform_size = gws % lws; + int wg_number = gws / lws; + wg_number = non_uniform_size ? 
wg_number + 1 : wg_number; + int last_subgroup_size = 0; + + for (int wg_id = 0; wg_id < wg_number; ++wg_id) + { // for each work_group + if (non_uniform_size && wg_id == wg_number - 1) + { + set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws, + last_subgroup_size); + } + for (int sb_id = 0; sb_id < sb_number; ++sb_id) + { // for each subgroup + int wg_offset = sb_id * sbs; + int current_sbs; + if (last_subgroup_size && sb_id == sb_number - 1) + { + current_sbs = last_subgroup_size; + } + else + { + current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs; + } + + for (int wi_id = 0; wi_id < current_sbs; wi_id++) + { + cl_uint v; + if (genrand_bool(gMTdata)) + { + v = genrand_bool(gMTdata); + } + else if (genrand_bool(gMTdata)) + { + v = 1U << ((genrand_int32(gMTdata) % 31) + 1); + } + else + { + v = genrand_int32(gMTdata); + } + cl_uint4 v4 = { v, 0, 0, 0 }; + t[wi_id + wg_offset] = v4; + } + } + // Now map into work group using map from device + for (int wi_id = 0; wi_id < lws; ++wi_id) + { + x[wi_id] = t[wi_id]; + } + x += lws; + m += 4 * lws; + } } static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, const WorkGroupParams &test_params) { - int wi_id, wg_id, sb_id; int gws = test_params.global_workgroup_size; int lws = test_params.local_workgroup_size; int sbs = test_params.subgroup_size; int sb_number = (lws + sbs - 1) / sbs; - int current_sbs = 0; - cl_uint expected_result, device_result; int non_uniform_size = gws % lws; int wg_number = gws / lws; wg_number = non_uniform_size ? 
wg_number + 1 : wg_number; int last_subgroup_size = 0; - for (wg_id = 0; wg_id < wg_number; ++wg_id) + for (int wg_id = 0; wg_id < wg_number; ++wg_id) { // for each work_group if (non_uniform_size && wg_id == wg_number - 1) { set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws, last_subgroup_size); } - - for (wi_id = 0; wi_id < lws; ++wi_id) + for (int wi_id = 0; wi_id < lws; ++wi_id) { // inside the work_group - // read device outputs for work_group - my[wi_id] = y[wi_id]; + mx[wi_id] = x[wi_id]; // read host inputs for work_group + my[wi_id] = y[wi_id]; // read device outputs for work_group } - for (sb_id = 0; sb_id < sb_number; ++sb_id) + for (int sb_id = 0; sb_id < sb_number; ++sb_id) { // for each subgroup int wg_offset = sb_id * sbs; + int current_sbs; if (last_subgroup_size && sb_id == sb_number - 1) { current_sbs = last_subgroup_size; @@ -78,25 +126,54 @@ template struct BALLOT { current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs; } - for (wi_id = 0; wi_id < current_sbs; ++wi_id) + + bs128 expected_result_bs = 0; + + std::set active_work_items; + for (int wi_id = 0; wi_id < current_sbs; ++wi_id) { - device_result = my[wg_offset + wi_id]; - expected_result = 1; - if (!compare(device_result, expected_result)) + if (test_params.work_items_mask.test(wi_id)) + { + bool predicate = (mx[wg_offset + wi_id].s0 != 0); + expected_result_bs |= (bs128(predicate) << wi_id); + active_work_items.insert(wi_id); + } + } + if (active_work_items.empty()) + { + continue; + } + + cl_uint4 expected_result = + bs128_to_cl_uint4(expected_result_bs); + for (const int &active_work_item : active_work_items) + { + int wi_id = active_work_item; + + cl_uint4 device_result = my[wg_offset + wi_id]; + bs128 device_result_bs = cl_uint4_to_bs128(device_result); + + if (device_result_bs != expected_result_bs) { log_error( "ERROR: sub_group_ballot mismatch for local id " - "%d in sub group %d in group %d obtained %d, " - "expected %d\n", - wi_id, sb_id, wg_id, 
device_result, - expected_result); + "%d in sub group %d in group %d obtained {%d, %d, " + "%d, %d}, expected {%d, %d, %d, %d}\n", + wi_id, sb_id, wg_id, device_result.s0, + device_result.s1, device_result.s2, + device_result.s3, expected_result.s0, + expected_result.s1, expected_result.s2, + expected_result.s3); return TEST_FAIL; } } } + + x += lws; y += lws; m += 4 * lws; } + return TEST_PASS; } }; @@ -724,27 +801,26 @@ __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type } )"; std::string sub_group_ballot_source = R"( -__kernel void test_sub_group_ballot(const __global Type *in, __global int4 *xy, __global Type *out) { - uint4 full_ballot = sub_group_ballot(1); - uint divergence_mask; - uint4 partial_ballot; +__kernel void test_sub_group_ballot(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) { uint gid = get_global_id(0); XY(xy,gid); - if (get_sub_group_local_id() & 1) { - divergence_mask = 0xaaaaaaaa; - partial_ballot = sub_group_ballot(1); - } else { - divergence_mask = 0x55555555; - partial_ballot = sub_group_ballot(1); + uint subgroup_local_id = get_sub_group_local_id(); + uint elect_work_item = 1 << (subgroup_local_id % 32); + uint work_item_mask; + if (subgroup_local_id < 32) { + work_item_mask = work_item_mask_vector.x; + } else if(subgroup_local_id < 64) { + work_item_mask = work_item_mask_vector.y; + } else if(subgroup_local_id < 96) { + work_item_mask = work_item_mask_vector.z; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.w; } - size_t lws = get_local_size(0); - uint4 masked_ballot = full_ballot; - masked_ballot.x &= divergence_mask; - masked_ballot.y &= divergence_mask; - masked_ballot.z &= divergence_mask; - masked_ballot.w &= divergence_mask; - out[gid] = all(masked_ballot == partial_ballot); - + uint4 value = (uint4)(0, 0, 0, 0); + if (elect_work_item & work_item_mask) { + value = sub_group_ballot(in[gid].s0); + } + out[gid] = value; } )"; 
std::string sub_group_inverse_ballot_source = R"( @@ -952,42 +1028,47 @@ int test_subgroup_functions_ballot(cl_device_id device, cl_context context, error |= rft.run_impl>( "get_sub_group_lt_mask"); - // ballot functions - WorkGroupParams test_params_ballot(global_work_size, local_work_size); - test_params_ballot.save_kernel_source( - sub_group_ballot_bit_scan_find_source); - test_params_ballot.save_kernel_source(sub_group_ballot_source, - "sub_group_ballot"); - test_params_ballot.save_kernel_source(sub_group_inverse_ballot_source, - "sub_group_inverse_ballot"); - test_params_ballot.save_kernel_source(sub_group_ballot_bit_extract_source, - "sub_group_ballot_bit_extract"); + // sub_group_ballot function + WorkGroupParams test_params_ballot(global_work_size, local_work_size, 3); + test_params_ballot.save_kernel_source(sub_group_ballot_source); RunTestForType rft_ballot(device, context, queue, num_elements, test_params_ballot); - error |= rft_ballot.run_impl>("sub_group_ballot"); error |= - rft_ballot.run_impl>( + rft_ballot.run_impl>("sub_group_ballot"); + + // ballot arithmetic functions + WorkGroupParams test_params_arith(global_work_size, local_work_size); + test_params_arith.save_kernel_source(sub_group_ballot_bit_scan_find_source); + test_params_arith.save_kernel_source(sub_group_inverse_ballot_source, + "sub_group_inverse_ballot"); + test_params_arith.save_kernel_source(sub_group_ballot_bit_extract_source, + "sub_group_ballot_bit_extract"); + RunTestForType rft_arith(device, context, queue, num_elements, + test_params_arith); + error |= + rft_arith.run_impl>( "sub_group_inverse_ballot"); - error |= rft_ballot.run_impl< + error |= rft_arith.run_impl< cl_uint4, BALLOT_BIT_EXTRACT>( "sub_group_ballot_bit_extract"); - error |= rft_ballot.run_impl< + error |= rft_arith.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( "sub_group_ballot_bit_count"); - error |= rft_ballot.run_impl< + error |= rft_arith.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( 
"sub_group_ballot_inclusive_scan"); - error |= rft_ballot.run_impl< + error |= rft_arith.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( "sub_group_ballot_exclusive_scan"); - error |= rft_ballot.run_impl< + error |= rft_arith.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( "sub_group_ballot_find_lsb"); - error |= rft_ballot.run_impl< + error |= rft_arith.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( "sub_group_ballot_find_msb"); + return error; } -- cgit v1.2.3 From 6b14d408dc8cc0a05bca554e8b43d269fba179d0 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Thu, 10 Feb 2022 06:24:33 +0000 Subject: Improve testing of kernel arg info in pipe_info test (#1326) The test now checks that CL_KERNEL_ARG_INFO_NOT_AVAILABLE is returned when calling clGetKernelArgInfo() with offline compilation modes. The correct function name is printed if clGetKernelArgInfo() fails when using online compilation (and not "clSetKernelArgInfo()"). When using online compilation, if the actual arg type is not as expected, the actual arg type is now logged, and the return value is now TEST_FAIL (-1) as per other failures (and not 1). All other test pass/fail values used in the test now use TEST_PASS and TEST_FAIL instead of 0 and -1 literals. An unnecessary cast of pipe_kernel_code has been removed. Signed-off-by: Stuart Brady --- test_conformance/pipes/test_pipe_info.cpp | 40 ++++++++++++++++++------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/test_conformance/pipes/test_pipe_info.cpp b/test_conformance/pipes/test_pipe_info.cpp index 7543c6cd..e7b486db 100644 --- a/test_conformance/pipes/test_pipe_info.cpp +++ b/test_conformance/pipes/test_pipe_info.cpp @@ -14,6 +14,7 @@ // limitations under the License. 
// #include "procs.h" +#include "harness/parseParameters.h" const char* pipe_kernel_code = { "__kernel void pipe_kernel(__write_only pipe int out_pipe)\n" @@ -39,8 +40,7 @@ int test_pipe_info( cl_device_id deviceID, cl_context context, cl_command_queue if (pipe_width != returnVal) { - log_error("Error in clGetPipeInfo() check of pipe packet size\n"); - return -1; + test_fail("Error in clGetPipeInfo() check of pipe packet size\n"); } else { @@ -52,29 +52,37 @@ int test_pipe_info( cl_device_id deviceID, cl_context context, cl_command_queue if(pipe_depth != returnVal) { - log_error( "Error in clGetPipeInfo() check of pipe max packets\n" ); - return -1; + test_fail("Error in clGetPipeInfo() check of pipe max packets\n"); } else { log_info( " CL_PIPE_MAX_PACKETS passed.\n" ); } - err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, (const char**)&pipe_kernel_code, "pipe_kernel", "-cl-std=CL2.0 -cl-kernel-arg-info"); - test_error_ret(err, " Error creating program", -1); + err = create_single_kernel_helper_with_build_options( + context, &program, &kernel, 1, &pipe_kernel_code, "pipe_kernel", + "-cl-std=CL2.0 -cl-kernel-arg-info"); + test_error_fail(err, "Error creating program"); cl_kernel_arg_type_qualifier arg_type_qualifier = 0; - cl_kernel_arg_type_qualifier expected_type_qualifier = CL_KERNEL_ARG_TYPE_PIPE; - err = clGetKernelArgInfo( kernel, 0, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(arg_type_qualifier), &arg_type_qualifier, NULL ); - test_error_ret(err, " clSetKernelArgInfo failed", -1); - err = (arg_type_qualifier != expected_type_qualifier); - - if(err) + err = clGetKernelArgInfo(kernel, 0, CL_KERNEL_ARG_TYPE_QUALIFIER, + sizeof(arg_type_qualifier), &arg_type_qualifier, + NULL); + if (gCompilationMode == kOnline) { - print_error(err, "ERROR: Bad type qualifier\n"); - return -1; + test_error_fail(err, "clGetKernelArgInfo failed"); + if (arg_type_qualifier != CL_KERNEL_ARG_TYPE_PIPE) + { + test_fail("ERROR: Incorrect type qualifier: 
%i\n", + arg_type_qualifier); + } + } + else + { + test_failure_error_ret(err, CL_KERNEL_ARG_INFO_NOT_AVAILABLE, + "clGetKernelArgInfo error not as expected", + TEST_FAIL); } - return err; - + return TEST_PASS; } -- cgit v1.2.3 From 2d93b122c3078cd67a0528ad9e791dbcadaf03d6 Mon Sep 17 00:00:00 2001 From: Jim Lewis Date: Tue, 22 Feb 2022 10:49:35 -0600 Subject: Sync submission_details with conformance doc v26 (#1389) Add "Patches" field --- test_conformance/submission_details_template.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test_conformance/submission_details_template.txt b/test_conformance/submission_details_template.txt index 9d276a62..ff624837 100644 --- a/test_conformance/submission_details_template.txt +++ b/test_conformance/submission_details_template.txt @@ -81,6 +81,12 @@ Platform Version: # Tests version: +# Commit SHAs (7-digit) of any cherry-picked patches subsequent to tagged +# version. Any patches included must apply without conflicts to the tagged +# version in the order listed. +# +Patches: + # Implementations that support cl_khr_icd are required to use a loader to run # the tests and document the loader that was used. 
# -- cgit v1.2.3 From 279803ababb0495843c05103a8d4a2e4a1fdf017 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Wed, 2 Mar 2022 13:25:53 +0000 Subject: Refactor kernel execution in subgroup tests (#1391) Signed-off-by: Stuart Brady --- test_conformance/subgroups/subhelpers.h | 212 ++++++++++++++++++-------------- 1 file changed, 123 insertions(+), 89 deletions(-) diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 153045d0..b88d2278 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -1322,73 +1322,129 @@ inline bool compare_ordered(const subgroups::cl_half &lhs, const int &rhs) return cl_half_to_float(lhs.data) == rhs; } -// Run a test kernel to compute the result of a built-in on an input -static int run_kernel(cl_context context, cl_command_queue queue, - cl_kernel kernel, size_t global, size_t local, - void *idata, size_t isize, void *mdata, size_t msize, - void *odata, size_t osize, size_t tsize = 0) -{ - clMemWrapper in; - clMemWrapper xy; - clMemWrapper out; - clMemWrapper tmp; - int error; +template class KernelExecutor { +public: + KernelExecutor(cl_context c, cl_command_queue q, cl_kernel k, size_t g, + size_t l, Ty *id, size_t is, Ty *mid, Ty *mod, cl_int *md, + size_t ms, Ty *od, size_t os, size_t ts = 0) + : context(c), queue(q), kernel(k), global(g), local(l), idata(id), + isize(is), mapin_data(mid), mapout_data(mod), mdata(md), msize(ms), + odata(od), osize(os), tsize(ts) + { + has_status = false; + run_failed = false; + } + cl_context context; + cl_command_queue queue; + cl_kernel kernel; + size_t global; + size_t local; + Ty *idata; + size_t isize; + Ty *mapin_data; + Ty *mapout_data; + cl_int *mdata; + size_t msize; + Ty *odata; + size_t osize; + size_t tsize; + bool run_failed; - in = clCreateBuffer(context, CL_MEM_READ_ONLY, isize, NULL, &error); - test_error(error, "clCreateBuffer failed"); +private: + bool has_status; + test_status status; - xy 
= clCreateBuffer(context, CL_MEM_WRITE_ONLY, msize, NULL, &error); - test_error(error, "clCreateBuffer failed"); +public: + // Run a test kernel to compute the result of a built-in on an input + int run() + { + clMemWrapper in; + clMemWrapper xy; + clMemWrapper out; + clMemWrapper tmp; + int error; - out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error); - test_error(error, "clCreateBuffer failed"); + in = clCreateBuffer(context, CL_MEM_READ_ONLY, isize, NULL, &error); + test_error(error, "clCreateBuffer failed"); - if (tsize) - { - tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, - tsize, NULL, &error); + xy = clCreateBuffer(context, CL_MEM_WRITE_ONLY, msize, NULL, &error); test_error(error, "clCreateBuffer failed"); - } - error = clSetKernelArg(kernel, 0, sizeof(in), (void *)&in); - test_error(error, "clSetKernelArg failed"); + out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error); + test_error(error, "clCreateBuffer failed"); - error = clSetKernelArg(kernel, 1, sizeof(xy), (void *)&xy); - test_error(error, "clSetKernelArg failed"); + if (tsize) + { + tmp = clCreateBuffer(context, + CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, + tsize, NULL, &error); + test_error(error, "clCreateBuffer failed"); + } - error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out); - test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 0, sizeof(in), (void *)&in); + test_error(error, "clSetKernelArg failed"); - if (tsize) - { - error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp); + error = clSetKernelArg(kernel, 1, sizeof(xy), (void *)&xy); test_error(error, "clSetKernelArg failed"); - } - error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL, - NULL); - test_error(error, "clEnqueueWriteBuffer failed"); + error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out); + test_error(error, "clSetKernelArg failed"); + + if (tsize) + { + error = clSetKernelArg(kernel, 3, 
sizeof(tmp), (void *)&tmp); + test_error(error, "clSetKernelArg failed"); + } + + error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, + NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); - error = clEnqueueWriteBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL, - NULL); - test_error(error, "clEnqueueWriteBuffer failed"); - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, - NULL, NULL); - test_error(error, "clEnqueueNDRangeKernel failed"); + error = clEnqueueWriteBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, + NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, + 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); - error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL, - NULL); - test_error(error, "clEnqueueReadBuffer failed"); + error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, + NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); - error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL, - NULL); - test_error(error, "clEnqueueReadBuffer failed"); + error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, + NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); - error = clFinish(queue); - test_error(error, "clFinish failed"); + error = clFinish(queue); + test_error(error, "clFinish failed"); - return error; -} + return error; + } + + test_status run_and_check(const WorkGroupParams &test_params) + { + cl_int error = run(); + if (error != CL_SUCCESS) + { + print_error(error, "Failed to run subgroup test kernel"); + status = TEST_FAIL; + run_failed = true; + return status; + } + + test_status tmp_status = + Fns::chk(idata, odata, mapin_data, mapout_data, mdata, test_params); + + if (!has_status || tmp_status == TEST_FAIL + || (tmp_status == TEST_PASS && status != TEST_FAIL)) + { + status = tmp_status; + 
has_status = true; + } + + return status; + } +}; // Driver for testing a single built in function template struct test @@ -1536,74 +1592,52 @@ template struct test test_error_fail(error, "Unable to set divergence mask argument"); } + KernelExecutor executor( + context, queue, kernel, global, local, idata.data(), + input_array_size * sizeof(Ty), mapin.data(), mapout.data(), + sgmap.data(), global * sizeof(cl_int4), odata.data(), + output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); + // Run the kernel once on zeroes to get the map memset(idata.data(), 0, input_array_size * sizeof(Ty)); - error = run_kernel(context, queue, kernel, global, local, idata.data(), - input_array_size * sizeof(Ty), sgmap.data(), - global * sizeof(cl_int4), odata.data(), - output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); + error = executor.run(); test_error_fail(error, "Running kernel first time failed"); // Generate the desired input for the kernel test_params.subgroup_size = subgroup_size; Fns::gen(idata.data(), mapin.data(), sgmap.data(), test_params); - test_status combined_status; + test_status status; if (test_params.divergence_mask_arg != -1) { - combined_status = TEST_SKIPPED_ITSELF; - for (auto &mask : test_params.all_work_item_masks) { test_params.work_items_mask = mask; cl_uint4 mask_vector = bs128_to_cl_uint4(mask); clSetKernelArg(kernel, test_params.divergence_mask_arg, sizeof(cl_uint4), &mask_vector); - error = run_kernel(context, queue, kernel, global, local, - idata.data(), input_array_size * sizeof(Ty), - sgmap.data(), global * sizeof(cl_int4), - odata.data(), output_array_size * sizeof(Ty), - TSIZE * sizeof(Ty)); - test_error_fail(error, "Running kernel second time failed"); - - // Check the result - test_status status = - Fns::chk(idata.data(), odata.data(), mapin.data(), - mapout.data(), sgmap.data(), test_params); - - if (status == TEST_FAIL - || (status == TEST_PASS && combined_status != TEST_FAIL)) - combined_status = status; + + status = 
executor.run_and_check(test_params); if (status == TEST_FAIL) break; } } else { - error = - run_kernel(context, queue, kernel, global, local, idata.data(), - input_array_size * sizeof(Ty), sgmap.data(), - global * sizeof(cl_int4), odata.data(), - output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); - test_error_fail(error, "Running kernel second time failed"); - - // Check the result - combined_status = - Fns::chk(idata.data(), odata.data(), mapin.data(), - mapout.data(), sgmap.data(), test_params); + status = executor.run_and_check(test_params); } - // Detailed failure and skip messages should be logged by Fns::gen - // and Fns::chk. - if (combined_status == TEST_PASS) + // Detailed failure and skip messages should be logged by + // run_and_check. + if (status == TEST_PASS) { Fns::log_test(test_params, " passed"); } - else if (combined_status == TEST_FAIL) + else if (!executor.run_failed && status == TEST_FAIL) { test_fail("Data verification failed\n"); } - return combined_status; + return status; } }; -- cgit v1.2.3 From d36196b662fb64d5610e027d836985bfc87ae07b Mon Sep 17 00:00:00 2001 From: Alastair Murray Date: Wed, 2 Mar 2022 18:27:52 +0000 Subject: Update format script and drop Travis badge for branch rename (#1393) `master` is now `main`, so update `check-format.sh` accordingly. Also completely drop the Travis badge as we now use GitHub actions. There is no replacement badge as the current action is pre-submission, not post-submission. 
--- README.md | 2 +- check-format.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b2d825fc..796f7c86 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,2 @@ -# OpenCL-CTS [![Build Status](https://api.travis-ci.org/KhronosGroup/OpenCL-CTS.svg?branch=master)](https://travis-ci.org/KhronosGroup/OpenCL-CTS/branches) +# OpenCL-CTS The OpenCL Conformance Tests diff --git a/check-format.sh b/check-format.sh index 7de2bd2c..be8f9d78 100755 --- a/check-format.sh +++ b/check-format.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash -# Arg used to specify non-'origin/master' comparison branch -ORIGIN_BRANCH=${1:-"origin/master"} +# Arg used to specify non-'origin/main' comparison branch +ORIGIN_BRANCH=${1:-"origin/main"} CLANG_BINARY=${2:-"`which clang-format-9`"} # Run git-clang-format to check for violations -- cgit v1.2.3 From e437acd908b435e65655ae31e210511f434e108c Mon Sep 17 00:00:00 2001 From: Jeremy Kemp Date: Wed, 2 Mar 2022 18:28:12 +0000 Subject: Added simple test for CL_DEVICE_PRINTF_BUFFER_SIZE. (#1386) * Added simple test for CL_DEVICE_PRINTF_BUFFER_SIZE. * Clang format fix. --- test_conformance/printf/test_printf.cpp | 134 ++++++++++++++++---------------- 1 file changed, 68 insertions(+), 66 deletions(-) diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp index 2b804e40..12ff6535 100644 --- a/test_conformance/printf/test_printf.cpp +++ b/test_conformance/printf/test_printf.cpp @@ -825,73 +825,75 @@ int test_address_space_4(cl_device_id deviceID, cl_context context, cl_command_q return doTest(gQueue, gContext, TYPE_ADDRESS_SPACE, 4, deviceID); } +int test_buffer_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + size_t printf_buff_size = 0; + const size_t printf_buff_size_req = !gIsEmbedded ? 
(1024 * 1024UL) : 1024UL; + const size_t config_size = sizeof(printf_buff_size); + cl_int err = CL_SUCCESS; + + err = clGetDeviceInfo(deviceID, CL_DEVICE_PRINTF_BUFFER_SIZE, config_size, + &printf_buff_size, NULL); + if (err != CL_SUCCESS) + { + log_error("Unable to query CL_DEVICE_PRINTF_BUFFER_SIZE"); + return TEST_FAIL; + } + + if (printf_buff_size < printf_buff_size_req) + { + log_error("CL_DEVICE_PRINTF_BUFFER_SIZE does not meet requirements"); + return TEST_FAIL; + } + + return TEST_PASS; +} + test_definition test_list[] = { - ADD_TEST( int_0 ), - ADD_TEST( int_1 ), - ADD_TEST( int_2 ), - ADD_TEST( int_3 ), - ADD_TEST( int_4 ), - ADD_TEST( int_5 ), - ADD_TEST( int_6 ), - ADD_TEST( int_7 ), - ADD_TEST( int_8 ), - - ADD_TEST( float_0 ), - ADD_TEST( float_1 ), - ADD_TEST( float_2 ), - ADD_TEST( float_3 ), - ADD_TEST( float_4 ), - ADD_TEST( float_5 ), - ADD_TEST( float_6 ), - ADD_TEST( float_7 ), - ADD_TEST( float_8 ), - ADD_TEST( float_9 ), - ADD_TEST( float_10 ), - ADD_TEST( float_11 ), - ADD_TEST( float_12 ), - ADD_TEST( float_13 ), - ADD_TEST( float_14 ), - ADD_TEST( float_15 ), - ADD_TEST( float_16 ), - ADD_TEST( float_17 ), - - ADD_TEST( float_limits_0 ), - ADD_TEST( float_limits_1 ), - ADD_TEST( float_limits_2 ), - - ADD_TEST( octal_0 ), - ADD_TEST( octal_1 ), - ADD_TEST( octal_2 ), - ADD_TEST( octal_3 ), - - ADD_TEST( unsigned_0 ), - ADD_TEST( unsigned_1 ), - - ADD_TEST( hexadecimal_0 ), - ADD_TEST( hexadecimal_1 ), - ADD_TEST( hexadecimal_2 ), - ADD_TEST( hexadecimal_3 ), - ADD_TEST( hexadecimal_4 ), - - ADD_TEST( char_0 ), - ADD_TEST( char_1 ), - ADD_TEST( char_2 ), - - ADD_TEST( string_0 ), - ADD_TEST( string_1 ), - ADD_TEST( string_2 ), - - ADD_TEST( vector_0 ), - ADD_TEST( vector_1 ), - ADD_TEST( vector_2 ), - ADD_TEST( vector_3 ), - ADD_TEST( vector_4 ), - - ADD_TEST( address_space_0 ), - ADD_TEST( address_space_1 ), - ADD_TEST( address_space_2 ), - ADD_TEST( address_space_3 ), - ADD_TEST( address_space_4 ), + ADD_TEST(int_0), ADD_TEST(int_1), + 
ADD_TEST(int_2), ADD_TEST(int_3), + ADD_TEST(int_4), ADD_TEST(int_5), + ADD_TEST(int_6), ADD_TEST(int_7), + ADD_TEST(int_8), + + ADD_TEST(float_0), ADD_TEST(float_1), + ADD_TEST(float_2), ADD_TEST(float_3), + ADD_TEST(float_4), ADD_TEST(float_5), + ADD_TEST(float_6), ADD_TEST(float_7), + ADD_TEST(float_8), ADD_TEST(float_9), + ADD_TEST(float_10), ADD_TEST(float_11), + ADD_TEST(float_12), ADD_TEST(float_13), + ADD_TEST(float_14), ADD_TEST(float_15), + ADD_TEST(float_16), ADD_TEST(float_17), + + ADD_TEST(float_limits_0), ADD_TEST(float_limits_1), + ADD_TEST(float_limits_2), + + ADD_TEST(octal_0), ADD_TEST(octal_1), + ADD_TEST(octal_2), ADD_TEST(octal_3), + + ADD_TEST(unsigned_0), ADD_TEST(unsigned_1), + + ADD_TEST(hexadecimal_0), ADD_TEST(hexadecimal_1), + ADD_TEST(hexadecimal_2), ADD_TEST(hexadecimal_3), + ADD_TEST(hexadecimal_4), + + ADD_TEST(char_0), ADD_TEST(char_1), + ADD_TEST(char_2), + + ADD_TEST(string_0), ADD_TEST(string_1), + ADD_TEST(string_2), + + ADD_TEST(vector_0), ADD_TEST(vector_1), + ADD_TEST(vector_2), ADD_TEST(vector_3), + ADD_TEST(vector_4), + + ADD_TEST(address_space_0), ADD_TEST(address_space_1), + ADD_TEST(address_space_2), ADD_TEST(address_space_3), + ADD_TEST(address_space_4), + + ADD_TEST(buffer_size), }; const int test_num = ARRAY_SIZE( test_list ); -- cgit v1.2.3 From ae217e8bd2de2ea7dc9a8d50574530a2a29e4be9 Mon Sep 17 00:00:00 2001 From: Jack Frankland <30410009+FranklandJack@users.noreply.github.com> Date: Wed, 2 Mar 2022 18:30:31 +0000 Subject: Check for non-uniform work-group support (#1383) Only run sub-group tests with non-uniform work-groups on OpenCL 3.0 and later if it is supported by the device. 
--- test_conformance/subgroups/test_workitem.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/test_conformance/subgroups/test_workitem.cpp b/test_conformance/subgroups/test_workitem.cpp index 7ffa6a7c..b69f3138 100644 --- a/test_conformance/subgroups/test_workitem.cpp +++ b/test_conformance/subgroups/test_workitem.cpp @@ -16,6 +16,7 @@ #include "procs.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" +#include struct get_test_data { @@ -251,8 +252,21 @@ int test_work_item_functions(cl_device_id device, cl_context context, global = local * 5; - // Make sure we have a flexible range - global += 3 * local / 4; + // Non-uniform work-groups are an optional feature from 3.0 onward. + cl_bool device_supports_non_uniform_wg = CL_TRUE; + if (get_device_cl_version(device) >= Version(3, 0)) + { + error = clGetDeviceInfo( + device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool), + &device_supports_non_uniform_wg, nullptr); + test_error(error, "clGetDeviceInfo failed"); + } + + if (device_supports_non_uniform_wg) + { + // Make sure we have a flexible range + global += 3 * local / 4; + } // Collect the data memset((void *)&result, 0xf0, sizeof(result)); @@ -327,4 +341,4 @@ int test_work_item_functions_ext(cl_device_id device, cl_context context, return test_work_item_functions(device, context, queue, num_elements, false); -} \ No newline at end of file +} -- cgit v1.2.3 From 3c4a1a3ce6ddb8880e1be7a6a1bb525b28db6e7f Mon Sep 17 00:00:00 2001 From: Jim Lewis Date: Wed, 2 Mar 2022 12:31:14 -0600 Subject: Fix build error for linux with clang-8 (#1304) -Wabsolute-value warning reported as error (long double truncated to double) --- test_conformance/math_brute_force/reference_math.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index 0b037e01..16db3d67 100644 --- 
a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -4549,8 +4549,8 @@ long double reference_powl(long double x, long double y) if (x != x || y != y) return x + y; // do the work required to sort out edge cases - double fabsy = reference_fabs(y); - double fabsx = reference_fabs(x); + double fabsy = (double)reference_fabsl(y); + double fabsx = (double)reference_fabsl(x); double iy = reference_rint( fabsy); // we do round to nearest here so that |fy| <= 0.5 if (iy > fabsy) // convert nearbyint to floor @@ -4637,13 +4637,13 @@ long double reference_powl(long double x, long double y) // compute product of y*log2(x) // scale to avoid overflow in double-double multiplication - if (reference_fabs(y) > HEX_DBL(+, 1, 0, +, 970)) + if (fabsy > HEX_DBL(+, 1, 0, +, 970)) { y_hi = reference_ldexp(y_hi, -53); y_lo = reference_ldexp(y_lo, -53); } MulDD(&ylog2x_hi, &ylog2x_lo, log2x_hi, log2x_lo, y_hi, y_lo); - if (fabs(y) > HEX_DBL(+, 1, 0, +, 970)) + if (fabsy > HEX_DBL(+, 1, 0, +, 970)) { ylog2x_hi = reference_ldexp(ylog2x_hi, 53); ylog2x_lo = reference_ldexp(ylog2x_lo, 53); -- cgit v1.2.3 From bbc7ccfc58386bea759ef4fa2cd47888172ad76a Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 2 Mar 2022 10:34:06 -0800 Subject: add a prefix to OpenCL extension names (#1311) * add a prefix to OpenCL extension names * fix formatting --- test_conformance/computeinfo/main.cpp | 12 ++-- test_conformance/spir/run_services.cpp | 126 +++++++++++++++++---------------- test_conformance/spir/run_services.h | 59 +++++++-------- 3 files changed, 97 insertions(+), 100 deletions(-) diff --git a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp index d993655b..03bdb2c1 100644 --- a/test_conformance/computeinfo/main.cpp +++ b/test_conformance/computeinfo/main.cpp @@ -95,8 +95,8 @@ typedef struct _version version_t; struct _extensions { - int cl_khr_fp64; - int cl_khr_fp16; + int has_cl_khr_fp64; + int 
has_cl_khr_fp16; }; typedef struct _extensions extensions_t; @@ -1069,11 +1069,11 @@ int parseExtensions(char const* str, extensions_t* extensions) } if (strncmp(begin, "cl_khr_fp64", length) == 0) { - extensions->cl_khr_fp64 = 1; + extensions->has_cl_khr_fp64 = 1; } if (strncmp(begin, "cl_khr_fp16", length) == 0) { - extensions->cl_khr_fp16 = 1; + extensions->has_cl_khr_fp16 = 1; } begin += length; // Skip word. if (begin[0] == ' ') @@ -1112,13 +1112,13 @@ int getConfigInfos(cl_device_id device) // version 1.1, we have to check doubles are sopported. In // OpenCL 1.2 CL_DEVICE_DOUBLE_FP_CONFIG should be reported // unconditionally. - get = extensions.cl_khr_fp64; + get = extensions.has_cl_khr_fp64; }; if (info.opcode == CL_DEVICE_HALF_FP_CONFIG) { // CL_DEVICE_HALF_FP_CONFIG should be reported only when cl_khr_fp16 // extension is available - get = extensions.cl_khr_fp16; + get = extensions.has_cl_khr_fp16; }; if (get) { diff --git a/test_conformance/spir/run_services.cpp b/test_conformance/spir/run_services.cpp index 06fc418d..3162e16f 100644 --- a/test_conformance/spir/run_services.cpp +++ b/test_conformance/spir/run_services.cpp @@ -389,6 +389,7 @@ OclExtensions OclExtensions::getDeviceCapabilities(cl_device_id devId) { ret = ret | OclExtensions::fromString(*it); } + return ret; } @@ -399,75 +400,80 @@ OclExtensions OclExtensions::empty() OclExtensions OclExtensions::fromString(const std::string& e) { - std::string s = "OclExtensions::" + e; - RETURN_IF_ENUM(s, OclExtensions::cl_khr_int64_base_atomics); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_int64_extended_atomics); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_3d_image_writes); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_fp16); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_sharing); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_event); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_d3d10_sharing); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_dx9_media_sharing); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_d3d11_sharing); - 
RETURN_IF_ENUM(s, OclExtensions::cl_khr_depth_images); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_depth_images); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_msaa_sharing); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_image2d_from_buffer); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_initialize_memory); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_spir); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_fp64); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_global_int32_base_atomics); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_global_int32_extended_atomics); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_local_int32_base_atomics); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_local_int32_extended_atomics); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_byte_addressable_store); - RETURN_IF_ENUM(s, OclExtensions::cles_khr_int64); - RETURN_IF_ENUM(s, OclExtensions::cles_khr_2d_image_array_writes); + std::string s = "OclExtensions::has_" + e; + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_int64_base_atomics); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_int64_extended_atomics); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_3d_image_writes); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_fp16); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_gl_sharing); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_gl_event); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_d3d10_sharing); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_dx9_media_sharing); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_d3d11_sharing); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_depth_images); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_gl_depth_images); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_gl_msaa_sharing); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_image2d_from_buffer); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_initialize_memory); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_spir); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_fp64); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_global_int32_base_atomics); + 
RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_global_int32_extended_atomics); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_local_int32_base_atomics); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_local_int32_extended_atomics); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_byte_addressable_store); + RETURN_IF_ENUM(s, OclExtensions::has_cles_khr_int64); + RETURN_IF_ENUM(s, OclExtensions::has_cles_khr_2d_image_array_writes); // Unknown KHR string. return OclExtensions::empty(); } std::string OclExtensions::toString() { - - #define APPEND_STR_IF_SUPPORTS( STR, E) \ - if ( this->supports(E) ) \ - { \ - std::string ext_str( #E ); \ - std::string prefix = "OclExtensions::"; \ - size_t pos = ext_str.find( prefix ); \ - if ( pos != std::string::npos ) \ - { \ - ext_str.replace( pos, prefix.length(), ""); \ - } \ - STR += ext_str; \ - } +#define APPEND_STR_IF_SUPPORTS(STR, E) \ + if (this->supports(E)) \ + { \ + std::string ext_str(#E); \ + std::string prefix = "OclExtensions::has_"; \ + size_t pos = ext_str.find(prefix); \ + if (pos != std::string::npos) \ + { \ + ext_str.replace(pos, prefix.length(), ""); \ + } \ + STR += ext_str; \ + STR += " "; \ + } std::string s = ""; - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_int64_base_atomics ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_int64_extended_atomics ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_3d_image_writes ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_fp16 ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_gl_sharing ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_gl_event ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_d3d10_sharing ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_dx9_media_sharing ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_d3d11_sharing ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_depth_images ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_gl_depth_images ); - APPEND_STR_IF_SUPPORTS( s, 
OclExtensions::cl_khr_gl_msaa_sharing ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_image2d_from_buffer ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_initialize_memory ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_spir ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_fp64 ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_global_int32_base_atomics ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_global_int32_extended_atomics ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_local_int32_base_atomics ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_local_int32_extended_atomics ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_byte_addressable_store ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cles_khr_int64 ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cles_khr_2d_image_array_writes ); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_int64_base_atomics); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_int64_extended_atomics); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_3d_image_writes); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_fp16); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_gl_sharing); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_gl_event); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_d3d10_sharing); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_dx9_media_sharing); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_d3d11_sharing); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_depth_images); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_gl_depth_images); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_gl_msaa_sharing); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_image2d_from_buffer); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_initialize_memory); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_spir); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_fp64); + APPEND_STR_IF_SUPPORTS(s, + 
OclExtensions::has_cl_khr_global_int32_base_atomics); + APPEND_STR_IF_SUPPORTS( + s, OclExtensions::has_cl_khr_global_int32_extended_atomics); + APPEND_STR_IF_SUPPORTS(s, + OclExtensions::has_cl_khr_local_int32_base_atomics); + APPEND_STR_IF_SUPPORTS( + s, OclExtensions::has_cl_khr_local_int32_extended_atomics); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_byte_addressable_store); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cles_khr_int64); + APPEND_STR_IF_SUPPORTS(s, + OclExtensions::has_cles_khr_2d_image_array_writes); return s; } diff --git a/test_conformance/spir/run_services.h b/test_conformance/spir/run_services.h index 6bac4c91..10f0d05e 100644 --- a/test_conformance/spir/run_services.h +++ b/test_conformance/spir/run_services.h @@ -113,42 +113,33 @@ private: OclExtensions(size_t ext) : m_extVector(ext) {} -// Fix a compilation error, since cl_khr_gl_sharing is defined as a macro. -#ifdef cl_khr_gl_sharing -#undef cl_khr_gl_sharing -#endif//cl_khr_gl_sharing - -#ifdef cl_khr_icd -#undef cl_khr_icd -#endif//cl_khr_icd - enum ClKhrs { - no_extensions = KhrValue<0>::Mask, - cl_khr_int64_base_atomics = KhrValue<1>::Mask, - cl_khr_int64_extended_atomics = KhrValue<2>::Mask, - cl_khr_3d_image_writes = KhrValue<3>::Mask, - cl_khr_fp16 = KhrValue<4>::Mask, - cl_khr_gl_sharing = KhrValue<5>::Mask, - cl_khr_gl_event = KhrValue<6>::Mask, - cl_khr_d3d10_sharing = KhrValue<7>::Mask, - cl_khr_dx9_media_sharing = KhrValue<8>::Mask, - cl_khr_d3d11_sharing = KhrValue<9>::Mask, - cl_khr_depth_images = KhrValue<10>::Mask, - cl_khr_gl_depth_images = KhrValue<11>::Mask, - cl_khr_gl_msaa_sharing = KhrValue<12>::Mask, - cl_khr_image2d_from_buffer = KhrValue<13>::Mask, - cl_khr_initialize_memory = KhrValue<14>::Mask, - cl_khr_context_abort = KhrValue<15>::Mask, - cl_khr_spir = KhrValue<16>::Mask, - cl_khr_fp64 = KhrValue<17>::Mask, - cl_khr_global_int32_base_atomics = KhrValue<18>::Mask, - cl_khr_global_int32_extended_atomics = KhrValue<19>::Mask, - 
cl_khr_local_int32_base_atomics = KhrValue<20>::Mask, - cl_khr_local_int32_extended_atomics = KhrValue<21>::Mask, - cl_khr_byte_addressable_store = KhrValue<22>::Mask, - cles_khr_int64 = KhrValue<23>::Mask, - cles_khr_2d_image_array_writes = KhrValue<24>::Mask, + no_extensions = KhrValue<0>::Mask, + has_cl_khr_int64_base_atomics = KhrValue<1>::Mask, + has_cl_khr_int64_extended_atomics = KhrValue<2>::Mask, + has_cl_khr_3d_image_writes = KhrValue<3>::Mask, + has_cl_khr_fp16 = KhrValue<4>::Mask, + has_cl_khr_gl_sharing = KhrValue<5>::Mask, + has_cl_khr_gl_event = KhrValue<6>::Mask, + has_cl_khr_d3d10_sharing = KhrValue<7>::Mask, + has_cl_khr_dx9_media_sharing = KhrValue<8>::Mask, + has_cl_khr_d3d11_sharing = KhrValue<9>::Mask, + has_cl_khr_depth_images = KhrValue<10>::Mask, + has_cl_khr_gl_depth_images = KhrValue<11>::Mask, + has_cl_khr_gl_msaa_sharing = KhrValue<12>::Mask, + has_cl_khr_image2d_from_buffer = KhrValue<13>::Mask, + has_cl_khr_initialize_memory = KhrValue<14>::Mask, + has_cl_khr_context_abort = KhrValue<15>::Mask, + has_cl_khr_spir = KhrValue<16>::Mask, + has_cl_khr_fp64 = KhrValue<17>::Mask, + has_cl_khr_global_int32_base_atomics = KhrValue<18>::Mask, + has_cl_khr_global_int32_extended_atomics = KhrValue<19>::Mask, + has_cl_khr_local_int32_base_atomics = KhrValue<20>::Mask, + has_cl_khr_local_int32_extended_atomics = KhrValue<21>::Mask, + has_cl_khr_byte_addressable_store = KhrValue<22>::Mask, + has_cles_khr_int64 = KhrValue<23>::Mask, + has_cles_khr_2d_image_array_writes = KhrValue<24>::Mask, }; size_t m_extVector; -- cgit v1.2.3 From 0f4dc3166c9604b781d92a9acfd1fd13c4915846 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Thu, 17 Mar 2022 14:27:24 -0600 Subject: conversions: Use volatile qualifier to prevent optimizations (#1399) Use volatile to prevent clang optimizations, fix int2float --- test_conformance/conversions/basic_test_conversions.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index 32998841..3ee072da 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ b/test_conformance/conversions/basic_test_conversions.cpp @@ -696,7 +696,8 @@ static void int2short( void *out, void *in){ ((cl_short*) out)[0] = ((cl_int*) i static void int2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_int*) in)[0]; } static void int2float( void *out, void *in) { - cl_int l = ((cl_int*) in)[0]; + // Use volatile to prevent optimization by Clang compiler + volatile cl_int l = ((cl_int *)in)[0]; ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 } static void int2double( void *out, void *in) -- cgit v1.2.3 From f6dbc5b9b5321ae9d9dc177353e233d6d7964ec9 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 22 Mar 2022 16:21:09 +0000 Subject: Add cluster size handling in subgroup test helpers (#1394) Signed-off-by: Stuart Brady --- test_conformance/subgroups/subhelpers.h | 48 ++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index b88d2278..c73027dc 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -55,11 +55,12 @@ static cl_uint4 bs128_to_cl_uint4(bs128 v) struct WorkGroupParams { - WorkGroupParams(size_t gws, size_t lws, int dm_arg = -1) + WorkGroupParams(size_t gws, size_t lws, int dm_arg = -1, int cs_arg = -1) : global_workgroup_size(gws), local_workgroup_size(lws), - divergence_mask_arg(dm_arg) + divergence_mask_arg(dm_arg), cluster_size_arg(cs_arg) { subgroup_size = 0; + cluster_size = 0; work_items_mask = 0; use_core_subgroups = true; dynsc = 0; @@ -68,11 +69,13 @@ struct WorkGroupParams size_t global_workgroup_size; size_t local_workgroup_size; size_t subgroup_size; + cl_uint cluster_size; bs128 
work_items_mask; int dynsc; bool use_core_subgroups; std::vector all_work_item_masks; int divergence_mask_arg; + int cluster_size_arg; void save_kernel_source(const std::string &source, std::string name = "") { if (name == "") @@ -1421,7 +1424,9 @@ public: return error; } - test_status run_and_check(const WorkGroupParams &test_params) +private: + test_status + run_and_check_with_cluster_size(const WorkGroupParams &test_params) { cl_int error = run(); if (error != CL_SUCCESS) @@ -1444,6 +1449,35 @@ public: return status; } + +public: + test_status run_and_check(WorkGroupParams &test_params) + { + test_status tmp_status = TEST_SKIPPED_ITSELF; + + if (test_params.cluster_size_arg != -1) + { + for (cl_uint cluster_size = 1; + cluster_size <= test_params.subgroup_size; cluster_size *= 2) + { + test_params.cluster_size = cluster_size; + cl_int error = + clSetKernelArg(kernel, test_params.cluster_size_arg, + sizeof(cl_uint), &cluster_size); + test_error_fail(error, "Unable to set cluster size"); + + tmp_status = run_and_check_with_cluster_size(test_params); + + if (tmp_status == TEST_FAIL) break; + } + } + else + { + tmp_status = run_and_check_with_cluster_size(test_params); + } + + return tmp_status; + } }; // Driver for testing a single built in function @@ -1592,6 +1626,14 @@ template struct test test_error_fail(error, "Unable to set divergence mask argument"); } + if (test_params.cluster_size_arg != -1) + { + cl_uint dummy_cluster_size = 1; + error = clSetKernelArg(kernel, test_params.cluster_size_arg, + sizeof(cl_uint), &dummy_cluster_size); + test_error_fail(error, "Unable to set dummy cluster size"); + } + KernelExecutor executor( context, queue, kernel, global, local, idata.data(), input_array_size * sizeof(Ty), mapin.data(), mapout.data(), -- cgit v1.2.3 From c42cf518dabf79b19d795ca9ce1e41c848cf54e8 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 29 Mar 2022 19:39:06 +0100 Subject: Improve cl_khr_subgroup_shuffle* test coverage (#1402) Test cases where the 
index/mask/delta is greater than or equal to the maximum subgroup size. These are cases that return undefined results but are not undefined behavior. The index/mask/delta values now include values less than twice the subgroup size, and 0xffffffff. Testing for sub_group_shuffle_xor() already allowed inputs that were greater or equal to the subgroup size for the last subgroup in a workgroup, but did not properly account for this in the verification function, potentially resulting in out of bounds accesses. Signed-off-by: Stuart Brady --- .../subgroups/subgroup_common_templates.h | 78 +++++++++------------- 1 file changed, 33 insertions(+), 45 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 641c1875..0ffa46c8 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -481,12 +481,12 @@ template struct SHF static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { - int i, ii, j, k, l, n, delta; + int i, ii, j, k, n, delta; + cl_uint l; int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; int nj = (nw + ns - 1) / ns; - int d = ns > 100 ? 100 : ns; ii = 0; ng = ng / nw; for (k = 0; k < ng; ++k) @@ -498,33 +498,10 @@ template struct SHF for (i = 0; i < n; ++i) { int midx = 4 * ii + 4 * i + 2; - l = (int)(genrand_int32(gMTdata) & 0x7fffffff) - % (d > n ? 
n : d); - switch (operation) - { - case ShuffleOp::shuffle: - case ShuffleOp::shuffle_xor: - // storing information about shuffle index - m[midx] = (cl_int)l; - break; - case ShuffleOp::shuffle_up: - delta = l; // calculate delta for shuffle up - if (i - delta < 0) - { - delta = i; - } - m[midx] = (cl_int)delta; - break; - case ShuffleOp::shuffle_down: - delta = l; // calculate delta for shuffle down - if (i + delta >= n) - { - delta = n - 1 - i; - } - m[midx] = (cl_int)delta; - break; - default: break; - } + l = (((cl_uint)(genrand_int32(gMTdata) & 0x7fffffff) + 1) + % (ns * 2 + 1)) + - 1; + m[midx] = l; cl_ulong number = genrand_int64(gMTdata); set_value(t[ii + i], number); } @@ -542,7 +519,8 @@ template struct SHF static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, const WorkGroupParams &test_params) { - int ii, i, j, k, l, n; + int ii, i, j, k, n; + cl_uint l; int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; @@ -567,32 +545,42 @@ template struct SHF { // inside the subgroup // shuffle index storage int midx = 4 * ii + 4 * i + 2; - l = (int)m[midx]; + l = m[midx]; rr = my[ii + i]; + cl_uint tr_idx; + bool skip = false; switch (operation) { // shuffle basic - treat l as index - case ShuffleOp::shuffle: tr = mx[ii + l]; break; - // shuffle up - treat l as delta - case ShuffleOp::shuffle_up: tr = mx[ii + i - l]; break; + case ShuffleOp::shuffle: tr_idx = l; break; + // shuffle xor - treat l as mask + case ShuffleOp::shuffle_xor: tr_idx = i ^ l; break; // shuffle up - treat l as delta - case ShuffleOp::shuffle_down: - tr = mx[ii + i + l]; + case ShuffleOp::shuffle_up: + if (l >= ns) skip = true; + tr_idx = i - l; break; - // shuffle xor - treat l as mask - case ShuffleOp::shuffle_xor: - tr = mx[ii + (i ^ l)]; + // shuffle down - treat l as delta + case ShuffleOp::shuffle_down: + if (l >= ns) skip = true; + tr_idx = i + l; break; default: break; } - if (!compare(rr, tr)) + if (!skip 
&& tr_idx < n) { - log_error("ERROR: sub_group_%s(%s) mismatch for " - "local id %d in sub group %d in group %d\n", - operation_names(operation), - TypeManager::name(), i, j, k); - return TEST_FAIL; + tr = mx[ii + tr_idx]; + + if (!compare(rr, tr)) + { + log_error("ERROR: sub_group_%s(%s) mismatch for " + "local id %d in sub group %d in group " + "%d\n", + operation_names(operation), + TypeManager::name(), i, j, k); + return TEST_FAIL; + } } } } -- cgit v1.2.3 From 93f4f6a54842b0080ec7bb562e0324f735487a36 Mon Sep 17 00:00:00 2001 From: Jason Tang Date: Tue, 29 Mar 2022 19:08:08 -0400 Subject: test_api_min_max.cpp: use size_t for get_global_id() value (#1410) In some rare cases where get_global_id() is larger than 2G, the 32bit int type would convert the value into a negative integer. --- test_conformance/api/test_api_min_max.cpp | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp index 28ca8237..9e08b16d 100644 --- a/test_conformance/api/test_api_min_max.cpp +++ b/test_conformance/api/test_api_min_max.cpp @@ -22,19 +22,11 @@ const char *sample_single_param_kernel[] = { "__kernel void sample_test(__global int *src)\n" "{\n" - " int tid = get_global_id(0);\n" + " size_t tid = get_global_id(0);\n" "\n" "}\n" }; -const char *sample_single_param_write_kernel[] = { - "__kernel void sample_test(__global int *src)\n" - "{\n" - " int tid = get_global_id(0);\n" - " src[tid] = tid;\n" - "\n" - "}\n" -}; const char *sample_read_image_kernel_pattern[] = { "__kernel void sample_test( __global float *result, ", @@ -42,7 +34,7 @@ const char *sample_read_image_kernel_pattern[] = { "{\n" " sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | " "CLK_FILTER_NEAREST;\n" - " int tid = get_global_id(0);\n" + " size_t tid = get_global_id(0);\n" " result[0] = 0.0f;\n", "\n" "}\n" @@ -52,7 +44,7 @@ const char *sample_write_image_kernel_pattern[] = { 
"__kernel void sample_test( ", " )\n" "{\n" - " int tid = get_global_id(0);\n", + " size_t tid = get_global_id(0);\n", "\n" "}\n" }; @@ -81,8 +73,8 @@ const char *sample_sampler_kernel_pattern[] = { ", sampler_t sampler%d", ")\n" "{\n" - " int tid = get_global_id(0);\n", - " dst[ 0 ] = read_imagei( src, sampler%d, (int2)( 0, 0 ) );\n", + " size_t tid = get_global_id(0);\n", + " dst[ 0 ] = read_imagei( src, sampler%d, (int2)( 0, 0 ) );\n", "\n" "}\n" }; @@ -90,7 +82,7 @@ const char *sample_sampler_kernel_pattern[] = { const char *sample_const_arg_kernel[] = { "__kernel void sample_test(__constant int *src1, __global int *dst)\n" "{\n" - " int tid = get_global_id(0);\n" + " size_t tid = get_global_id(0);\n" "\n" " dst[tid] = src1[tid];\n" "\n" @@ -101,7 +93,7 @@ const char *sample_local_arg_kernel[] = { "__kernel void sample_test(__local int *src1, __global int *global_src, " "__global int *dst)\n" "{\n" - " int tid = get_global_id(0);\n" + " size_t tid = get_global_id(0);\n" "\n" " src1[tid] = global_src[tid];\n" " barrier(CLK_GLOBAL_MEM_FENCE);\n" -- cgit v1.2.3 From e121b9d1bf0380a9d0468686e79ac1b4057857b8 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Mon, 4 Apr 2022 17:57:36 +0100 Subject: Fix sub_group_ballot_find_msb/lsb tests (#1411) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As per the OpenCL Extension Specification § 38.6 Ballots: If no bits representing predicate values from all work items in the subgroup are set in the bitfield value then the return value is undefined. The case with no bits set is still worth testing, as it does not result in undefined behavior, but only an undefined return value. 
Signed-off-by: Stuart Brady --- test_conformance/subgroups/test_subgroup_ballot.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 4148707e..b1e6944f 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -609,6 +609,12 @@ template struct BALLOT_COUNT_SCAN_FIND } else if (operation == BallotOp::ballot_find_lsb) { + if (bs.none()) + { + // Return value is undefined when no bits are set, + // so skip validation: + continue; + } for (int id = 0; id < sbs; ++id) { if (bs.test(id)) @@ -630,6 +636,12 @@ template struct BALLOT_COUNT_SCAN_FIND } else if (operation == BallotOp::ballot_find_msb) { + if (bs.none()) + { + // Return value is undefined when no bits are set, + // so skip validation: + continue; + } for (int id = sbs - 1; id >= 0; --id) { if (bs.test(id)) -- cgit v1.2.3 From 2fcdde96d246cd405ec4fc97fb90eb235ba9fd1e Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 4 Apr 2022 13:19:30 -0700 Subject: refactor work group scan and reduction tests (#1401) * updated reduce test * switched all reduce tests to new framework * switch over scans to new framework * remove old files * minor fixes * add type type name to the kernel name * fix Windows build and warnings * address review comments --- test_conformance/workgroups/CMakeLists.txt | 10 +- test_conformance/workgroups/test_wg_reduce.cpp | 596 ------------------- test_conformance/workgroups/test_wg_reduce_max.cpp | 632 -------------------- test_conformance/workgroups/test_wg_reduce_min.cpp | 632 -------------------- .../workgroups/test_wg_scan_exclusive_add.cpp | 604 -------------------- .../workgroups/test_wg_scan_exclusive_max.cpp | 632 -------------------- .../workgroups/test_wg_scan_exclusive_min.cpp | 633 --------------------- .../workgroups/test_wg_scan_inclusive_add.cpp | 593 ------------------- 
.../workgroups/test_wg_scan_inclusive_max.cpp | 597 ------------------- .../workgroups/test_wg_scan_inclusive_min.cpp | 597 ------------------- .../workgroups/test_wg_scan_reduce.cpp | 456 +++++++++++++++ 11 files changed, 457 insertions(+), 5525 deletions(-) delete mode 100644 test_conformance/workgroups/test_wg_reduce.cpp delete mode 100644 test_conformance/workgroups/test_wg_reduce_max.cpp delete mode 100644 test_conformance/workgroups/test_wg_reduce_min.cpp delete mode 100644 test_conformance/workgroups/test_wg_scan_exclusive_add.cpp delete mode 100644 test_conformance/workgroups/test_wg_scan_exclusive_max.cpp delete mode 100644 test_conformance/workgroups/test_wg_scan_exclusive_min.cpp delete mode 100644 test_conformance/workgroups/test_wg_scan_inclusive_add.cpp delete mode 100644 test_conformance/workgroups/test_wg_scan_inclusive_max.cpp delete mode 100644 test_conformance/workgroups/test_wg_scan_inclusive_min.cpp create mode 100644 test_conformance/workgroups/test_wg_scan_reduce.cpp diff --git a/test_conformance/workgroups/CMakeLists.txt b/test_conformance/workgroups/CMakeLists.txt index c90bef88..0c004b32 100644 --- a/test_conformance/workgroups/CMakeLists.txt +++ b/test_conformance/workgroups/CMakeLists.txt @@ -5,15 +5,7 @@ set(${MODULE_NAME}_SOURCES test_wg_all.cpp test_wg_any.cpp test_wg_broadcast.cpp - test_wg_reduce.cpp - test_wg_reduce_max.cpp - test_wg_reduce_min.cpp - test_wg_scan_exclusive_add.cpp - test_wg_scan_exclusive_min.cpp - test_wg_scan_exclusive_max.cpp - test_wg_scan_inclusive_add.cpp - test_wg_scan_inclusive_min.cpp - test_wg_scan_inclusive_max.cpp + test_wg_scan_reduce.cpp test_wg_suggested_local_work_size.cpp ) diff --git a/test_conformance/workgroups/test_wg_reduce.cpp b/test_conformance/workgroups/test_wg_reduce.cpp deleted file mode 100644 index eb26f498..00000000 --- a/test_conformance/workgroups/test_wg_reduce.cpp +++ /dev/null @@ -1,596 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - - -const char *wg_reduce_add_kernel_code_int = -"__kernel void test_wg_reduce_add_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_reduce_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_reduce_add_kernel_code_uint = -"__kernel void test_wg_reduce_add_uint(global uint *input, global uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_reduce_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_reduce_add_kernel_code_long = -"__kernel void test_wg_reduce_add_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_reduce_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_reduce_add_kernel_code_ulong = -"__kernel void test_wg_reduce_add_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_reduce_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -static int -verify_wg_reduce_add_int(int *inptr, int *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - sum += inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? 
wg_size : (n-i)); j++) - { - if ( sum != outptr[i+j] ) - { - log_info("work_group_reduce_add int: Error at %u: expected = %d, got = %d\n", i+j, sum, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_add_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - sum += inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( sum != outptr[i+j] ) - { - log_info("work_group_reduce_add uint: Error at %u: expected = %d, got = %d\n", i+j, sum, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_add_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - sum += inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( sum != outptr[i+j] ) - { - log_info("work_group_reduce_add long: Error at %u: expected = %lld, got = %lld\n", i+j, sum, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_add_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - sum += inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? 
wg_size : (n-i)); j++) - { - if ( sum != outptr[i+j] ) - { - log_info("work_group_reduce_add ulong: Error at %u: expected = %llu, got = %llu\n", i+j, sum, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - - - -int -test_work_group_reduce_add_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_reduce_add_kernel_code_int, - "test_wg_reduce_add_int"); - if (err) - return -1; - - // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. - err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); - test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include "procs.h" - - -const char *wg_reduce_max_kernel_code_int = -"__kernel void test_wg_reduce_max_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_reduce_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_reduce_max_kernel_code_uint = -"__kernel void test_wg_reduce_max_uint(global uint *input, global 
uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_reduce_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_reduce_max_kernel_code_long = -"__kernel void test_wg_reduce_max_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_reduce_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_reduce_max_kernel_code_ulong = -"__kernel void test_wg_reduce_max_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_reduce_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -static int -verify_wg_reduce_max_int(int *inptr, int *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - max = (max > inptr[i+j]) ? max : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( max != outptr[i+j] ) - { - log_info("work_group_reduce_max int: Error at %u: expected = %d, got = %d\n", i+j, max, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_max_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - max = (max > inptr[i+j]) ? max : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( max != outptr[i+j] ) - { - log_info("work_group_reduce_max uint: Error at %u: expected = %d, got = %d\n", i+j, max, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_max_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - max = (max > inptr[i+j]) ? max : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? 
wg_size : (n-i)); j++) - { - if ( max != outptr[i+j] ) - { - log_info("work_group_reduce_max long: Error at %u: expected = %lld, got = %lld\n", i+j, max, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - max = (max > inptr[i+j]) ? max : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( max != outptr[i+j] ) - { - log_info("work_group_reduce_max ulong: Error at %u: expected = %llu, got = %llu\n", i+j, max, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - - - -int -test_work_group_reduce_max_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t wg_sizes_per_dimension[3]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_reduce_max_kernel_code_int, - "test_wg_reduce_max_int"); - if (err) - return -1; - - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); - if (err) - return -1; - if(wg_sizes_per_dimension[0] < wg_size[0]) - { - wg_size[0] = wg_sizes_per_dimension[0]; - } - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - 
sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include "procs.h" - - -const char *wg_reduce_min_kernel_code_int = -"__kernel void test_wg_reduce_min_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_reduce_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_reduce_min_kernel_code_uint = -"__kernel void test_wg_reduce_min_uint(global uint *input, global uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_reduce_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_reduce_min_kernel_code_long = -"__kernel void test_wg_reduce_min_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_reduce_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_reduce_min_kernel_code_ulong = -"__kernel void test_wg_reduce_min_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_reduce_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -static int -verify_wg_reduce_min_int(int *inptr, int *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - min = (min < inptr[i+j]) ? min : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( min != outptr[i+j] ) - { - log_info("work_group_reduce_min int: Error at %u: expected = %d, got = %d\n", i+j, min, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - min = (min < inptr[i+j]) ? 
min : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( min != outptr[i+j] ) - { - log_info("work_group_reduce_min uint: Error at %u: expected = %d, got = %d\n", i+j, min, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - min = (min < inptr[i+j]) ? min : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( min != outptr[i+j] ) - { - log_info("work_group_reduce_min long: Error at %u: expected = %lld, got = %lld\n", i+j, min, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - min = (min < inptr[i+j]) ? min : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( min != outptr[i+j] ) - { - log_info("work_group_reduce_min ulong: Error at %u: expected = %llu, got = %llu\n", i+j, min, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - - - -int -test_work_group_reduce_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t wg_sizes_per_dimension[3]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_reduce_min_kernel_code_int, - "test_wg_reduce_min_int"); - if (err) - return -1; - - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); - if (err) - return 
-1; - if(wg_sizes_per_dimension[0] < wg_size[0]) - { - wg_size[0] = wg_sizes_per_dimension[0]; - } - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include "procs.h" - - -const char *wg_scan_exclusive_add_kernel_code_int = -"__kernel void test_wg_scan_exclusive_add_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_scan_exclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_exclusive_add_kernel_code_uint = -"__kernel void test_wg_scan_exclusive_add_uint(global uint *input, global uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_scan_exclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_scan_exclusive_add_kernel_code_long = -"__kernel void test_wg_scan_exclusive_add_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_scan_exclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_exclusive_add_kernel_code_ulong = -"__kernel void test_wg_scan_exclusive_add_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_scan_exclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -static int -verify_wg_scan_exclusive_add_int(int *inptr, int 
*outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - int s, lasts; - - - - for (j = 0; j < n; j += wg_size) { - m = n - j; - if (m > wg_size) m = wg_size; - - s = 0; - lasts = 0; - for (i = 0; i < m; ++i) { - s += inptr[j + i]; - if (outptr[j + i] != lasts) { - log_info("work_group_scan_exclusive_add int: Error at %u: expected = %d, got = %d\n", - (unsigned int)(j + i), lasts, outptr[j + i]); - return -1; - } - lasts = s; - } - } - return 0; -} - -static int -verify_wg_scan_exclusive_add_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - unsigned int s, lasts; - - for (j = 0; j < n; j += wg_size) { - m = n - j; - if (m > wg_size) m = wg_size; - s = 0; - lasts = 0; - for (i = 0; i < m; ++i) { - s += inptr[j + i]; - if (outptr[j + i] != lasts) { - log_info("work_group_scan_exclusive_add uint: Error at %u: expected = %u, got = %u\n", - (unsigned int)(j + i), lasts, outptr[j + i]); - return -1; - } - lasts = s; - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_add_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - cl_long s, lasts; - - for (j = 0; j < n; j += wg_size) { - m = n - j; - if (m > wg_size) m = wg_size; - s = 0; - - lasts = 0; - for (i = 0; i < m; ++i) { - s += inptr[j + i]; - - if (outptr[j + i] != lasts) { - log_info("work_group_scan_exclusive_add long: Error at %u: expected = %lld, got = %lld\n", - (unsigned int)(j + i), (long long)lasts, (long long)outptr[j + i]); - return -1; - } - lasts = s; - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_add_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - cl_ulong s, lasts; - - for (j = 0; j < n; j += wg_size) { - m = n - j; - if (m > wg_size) m = wg_size; - - s = 0; - lasts = 0; - for (i = 0; i < m; ++i) { - s += inptr[j + i]; - if (outptr[j + i] != lasts) { - log_info("work_group_scan_exclusive_add ulong: Error at %u: expected = %llu, got = %llu\n", - 
(unsigned int)(j + i), (unsigned long long)lasts, (unsigned long long)outptr[j + i]); - return -1; - } - lasts = s; - } - } - return 0; -} - - -int -test_work_group_scan_exclusive_add_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_scan_exclusive_add_kernel_code_int, - "test_wg_scan_exclusive_add_int"); - if (err) - return -1; - - // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. - err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); - test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include - -#include "procs.h" - -const char *wg_scan_exclusive_max_kernel_code_int = -"__kernel void test_wg_scan_exclusive_max_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_scan_exclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_exclusive_max_kernel_code_uint = -"__kernel void test_wg_scan_exclusive_max_uint(global uint *input, global 
uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_scan_exclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_scan_exclusive_max_kernel_code_long = -"__kernel void test_wg_scan_exclusive_max_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_scan_exclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_exclusive_max_kernel_code_ulong = -"__kernel void test_wg_scan_exclusive_max_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_scan_exclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -static int -verify_wg_scan_exclusive_max_int(int *inptr, int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != max_) { - log_info("work_group_scan_exclusive_max int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), max_, outptr[j+i]); - return -1; - } - max_ = std::max(inptr[j + i], max_); - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_max_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != max_) { - log_info("work_group_scan_exclusive_max int: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), max_, outptr[j+i]); - return -1; - } - max_ = std::max(inptr[j + i], max_); - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_max_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != max_) { - log_info("work_group_scan_exclusive_max long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), max_, 
outptr[j+i]); - return -1; - } - max_ = std::max(inptr[j + i], max_); - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != max_) { - log_info("work_group_scan_exclusive_max ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), max_, outptr[j+i]); - return -1; - } - max_ = std::max(inptr[j + i], max_); - } - } - - return 0; -} - - -int -test_work_group_scan_exclusive_max_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t wg_sizes_per_dimension[3]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_scan_exclusive_max_kernel_code_int, - "test_wg_scan_exclusive_max_int"); - if (err) - return -1; - - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); - if (err) - return -1; - if(wg_sizes_per_dimension[0] < wg_size[0]) - { - wg_size[0] = wg_sizes_per_dimension[0]; - } - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return 
-1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include - -#include "procs.h" - -const char *wg_scan_exclusive_min_kernel_code_int = -"__kernel void test_wg_scan_exclusive_min_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_scan_exclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_exclusive_min_kernel_code_uint = -"__kernel void test_wg_scan_exclusive_min_uint(global uint *input, global uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_scan_exclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_scan_exclusive_min_kernel_code_long = -"__kernel void test_wg_scan_exclusive_min_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_scan_exclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_exclusive_min_kernel_code_ulong = -"__kernel void test_wg_scan_exclusive_min_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_scan_exclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - - -static int -verify_wg_scan_exclusive_min_int(int *inptr, int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != min_) { - log_info("work_group_scan_exclusive_min int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), min_, outptr[j+i]); - return -1; - } - min_ = std::min(inptr[j + i], min_); - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != min_) { - 
log_info("work_group_scan_exclusive_min int: Error at %u: expected = %u, got = %u\n", j+i, min_, outptr[j+i]); - return -1; - } - min_ = std::min(inptr[j + i], min_); - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != min_) { - log_info("work_group_scan_exclusive_min long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), min_, outptr[j+i]); - return -1; - } - min_ = std::min(inptr[j + i], min_); - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != min_) { - log_info("work_group_scan_exclusive_min ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), min_, outptr[j+i]); - return -1; - } - min_ = std::min(inptr[j + i], min_); - } - } - - return 0; -} - - -int -test_work_group_scan_exclusive_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t wg_sizes_per_dimension[3]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_scan_exclusive_min_kernel_code_int, - "test_wg_scan_exclusive_min_int"); - if (err) - return -1; - - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); - if (err) - return -1; - if(wg_sizes_per_dimension[0] < wg_size[0]) - { - wg_size[0] = 
wg_sizes_per_dimension[0]; - } - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include "procs.h" - - -const char *wg_scan_inclusive_add_kernel_code_int = -"__kernel void test_wg_scan_inclusive_add_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_scan_inclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_inclusive_add_kernel_code_uint = -"__kernel void test_wg_scan_inclusive_add_uint(global uint *input, global uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_scan_inclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_scan_inclusive_add_kernel_code_long = -"__kernel void test_wg_scan_inclusive_add_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_scan_inclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_inclusive_add_kernel_code_ulong = -"__kernel void test_wg_scan_inclusive_add_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_scan_inclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -static int -verify_wg_scan_inclusive_add_int(int *inptr, int *outptr, size_t n, size_t wg_size) -{ - size_t i, j, m; - int s; - - 
for (j=0; j wg_size) - m = wg_size; - - s = 0; - for (i=0; i wg_size) - m = wg_size; - - s = 0; - for (i=0; i wg_size) - m = wg_size; - - s = 0; - for (i=0; i wg_size) - m = wg_size; - - s = 0; - for (i=0; i -#include -#include -#include - -#include - -#include "procs.h" - - -const char *wg_scan_inclusive_max_kernel_code_int = -"__kernel void test_wg_scan_inclusive_max_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_scan_inclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_inclusive_max_kernel_code_uint = -"__kernel void test_wg_scan_inclusive_max_uint(global uint *input, global uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_scan_inclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_scan_inclusive_max_kernel_code_long = -"__kernel void test_wg_scan_inclusive_max_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_scan_inclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_inclusive_max_kernel_code_ulong = -"__kernel void test_wg_scan_inclusive_max_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_scan_inclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -static int -verify_wg_scan_inclusive_max_int(int *inptr, int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - max_ = std::max(inptr[j + i], max_); - if (outptr[j+i] != max_) { - log_info("work_group_scan_inclusive_max int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), max_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_scan_inclusive_max_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t 
wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - max_ = std::max(inptr[j + i], max_); - if (outptr[j+i] != max_) { - log_info("work_group_scan_inclusive_max int: Error at %lu: expected = %u, got = %u\n", (unsigned long)(j+i), max_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_scan_inclusive_max_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - max_ = std::max(inptr[j + i], max_); - if (outptr[j+i] != max_) { - log_info("work_group_scan_inclusive_max long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), max_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_scan_inclusive_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - max_ = std::max(inptr[j + i], max_); - if (outptr[j+i] != max_) { - log_info("work_group_scan_inclusive_max ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), max_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - - -int -test_work_group_scan_inclusive_max_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_scan_inclusive_max_kernel_code_int, - "test_wg_scan_inclusive_max_int"); - if (err) - return -1; - - // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. 
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); - test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include - -#include "procs.h" - - -const char *wg_scan_inclusive_min_kernel_code_int = -"__kernel void test_wg_scan_inclusive_min_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_scan_inclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_inclusive_min_kernel_code_uint = -"__kernel void test_wg_scan_inclusive_min_uint(global uint *input, global uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_scan_inclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_scan_inclusive_min_kernel_code_long = -"__kernel void test_wg_scan_inclusive_min_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_scan_inclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_inclusive_min_kernel_code_ulong = -"__kernel void test_wg_scan_inclusive_min_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_scan_inclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - 
- -static int -verify_wg_scan_inclusive_min_int(int *inptr, int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - min_ = std::min(inptr[j + i], min_); - if (outptr[j+i] != min_) { - log_info("work_group_scan_inclusive_min int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), min_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_scan_inclusive_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - min_ = std::min(inptr[j + i], min_); - if (outptr[j+i] != min_) { - log_info("work_group_scan_inclusive_min int: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), min_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_scan_inclusive_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - min_ = std::min(inptr[j + i], min_); - if (outptr[j+i] != min_) { - log_info("work_group_scan_inclusive_min long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), min_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_scan_inclusive_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - min_ = std::min(inptr[j + i], min_); - if (outptr[j+i] != min_) { - log_info("work_group_scan_inclusive_min ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), min_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - - -int -test_work_group_scan_inclusive_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - 
cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_scan_inclusive_min_kernel_code_int, - "test_wg_scan_inclusive_min_int"); - if (err) - return -1; - - // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. - err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); - test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i +#include +#include + +#include "procs.h" + +static std::string make_kernel_string(const std::string &type, + const std::string &kernelName, + const std::string &func) +{ + // Build a kernel string of the form: + // __kernel void KERNEL_NAME(global TYPE *input, global TYPE *output) { + // int tid = get_global_id(0); + // output[tid] = FUNC(input[tid]); + // } + + std::ostringstream os; + os << "__kernel void " << kernelName << "(global " << type + << " *input, global " << type << " *output) {\n"; + os << " int tid = get_global_id(0);\n"; + os << " output[tid] = " << func << "(input[tid]);\n"; + os << "}\n"; + return os.str(); +} + +template struct TestTypeInfo +{ +}; + +template <> struct TestTypeInfo +{ + static constexpr const char *deviceName = "int"; +}; + +template <> struct TestTypeInfo +{ + static constexpr const char *deviceName = 
"uint"; +}; + +template <> struct TestTypeInfo +{ + static constexpr const char *deviceName = "long"; +}; + +template <> struct TestTypeInfo +{ + static constexpr const char *deviceName = "ulong"; +}; + +template struct Add +{ + using Type = T; + static constexpr const char *opName = "add"; + static constexpr T identityValue = 0; + static T combine(T a, T b) { return a + b; } +}; + +template struct Max +{ + using Type = T; + static constexpr const char *opName = "max"; + static constexpr T identityValue = std::numeric_limits::min(); + static T combine(T a, T b) { return std::max(a, b); } +}; + +template struct Min +{ + using Type = T; + static constexpr const char *opName = "min"; + static constexpr T identityValue = std::numeric_limits::max(); + static T combine(T a, T b) { return std::min(a, b); } +}; + +template struct Reduce +{ + using Type = typename C::Type; + + static constexpr const char *testName = "work_group_reduce"; + static constexpr const char *testOpName = C::opName; + static constexpr const char *deviceTypeName = + TestTypeInfo::deviceName; + static constexpr const char *kernelName = "test_wg_reduce"; + static int verify(Type *inptr, Type *outptr, size_t n_elems, + size_t max_wg_size) + { + for (size_t i = 0; i < n_elems; i += max_wg_size) + { + size_t wg_size = std::min(max_wg_size, n_elems - i); + + Type result = C::identityValue; + for (size_t j = 0; j < wg_size; j++) + { + result = C::combine(result, inptr[i + j]); + } + + for (size_t j = 0; j < wg_size; j++) + { + if (result != outptr[i + j]) + { + log_info("%s_%s: Error at %zu\n", testName, testOpName, + i + j); + return -1; + } + } + } + return 0; + } +}; + +template struct ScanInclusive +{ + using Type = typename C::Type; + + static constexpr const char *testName = "work_group_scan_inclusive"; + static constexpr const char *testOpName = C::opName; + static constexpr const char *deviceTypeName = + TestTypeInfo::deviceName; + static constexpr const char *kernelName = "test_wg_scan_inclusive"; 
+ static int verify(Type *inptr, Type *outptr, size_t n_elems, + size_t max_wg_size) + { + for (size_t i = 0; i < n_elems; i += max_wg_size) + { + size_t wg_size = std::min(max_wg_size, n_elems - i); + + Type result = C::identityValue; + for (size_t j = 0; j < wg_size; ++j) + { + result = C::combine(result, inptr[i + j]); + if (result != outptr[i + j]) + { + log_info("%s_%s: Error at %zu\n", testName, testOpName, + i + j); + return -1; + } + } + } + return 0; + } +}; + +template struct ScanExclusive +{ + using Type = typename C::Type; + + static constexpr const char *testName = "work_group_scan_exclusive"; + static constexpr const char *testOpName = C::opName; + static constexpr const char *deviceTypeName = + TestTypeInfo::deviceName; + static constexpr const char *kernelName = "test_wg_scan_exclusive"; + static int verify(Type *inptr, Type *outptr, size_t n_elems, + size_t max_wg_size) + { + for (size_t i = 0; i < n_elems; i += max_wg_size) + { + size_t wg_size = std::min(max_wg_size, n_elems - i); + + Type result = C::identityValue; + for (size_t j = 0; j < wg_size; ++j) + { + if (result != outptr[i + j]) + { + log_info("%s_%s: Error at %zu\n", testName, testOpName, + i + j); + return -1; + } + result = C::combine(result, inptr[i + j]); + } + } + return 0; + } +}; + +template +static int run_test(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + using T = typename TestInfo::Type; + + cl_int err = CL_SUCCESS; + + clProgramWrapper program; + clKernelWrapper kernel; + + std::string funcName = TestInfo::testName; + funcName += "_"; + funcName += TestInfo::testOpName; + + std::string kernelName = TestInfo::kernelName; + kernelName += "_"; + kernelName += TestInfo::testOpName; + kernelName += "_"; + kernelName += TestInfo::deviceTypeName; + + std::string kernelString = + make_kernel_string(TestInfo::deviceTypeName, kernelName, funcName); + + const char *kernel_source = kernelString.c_str(); + err = create_single_kernel_helper(context, 
&program, &kernel, 1, + &kernel_source, kernelName.c_str()); + test_error(err, "Unable to create test kernel"); + + size_t wg_size[1]; + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); + + clMemWrapper src = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(T) * n_elems, NULL, &err); + test_error(err, "Unable to create source buffer"); + + clMemWrapper dst = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(T) * n_elems, NULL, &err); + test_error(err, "Unable to create destination buffer"); + + std::vector input_ptr(n_elems); + + MTdataHolder d(gRandomSeed); + for (int i = 0; i < n_elems; i++) + { + input_ptr[i] = (T)genrand_int64(d); + } + + err = clEnqueueWriteBuffer(queue, src, CL_TRUE, 0, sizeof(T) * n_elems, + input_ptr.data(), 0, NULL, NULL); + test_error(err, "clWriteBuffer to initialize src buffer failed"); + + err = clSetKernelArg(kernel, 0, sizeof(src), &src); + test_error(err, "Unable to set src buffer kernel arg"); + err |= clSetKernelArg(kernel, 1, sizeof(dst), &dst); + test_error(err, "Unable to set dst buffer kernel arg"); + + size_t global_work_size[] = { (size_t)n_elems }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, + wg_size, 0, NULL, NULL); + test_error(err, "Unable to enqueue test kernel"); + + std::vector output_ptr(n_elems); + + cl_uint dead = 0xdeaddead; + memset_pattern4(output_ptr.data(), &dead, sizeof(T) * n_elems); + err = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(T) * n_elems, + output_ptr.data(), 0, NULL, NULL); + test_error(err, "clEnqueueReadBuffer to read read dst buffer failed"); + + if (TestInfo::verify(input_ptr.data(), output_ptr.data(), n_elems, + wg_size[0])) + { + log_error("%s_%s %s failed\n", TestInfo::testName, TestInfo::testOpName, + TestInfo::deviceTypeName); + return TEST_FAIL; + } + + log_info("%s_%s %s passed\n", TestInfo::testName, TestInfo::testOpName, + 
TestInfo::deviceTypeName); + return TEST_PASS; +} + +int test_work_group_reduce_add(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= run_test>>(device, context, queue, n_elems); + result |= run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + } + + return result; +} + +int test_work_group_reduce_max(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= run_test>>(device, context, queue, n_elems); + result |= run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + } + + return result; +} + +int test_work_group_reduce_min(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= run_test>>(device, context, queue, n_elems); + result |= run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + } + + return result; +} + +int test_work_group_scan_inclusive_add(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= run_test>>(device, context, queue, + n_elems); + result |= run_test>>(device, context, queue, + n_elems); + } + + return result; +} + +int test_work_group_scan_inclusive_max(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= + run_test>>(device, context, queue, n_elems); + result |= + 
run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= run_test>>(device, context, queue, + n_elems); + result |= run_test>>(device, context, queue, + n_elems); + } + + return result; +} + +int test_work_group_scan_inclusive_min(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= run_test>>(device, context, queue, + n_elems); + result |= run_test>>(device, context, queue, + n_elems); + } + + return result; +} + +int test_work_group_scan_exclusive_add(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= run_test>>(device, context, queue, + n_elems); + result |= run_test>>(device, context, queue, + n_elems); + } + + return result; +} + +int test_work_group_scan_exclusive_max(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= run_test>>(device, context, queue, + n_elems); + result |= run_test>>(device, context, queue, + n_elems); + } + + return result; +} + +int test_work_group_scan_exclusive_min(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= run_test>>(device, context, queue, + n_elems); + result |= run_test>>(device, context, queue, + n_elems); + } + + return result; +} -- cgit v1.2.3 From 
7a0e7e767a1a33e7b7c10954b4a106dedf316e00 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 12 Apr 2022 17:42:55 +0100 Subject: Test all cluster sizes for cl_khr_subgroup_clustered_reduce (#1408) Signed-off-by: Stuart Brady --- .../subgroups/test_subgroup_clustered_reduce.cpp | 39 ++++++++++++++-------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp index 527be5ad..b016bf99 100644 --- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -18,19 +18,29 @@ #include "subgroup_common_templates.h" #include "harness/typeWrappers.h" -#define CLUSTER_SIZE 4 -#define CLUSTER_SIZE_STR "4" - namespace { std::string sub_group_clustered_reduce_source = R"( -__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { +__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out, + uint cluster_size) { + Type r; int gid = get_global_id(0); XY(xy,gid); xy[gid].w = 0; - if (sizeof(in[gid]) == sizeof(%s(in[gid], )" CLUSTER_SIZE_STR R"())) { + Type v = in[gid]; + if (sizeof(in[gid]) == sizeof(%s(v, 1))) { xy[gid].w = sizeof(in[gid]); } - out[gid] = %s(in[gid], )" CLUSTER_SIZE_STR R"(); + switch (cluster_size) { + case 1: r = %s(v, 1); break; + case 2: r = %s(v, 2); break; + case 4: r = %s(v, 4); break; + case 8: r = %s(v, 8); break; + case 16: r = %s(v, 16); break; + case 32: r = %s(v, 32); break; + case 64: r = %s(v, 64); break; + case 128: r = %s(v, 128); break; + } + out[gid] = r; } )"; @@ -94,32 +104,33 @@ template struct RED_CLU int n = ii + ns > nw ? 
nw - ii : ns; int midx = 4 * ii + 2; std::vector clusters_results; - int clusters_counter = ns / CLUSTER_SIZE; + int clusters_counter = ns / test_params.cluster_size; clusters_results.resize(clusters_counter); // Compute target Ty tr = mx[ii]; for (int i = 0; i < n; ++i) { - if (i % CLUSTER_SIZE == 0) + if (i % test_params.cluster_size == 0) tr = mx[ii + i]; else tr = calculate(tr, mx[ii + i], operation); - clusters_results[i / CLUSTER_SIZE] = tr; + clusters_results[i / test_params.cluster_size] = tr; } // Check result for (int i = 0; i < n; ++i) { Ty rr = my[ii + i]; - tr = clusters_results[i / CLUSTER_SIZE]; + tr = clusters_results[i / test_params.cluster_size]; if (!compare(rr, tr)) { log_error( - "ERROR: sub_group_clustered_reduce_%s(%s) mismatch " - "for local id %d in sub group %d in group %d\n", + "ERROR: sub_group_clustered_reduce_%s(%s, %u) " + "mismatch for local id %d in sub group %d in group " + "%d\n", operation_names(operation), TypeManager::name(), - i, j, k); + test_params.cluster_size, i, j, k); return TEST_FAIL; } } @@ -184,7 +195,7 @@ int test_subgroup_functions_clustered_reduce(cl_device_id device, constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size); + WorkGroupParams test_params(global_work_size, local_work_size, -1, 3); test_params.save_kernel_source(sub_group_clustered_reduce_source); RunTestForType rft(device, context, queue, num_elements, test_params); -- cgit v1.2.3 From d533472c27995bb97ed5caab69eca90dd9e5a4ea Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Tue, 19 Apr 2022 18:55:03 +0200 Subject: Fix incorrect use image channel data type and filtering mode (#1375) --- test_conformance/spir/sampler_enumeration.zip | Bin 63216 -> 67926 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/test_conformance/spir/sampler_enumeration.zip b/test_conformance/spir/sampler_enumeration.zip index 5f8a7a06..ab9c9a56 100644 Binary files 
a/test_conformance/spir/sampler_enumeration.zip and b/test_conformance/spir/sampler_enumeration.zip differ -- cgit v1.2.3 From 03da14d6a9a1525cc585f256404bbfc79ccc0e44 Mon Sep 17 00:00:00 2001 From: Jim Lewis Date: Tue, 19 Apr 2022 11:57:15 -0500 Subject: Fix clang 10 build errors (#1387) * Fix clang 10 build errors Lossy casts due to inexact float representation of CL_INT_MAX * Fix clang format * Remove implicit-const-int-float-conversion flag --- CMakeLists.txt | 1 - test_common/harness/imageHelpers.cpp | 21 +++++++-------------- test_common/harness/imageHelpers.h | 2 +- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b307a11..8f5f4472 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,7 +94,6 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang" add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive add_cxx_flag_if_supported(-Wno-error=unknown-pragmas) # Issue #785 add_cxx_flag_if_supported(-Wno-error=asm-operand-widths) # Issue #784 - add_cxx_flag_if_supported(-Wno-error=implicit-const-int-float-conversion) # Issue #1250 # -msse -mfpmath=sse to force gcc to use sse for float math, # avoiding excess precision problems that cause tests like int2float diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index 3a5c5533..c380c1f3 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -2624,11 +2624,11 @@ void pack_image_pixel(int *srcVector, const cl_image_format *imageFormat, } } -int round_to_even(float v) +cl_int round_to_even(float v) { // clamp overflow - if (v >= -(float)INT_MIN) return INT_MAX; - if (v <= (float)INT_MIN) return INT_MIN; + if (v >= -(float)CL_INT_MIN) return CL_INT_MAX; + if (v <= (float)CL_INT_MIN) return CL_INT_MIN; // round fractional values to integer value if (fabsf(v) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23)) @@ -2640,7 +2640,7 @@ int round_to_even(float v) v -= 
magicVal; } - return (int)v; + return (cl_int)v; } void pack_image_pixel(float *srcVector, const cl_image_format *imageFormat, @@ -2765,10 +2765,7 @@ void pack_image_pixel(float *srcVector, const cl_image_format *imageFormat, case CL_SIGNED_INT32: { cl_int *ptr = (cl_int *)outData; for (unsigned int i = 0; i < channelCount; i++) - ptr[i] = (int)CONVERT_INT( - srcVector[i], MAKE_HEX_FLOAT(-0x1.0p31f, -1, 31), - MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffe, 30 - 23), - CL_INT_MAX); + ptr[i] = round_to_even(srcVector[i]); break; } case CL_UNSIGNED_INT8: { @@ -2932,12 +2929,8 @@ void pack_image_pixel_error(const float *srcVector, case CL_SIGNED_INT32: { const cl_int *ptr = (const cl_int *)results; for (unsigned int i = 0; i < channelCount; i++) - errors[i] = (cl_float)( - (cl_long)ptr[i] - - (cl_long)CONVERT_INT( - srcVector[i], MAKE_HEX_FLOAT(-0x1.0p31f, -1, 31), - MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffe, 30 - 23), - CL_INT_MAX)); + errors[i] = (cl_float)((cl_long)ptr[i] + - (cl_long)round_to_even(srcVector[i])); break; } case CL_UNSIGNED_INT8: { diff --git a/test_common/harness/imageHelpers.h b/test_common/harness/imageHelpers.h index e728a939..2cc8e68e 100644 --- a/test_common/harness/imageHelpers.h +++ b/test_common/harness/imageHelpers.h @@ -63,7 +63,7 @@ typedef struct bool normalized_coords; } image_sampler_data; -int round_to_even(float v); +cl_int round_to_even(float v); #define NORMALIZE(v, max) (v < 0 ? 0 : (v > 1.f ? max : round_to_even(v * max))) #define NORMALIZE_UNROUNDED(v, max) (v < 0 ? 0 : (v > 1.f ? 
max : v * max)) -- cgit v1.2.3 From 13d1b01f65d106c3462bd3d5222780c3fcf097ea Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Apr 2022 04:51:32 -0500 Subject: test_basic/enqueue_map: Initialize all the data (#1417) --- test_conformance/basic/test_enqueue_map.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/basic/test_enqueue_map.cpp b/test_conformance/basic/test_enqueue_map.cpp index 3702726f..d28f7e41 100644 --- a/test_conformance/basic/test_enqueue_map.cpp +++ b/test_conformance/basic/test_enqueue_map.cpp @@ -146,7 +146,7 @@ int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command clMemWrapper memObject; log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); - generate_random_data(kUInt, (unsigned int)(imageSize * imageSize), d, + generate_random_data(kUInt, (unsigned int)(imageSize * imageSize * 4), d, hostPtrData); memcpy(referenceData, hostPtrData, imageDataSize); -- cgit v1.2.3 From 35c21a8e06f94ffd84bdfe0f94a2aa0deb3d1013 Mon Sep 17 00:00:00 2001 From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com> Date: Thu, 28 Apr 2022 23:46:52 +0200 Subject: imageHelpers: add CL_UNORM_SHORT_{555, 565} in get_max_absolute_error (#1406) * imageHelpers: add CL_UNORM_SHORT_{555, 565} in get_max_absolute_error Working on a device supporting CL_UNORM_SHORT_565 image data type, I noticed that the max absolute error authorized was not the right one for such image data type. Also because of normalization, there is always an absolute error authorized whatever the filtering of the sampler. 
Ref #1140 * put back if statement on filter_mode --- test_common/harness/imageHelpers.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index c380c1f3..a254c48f 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -924,6 +924,8 @@ float get_max_absolute_error(const cl_image_format *format, #ifdef CL_SFIXED14_APPLE case CL_SFIXED14_APPLE: return 0x1.0p-14f; #endif + case CL_UNORM_SHORT_555: + case CL_UNORM_SHORT_565: return 1.0f / 31.0f; default: return 0.0f; } } -- cgit v1.2.3 From 5d6ca3e9d1374ef32644847c1eefeb503a27b732 Mon Sep 17 00:00:00 2001 From: Jeremy Kemp Date: Thu, 28 Apr 2022 23:34:08 +0100 Subject: Change memory order and scope for atomics that gate final results being stored. (#1377) * Change memory order and scope for atomics that gate final results being stored. memory_order_acq_rel with memory_scope_device is now used to guarantee that the correct memory consistency is observed before final results are stored. Previously it was possible for kernels to be generated that all used relaxed memory ordering, which could lead to false-positive failures. Fixes #1370 * Disable atomics tests with global, in-program atomics. If the device does not support `memory_order_relaxed` or `memory_scope_device`, disable atomics tests that declare their atomics in-program with global memory. There is now an implicit requirement to support `memory_order_relaxed` and `memory_scope_device` for these tests. * Fix misplaced parentheses. 
* Change memory scope for atomic fetch and load calls in kernel Change the memory scope from memory_scope_work_group to memory_scope_device so the ordering applies across all work items Co-authored-by: Sreelakshmi Haridas --- test_conformance/c11_atomics/common.h | 51 +++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index d30259f0..42fe32b6 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -1031,20 +1031,40 @@ CBasicTest::KernelCode(cl_uint maxNumDestItems) } code += "\n"; } - if (LocalMemory() || DeclaredInProgram()) + if (LocalMemory()) { code += " // Copy final values to host reachable buffer\n"; - if (LocalMemory()) - code += " barrier(CLK_LOCAL_MEM_FENCE);\n" - " if(get_local_id(0) == 0) // first thread in workgroup\n"; + code += " barrier(CLK_LOCAL_MEM_FENCE);\n" + " if(get_local_id(0) == 0) // first thread in workgroup\n"; + code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; " + "dstItemIdx++)\n"; + if (aTypeName == "atomic_flag") + { + code += R"( + finalDest[dstItemIdx] = + atomic_flag_test_and_set_explicit(destMemory+dstItemIdx, + memory_order_relaxed, + memory_scope_work_group);)"; + } else - // global atomics declared in program scope + { code += R"( - if(atomic_fetch_add_explicit(&finishedThreads, 1u, - memory_order_relaxed, - memory_scope_work_group) + finalDest[dstItemIdx] = + atomic_load_explicit(destMemory+dstItemIdx, + memory_order_relaxed, + memory_scope_work_group);)"; + } + } + else if (DeclaredInProgram()) + { + // global atomics declared in program scope + code += " // Copy final values to host reachable buffer\n"; + code += R"( + if(atomic_fetch_add_explicit(&finishedThreads, 1u, + memory_order_acq_rel, + memory_scope_device) == get_global_size(0)-1) // last finished thread - )"; + )"; code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; " 
"dstItemIdx++)\n"; if (aTypeName == "atomic_flag") @@ -1053,7 +1073,7 @@ CBasicTest::KernelCode(cl_uint maxNumDestItems) finalDest[dstItemIdx] = atomic_flag_test_and_set_explicit(destMemory+dstItemIdx, memory_order_relaxed, - memory_scope_work_group);)"; + memory_scope_device);)"; } else { @@ -1061,7 +1081,7 @@ CBasicTest::KernelCode(cl_uint maxNumDestItems) finalDest[dstItemIdx] = atomic_load_explicit(destMemory+dstItemIdx, memory_order_relaxed, - memory_scope_work_group);)"; + memory_scope_device);)"; } } code += "}\n" @@ -1108,6 +1128,15 @@ int CBasicTest::ExecuteSingleTest( log_info("\t\tTest disabled\n"); return 0; } + if (!LocalMemory() && DeclaredInProgram()) + { + if (((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0) + || ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0)) + { + log_info("\t\tTest disabled\n"); + return 0; + } + } // set up work sizes based on device capabilities and test configuration error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, -- cgit v1.2.3 From 3662d1744778e333fd593312cb9083a245fc44d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Fri, 29 Apr 2022 18:42:27 +0100 Subject: Update Github Actions CI and add Windows (#1413) - Add one Windows build to Github Actions - Remove Appveyor config - Move a few build steps out of the script - Use Ninja as the generator (makes for more readable logs) - Add build cache (except on Windows where it seems to break) Change-Id: Ida90ee1842af98aff86e5144ab7b9766480378c9 Signed-off-by: Kevin Petit --- .appveyor.yml | 54 ----------------------------------------- .github/workflows/presubmit.yml | 29 +++++++++++++++++++--- presubmit.sh | 31 +++++++++++------------ 3 files changed, 41 insertions(+), 73 deletions(-) delete mode 100644 .appveyor.yml diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index ea010778..00000000 --- a/.appveyor.yml +++ /dev/null @@ -1,54 +0,0 @@ -os: - - Visual Studio 2017 - -shallow_clone: true - -platform: - - Win32 
- - x64 - -configuration: - - Release - -environment: - matrix: - - SETARCH: i686 - - SETARCH: x86_64 - -matrix: - exclude: - - platform: Win32 - SETARCH: x86_64 - - platform: x64 - SETARCH: i686 - -before_build: - # Setup environment: - - ps: $env:TOP = $env:APPVEYOR_BUILD_FOLDER - - ps: $env:TOP - - echo %TOP% - # Get the OpenCL Headers: - - git clone --depth=1 https://github.com/KhronosGroup/OpenCL-Headers OpenCL-Headers - # Get and build the OpenCL ICD Loader: - - git clone --depth=1 https://github.com/KhronosGroup/OpenCL-ICD-Loader.git - - ps: cd OpenCL-ICD-Loader - - ps: mkdir build - - ps: cd build - - cmake -A%PLATFORM% -DENABLE_OPENCL30_PROVISIONAL=1 -DOPENCL_ICD_LOADER_HEADERS_DIR=%TOP%/OpenCL-Headers/ .. - - cmake --build . --config %CONFIGURATION% - - ps: cd $env:TOP - # Get the libclcxx standard library: - - git clone --depth=1 https://github.com/KhronosGroup/libclcxx.git libclcxx - # Generate the CTS solution file: - - cmake -DCL_INCLUDE_DIR=%TOP%/OpenCL-Headers - -DCL_LIB_DIR=%TOP%/OpenCL-ICD-Loader/build - -DCL_LIBCLCXX_DIR=%TOP%/libclcxx - -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin - -DOPENCL_LIBRARIES="OpenCL" - -H. 
-Bbuild_win -A%PLATFORM% - -DD3D10_IS_SUPPORTED=ON -DD3D11_IS_SUPPORTED=ON -DARCH=%SETARCH% - -build: - project: build_win\CLConform.sln - parallel: true - verbosity: normal diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 2aedc199..bac4ceba 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -3,30 +3,51 @@ on: [push, pull_request] jobs: build: - name: Build ${{ matrix.os }} ${{ matrix.name }} + name: Build ${{ matrix.os }} ${{ matrix.arch }} runs-on: ${{ matrix.os }} env: JOB_ARCHITECTURE: ${{ matrix.arch }} JOB_ENABLE_GL: ${{ matrix.gl }} strategy: + fail-fast: false matrix: mainmatrix: [true] - os: [ubuntu-20.04, macos-latest] + os: [ubuntu-20.04, macos-latest, windows-latest] include: - os: ubuntu-20.04 mainmatrix: true gl: 1 - os: ubuntu-20.04 mainmatrix: false - name: Arm arch: arm - os: ubuntu-20.04 mainmatrix: false - name: AArch64 arch: aarch64 steps: - uses: actions/checkout@v2 + - name: Setup Ninja + uses: seanmiddleditch/gha-setup-ninja@master + - name: Setup OpenGL build dependencies + if: ${{ matrix.gl }} + run: | + sudo apt-get update + sudo apt-get -y install libglu1-mesa-dev freeglut3-dev mesa-common-dev libglew-dev + - name: Setup MSVC with Ninja + uses: ilammy/msvc-dev-cmd@v1 + - name: Setup ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + variant: sccache + key: ${{ matrix.os }}-${{ matrix.arch }} + - name: Fetch OpenCL Headers + shell: bash + run: | + git clone https://github.com/KhronosGroup/OpenCL-Headers.git + cd OpenCL-Headers + ln -s CL OpenCL # For OSX builds + cd .. 
- name: Build + shell: bash run: ./presubmit.sh formatcheck: name: Check code format diff --git a/presubmit.sh b/presubmit.sh index 6fc037c8..b63a4373 100755 --- a/presubmit.sh +++ b/presubmit.sh @@ -15,7 +15,7 @@ touch ${TOOLCHAIN_FILE} BUILD_OPENGL_TEST="OFF" # Prepare toolchain if needed -if [[ ${JOB_ARCHITECTURE} != "" ]]; then +if [[ ${JOB_ARCHITECTURE} != "" && ${RUNNER_OS} != "Windows" ]]; then TOOLCHAIN_URL_VAR=TOOLCHAIN_URL_${JOB_ARCHITECTURE} TOOLCHAIN_URL=${!TOOLCHAIN_URL_VAR} wget ${TOOLCHAIN_URL} @@ -38,35 +38,36 @@ fi if [[ ( ${JOB_ARCHITECTURE} == "" && ${JOB_ENABLE_GL} == "1" ) ]]; then BUILD_OPENGL_TEST="ON" - sudo apt-get update - sudo apt-get -y install libglu1-mesa-dev freeglut3-dev mesa-common-dev libglew-dev fi -# Prepare headers -git clone https://github.com/KhronosGroup/OpenCL-Headers.git -cd OpenCL-Headers -ln -s CL OpenCL # For OSX builds -cd .. # Get and build loader git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git cd ${TOP}/OpenCL-ICD-Loader mkdir build cd build -cmake -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/ .. -make +cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/ +cmake --build . -j2 --config Release # Build CTS cd ${TOP} ls -l mkdir build cd build -cmake -DCL_INCLUDE_DIR=${TOP}/OpenCL-Headers \ +if [[ ${RUNNER_OS} == "Windows" ]]; then + CMAKE_OPENCL_LIBRARIES_OPTION="OpenCL" + CMAKE_CACHE_OPTIONS="" +else + CMAKE_OPENCL_LIBRARIES_OPTION="-lOpenCL -lpthread" + CMAKE_CACHE_OPTIONS="-DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache" +fi +cmake .. 
-G Ninja \ + ${CMAKE_CACHE_OPTIONS} \ + -DCL_INCLUDE_DIR=${TOP}/OpenCL-Headers \ -DCL_LIB_DIR=${TOP}/OpenCL-ICD-Loader/build \ -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin \ - -DOPENCL_LIBRARIES="-lOpenCL -lpthread" \ + -DOPENCL_LIBRARIES="${CMAKE_OPENCL_LIBRARIES_OPTION}" \ -DUSE_CL_EXPERIMENTAL=ON \ - -DGL_IS_SUPPORTED=${BUILD_OPENGL_TEST} \ - .. -make -j2 + -DGL_IS_SUPPORTED=${BUILD_OPENGL_TEST} +cmake --build . -j3 --config Release -- cgit v1.2.3 From 5149de22777158936b8c078a234206e1ffcfbde6 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 17 May 2022 17:51:10 +0200 Subject: api/kernel_arg_info: Check for read_write image support before testing it (#1420) Code taken from api/test_min_image_formats.cpp --- test_conformance/api/test_kernel_arg_info.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp index dddb4a23..28825f10 100644 --- a/test_conformance/api/test_kernel_arg_info.cpp +++ b/test_conformance/api/test_kernel_arg_info.cpp @@ -814,8 +814,34 @@ static int run_image_tests(cl_context context, cl_device_id deviceID) cl_kernel_arg_address_qualifier address_qualifier = CL_KERNEL_ARG_ADDRESS_GLOBAL; + Version version = get_device_cl_version(deviceID); + bool supports_read_write_images = false; + if (version >= Version(3, 0)) + { + cl_uint maxReadWriteImageArgs = 0; + cl_int error = clGetDeviceInfo( + deviceID, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, + sizeof(maxReadWriteImageArgs), &maxReadWriteImageArgs, NULL); + test_error(error, + "Unable to query " + "CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS"); + + // read-write images are supported if MAX_READ_WRITE_IMAGE_ARGS is + // nonzero + supports_read_write_images = maxReadWriteImageArgs != 0; + } + else if (version >= Version(2, 0)) + { + // read-write images are required for OpenCL 2.x + supports_read_write_images = true; + } + for (auto access_qualifier : 
access_qualifiers) { + if (access_qualifier == CL_KERNEL_ARG_ACCESS_READ_WRITE + && !supports_read_write_images) + continue; + bool is_write = (access_qualifier == CL_KERNEL_ARG_ACCESS_WRITE_ONLY || access_qualifier == CL_KERNEL_ARG_ACCESS_READ_WRITE); -- cgit v1.2.3 From 6e6249fb489afbdc628e3d412aed9199ed006d48 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 17 May 2022 10:51:53 -0500 Subject: images: Stop checking gDeviceType != CL_DEVICE_TYPE_GPU (#1418) * images: Stop checking gDeviceType != CL_DEVICE_TYPE_GPU If the device type also advertises CL_DEVICE_TYPE_DEFAULT (which should be valid), this causes it to be considered a CPU device and the tests enforce different precision and rounding expectations. * Fix clang-format * Drop redundant NORM_OFFSET checks --- .../images/kernel_read_write/test_common.cpp | 10 ++-- .../images/kernel_read_write/test_iterations.cpp | 68 ++++++++++++++++------ .../images/kernel_read_write/test_loops.cpp | 2 +- .../images/kernel_read_write/test_read_1D.cpp | 48 +++++++++++---- .../kernel_read_write/test_read_1D_array.cpp | 52 ++++++++++++----- .../kernel_read_write/test_read_2D_array.cpp | 55 ++++++++++++----- 6 files changed, 169 insertions(+), 66 deletions(-) diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp index 6b3cf849..62bd4ab1 100644 --- a/test_conformance/images/kernel_read_write/test_common.cpp +++ b/test_conformance/images/kernel_read_write/test_common.cpp @@ -557,7 +557,7 @@ int test_read_image(cl_context context, cl_command_queue queue, // Apple requires its CPU implementation to do // correctly rounded address arithmetic in all // modes - || gDeviceType != CL_DEVICE_TYPE_GPU + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif ) offset = 0.0f; // Loop only once @@ -875,7 +875,7 @@ int test_read_image(cl_context context, cl_command_queue queue, // Apple requires its CPU implementation to do // correctly rounded address arithmetic in 
all // modes - || gDeviceType != CL_DEVICE_TYPE_GPU + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif ) offset = 0.0f; // Loop only once @@ -1214,7 +1214,8 @@ int test_read_image(cl_context context, cl_command_queue queue, // offsets (0.0, 0.0) E.g., test one // pixel. if (!imageSampler->normalized_coords - || gDeviceType != CL_DEVICE_TYPE_GPU + || !(gDeviceType + & CL_DEVICE_TYPE_GPU) || NORM_OFFSET == 0) { norm_offset_x = 0.0f; @@ -1396,7 +1397,8 @@ int test_read_image(cl_context context, cl_command_queue queue, // offsets (0.0, 0.0) E.g., test one // pixel. if (!imageSampler->normalized_coords - || gDeviceType != CL_DEVICE_TYPE_GPU + || !(gDeviceType + & CL_DEVICE_TYPE_GPU) || NORM_OFFSET == 0) { norm_offset_x = 0.0f; diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp index 3b779fab..2f5c75a7 100644 --- a/test_conformance/images/kernel_read_write/test_iterations.cpp +++ b/test_conformance/images/kernel_read_write/test_iterations.cpp @@ -415,12 +415,15 @@ int validate_image_2D_depth_results(void *imageValues, void *resultValues, doubl int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do correctly + // rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { @@ -474,7 +477,10 @@ int validate_image_2D_depth_results(void *imageValues, void *resultValues, 
doubl // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -569,12 +575,15 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do correctly + // rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { @@ -658,7 +667,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -778,7 +790,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -813,7 +828,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -874,7 +892,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -909,7 +930,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -975,12 +999,15 @@ int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do correctly + // rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { @@ -1054,7 +1081,10 @@ int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; diff --git a/test_conformance/images/kernel_read_write/test_loops.cpp b/test_conformance/images/kernel_read_write/test_loops.cpp index 795a9eda..ea1e1c7c 100644 --- a/test_conformance/images/kernel_read_write/test_loops.cpp +++ b/test_conformance/images/kernel_read_write/test_loops.cpp @@ -84,7 +84,7 @@ int test_read_image_type(cl_device_id device, cl_context context, // of operations for linear filtering on the GPU. We do not test linear // filtering for the CL_RGB CL_UNORM_INT_101010 image format; however, we // test it internally for a set of other image formats. - if ((gDeviceType == CL_DEVICE_TYPE_GPU) + if ((gDeviceType & CL_DEVICE_TYPE_GPU) && (imageSampler->filter_mode == CL_FILTER_LINEAR) && (format->image_channel_order == CL_RGB) && (format->image_channel_data_type == CL_UNORM_INT_101010)) diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp index 68113f9a..e9306fc4 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp @@ -487,10 +487,13 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do 
correctly + // rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif ) offset = 0.0f; // Loop only once @@ -553,7 +556,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; checkOnlyOnePixel = 1; } @@ -646,10 +652,13 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do correctly + // rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif ) offset = 0.0f; // Loop only once @@ -720,7 +729,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; checkOnlyOnePixel = 1; } @@ -826,7 +838,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; checkOnlyOnePixel = 1; } @@ -857,7 +872,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; checkOnlyOnePixel = 1; } @@ -913,7 +931,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; checkOnlyOnePixel = 1; } @@ -944,7 +965,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; checkOnlyOnePixel = 1; } diff --git a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp index ac266ad7..2f4e4d3b 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp @@ -578,12 +578,15 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do correctly + // rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { @@ -647,7 +650,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -746,12 +752,15 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do correctly + // rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { @@ -824,7 +833,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -935,7 +947,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -965,7 +980,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -1021,7 +1039,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -1051,7 +1072,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; diff --git a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp index 11b78814..d71bfec4 100644 --- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp @@ -597,12 +597,15 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do + // correctly rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { @@ -738,12 +741,15 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || 
gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do + // correctly rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { @@ -915,12 +921,15 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do + // correctly rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { @@ -1108,7 +1117,10 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; norm_offset_z = 0.0f; @@ -1147,7 +1159,11 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType + & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; norm_offset_z = 0.0f; @@ -1216,7 +1232,10 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; norm_offset_z = 0.0f; @@ -1255,7 +1274,11 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0 || NORM_OFFSET == 0 || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType + & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; norm_offset_z = 0.0f; -- cgit v1.2.3 From d54954c7cfd4311d12d076b205ee632b0d6cc151 Mon Sep 17 00:00:00 2001 From: Jeremy Kemp Date: Tue, 17 May 2022 16:52:40 +0100 Subject: Enable mipmap extension pragmas (#1349) * Enable mipmap pragmas where appropriate. * clang-format changes.
--- .../images/kernel_read_write/test_iterations.cpp | 56 +++++++++++--------- .../images/kernel_read_write/test_read_1D.cpp | 50 +++++++++--------- .../kernel_read_write/test_read_1D_array.cpp | 54 +++++++++++--------- .../kernel_read_write/test_read_2D_array.cpp | 59 ++++++++++++---------- .../images/kernel_read_write/test_read_3D.cpp | 59 +++++++++++++--------- .../images/kernel_read_write/test_write_1D.cpp | 42 ++++++++------- .../kernel_read_write/test_write_1D_array.cpp | 44 +++++++++------- .../kernel_read_write/test_write_2D_array.cpp | 54 ++++++++++++-------- .../images/kernel_read_write/test_write_3D.cpp | 53 +++++++++++-------- .../images/kernel_read_write/test_write_image.cpp | 50 ++++++++++-------- 10 files changed, 296 insertions(+), 225 deletions(-) diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp index 2f5c75a7..05aed02c 100644 --- a/test_conformance/images/kernel_read_write/test_iterations.cpp +++ b/test_conformance/images/kernel_read_write/test_iterations.cpp @@ -39,24 +39,28 @@ static size_t reduceImageSizeRange(size_t maxDimSize) { } const char *read2DKernelSourcePattern = -"__kernel void sample_kernel( read_only %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n" -"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, imageSampler, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_only %s input,%s __global float " + "*xOffsets, __global float *yOffsets, __global %s%s *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0), tidY = get_global_id(1);\n" + "%s" + "%s" + " results[offset] = read_image%s( input, imageSampler, coords %s);\n" + "}"; const char *read_write2DKernelSourcePattern = -"__kernel void sample_kernel( read_write %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n" 
-"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_write %s input,%s __global float " + "*xOffsets, __global float *yOffsets, __global %s%s *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0), tidY = get_global_id(1);\n" + "%s" + "%s" + " results[offset] = read_image%s( input, coords %s);\n" + "}"; const char *intCoordKernelSource = " int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n"; @@ -1691,16 +1695,18 @@ int test_read_image_set_2D(cl_device_id device, cl_context context, } - sprintf( programSrc, KernelSourcePattern, - (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t", - samplerArg, get_explicit_type_name( outputType ), + sprintf(programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable" + : "", + (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" + : "image2d_t", + samplerArg, get_explicit_type_name(outputType), (format->image_channel_order == CL_DEPTH) ? "" : "4", - gTestMipmaps?", float lod":" ", - samplerVar, - gTestMipmaps? lodOffsetSource : offsetSource, - floatCoords ? floatKernelSource : intCoordKernelSource, - readFormat, - gTestMipmaps?", lod":" "); + gTestMipmaps ? ", float lod" : " ", samplerVar, + gTestMipmaps ? lodOffsetSource : offsetSource, + floatCoords ? floatKernelSource : intCoordKernelSource, readFormat, + gTestMipmaps ? 
", lod" : " "); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp index e9306fc4..2a722088 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp @@ -26,24 +26,28 @@ #endif const char *read1DKernelSourcePattern = -"__kernel void sample_kernel( read_only image1d_t input,%s __global float *xOffsets, __global %s4 *results %s)\n" -"{\n" -"%s" -" int tidX = get_global_id(0);\n" -" int offset = tidX;\n" -"%s" -" results[offset] = read_image%s( input, imageSampler, coord %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_only image1d_t input,%s __global float " + "*xOffsets, __global %s4 *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0);\n" + " int offset = tidX;\n" + "%s" + " results[offset] = read_image%s( input, imageSampler, coord %s);\n" + "}"; const char *read_write1DKernelSourcePattern = -"__kernel void sample_kernel( read_write image1d_t input,%s __global float *xOffsets, __global %s4 *results %s)\n" -"{\n" -"%s" -" int tidX = get_global_id(0);\n" -" int offset = tidX;\n" -"%s" -" results[offset] = read_image%s( input, coord %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_write image1d_t input,%s __global float " + "*xOffsets, __global %s4 *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0);\n" + " int offset = tidX;\n" + "%s" + " results[offset] = read_image%s( input, coord %s);\n" + "}"; const char *int1DCoordKernelSource = " int coord = xOffsets[offset];\n"; @@ -1075,14 +1079,14 @@ int test_read_image_set_1D(cl_device_id device, cl_context context, { KernelSourcePattern = read1DKernelSourcePattern; } - sprintf( programSrc, - KernelSourcePattern, - samplerArg, get_explicit_type_name( outputType ), - gTestMipmaps ? 
", float lod" : "", - samplerVar, + sprintf(programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable" + : "", + samplerArg, get_explicit_type_name(outputType), + gTestMipmaps ? ", float lod" : "", samplerVar, floatCoords ? float1DKernelSource : int1DCoordKernelSource, - readFormat, - gTestMipmaps ? ", lod" : "" ); + readFormat, gTestMipmaps ? ", lod" : ""); ptr = programSrc; diff --git a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp index 2f4e4d3b..a8009420 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp @@ -25,24 +25,28 @@ #endif const char *read1DArrayKernelSourcePattern = -"__kernel void sample_kernel( read_only image1d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global %s4 *results %s)\n" -"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, imageSampler, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_only image1d_array_t input,%s __global " + "float *xOffsets, __global float *yOffsets, __global %s4 *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0), tidY = get_global_id(1);\n" + "%s" + "%s" + " results[offset] = read_image%s( input, imageSampler, coords %s);\n" + "}"; const char *read_write1DArrayKernelSourcePattern = -"__kernel void sample_kernel( read_write image1d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global %s4 *results %s )\n" -"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_write image1d_array_t input,%s __global " + "float *xOffsets, __global float *yOffsets, __global %s4 *results %s )\n" + "{\n" + "%s" + " int tidX = 
get_global_id(0), tidY = get_global_id(1);\n" + "%s" + "%s" + " results[offset] = read_image%s( input, coords %s);\n" + "}"; const char *offset1DArrayKernelSource = " int offset = tidY*get_image_width(input) + tidX;\n"; @@ -1180,15 +1184,15 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context, KernelSourcePattern = read_write1DArrayKernelSourcePattern; } - sprintf( programSrc, - KernelSourcePattern, - samplerArg, get_explicit_type_name( outputType ), - gTestMipmaps ? ", float lod" : "", - samplerVar, - gTestMipmaps ? offset1DArrayLodKernelSource : offset1DArrayKernelSource, - floatCoords ? floatKernelSource1DArray : intCoordKernelSource1DArray, - readFormat, - gTestMipmaps ? ", lod" : "" ); + sprintf( + programSrc, KernelSourcePattern, + gTestMipmaps ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable" + : "", + samplerArg, get_explicit_type_name(outputType), + gTestMipmaps ? ", float lod" : "", samplerVar, + gTestMipmaps ? offset1DArrayLodKernelSource : offset1DArrayKernelSource, + floatCoords ? floatKernelSource1DArray : intCoordKernelSource1DArray, + readFormat, gTestMipmaps ? 
", lod" : ""); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp index d71bfec4..533a0fe8 100644 --- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp @@ -41,24 +41,32 @@ static size_t reduceImageDepth(size_t maxDepth) { } const char *read2DArrayKernelSourcePattern = -"__kernel void sample_kernel( read_only %s input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s%s *results %s )\n" -"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, imageSampler, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_only %s input,%s __global float " + "*xOffsets, __global float *yOffsets, __global float *zOffsets, __global " + "%s%s *results %s )\n" + "{\n" + "%s" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + "%s" + " results[offset] = read_image%s( input, imageSampler, coords %s);\n" + "}"; const char *read_write2DArrayKernelSourcePattern = -"__kernel void sample_kernel( read_write %s input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s%s *results %s)\n" -"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_write %s input,%s __global float " + "*xOffsets, __global float *yOffsets, __global float *zOffsets, __global " + "%s%s *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + "%s" + " results[offset] = read_image%s( 
input, coords %s);\n" + "}"; const char* offset2DarraySource =" int offset = tidZ*get_image_width(input)*get_image_height(input) + tidY*get_image_width(input) + tidX;\n"; const char* offset2DarraySourceLod = @@ -1412,17 +1420,16 @@ int test_read_image_set_2D_array(cl_device_id device, cl_context context, } // Construct the source - sprintf( programSrc, - KernelSourcePattern, - imageType, - samplerArg, get_explicit_type_name( outputType ), - imageElement, - gTestMipmaps ? ", float lod" : " ", - samplerVar, + sprintf(programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable" + : "", + imageType, samplerArg, get_explicit_type_name(outputType), + imageElement, gTestMipmaps ? ", float lod" : " ", samplerVar, gTestMipmaps ? offset2DarraySourceLod : offset2DarraySource, - floatCoords ? float2DArrayUnnormalizedCoordKernelSource : int2DArrayCoordKernelSource, - readFormat, - gTestMipmaps ? ", lod" : " " ); + floatCoords ? float2DArrayUnnormalizedCoordKernelSource + : int2DArrayCoordKernelSource, + readFormat, gTestMipmaps ? 
", lod" : " "); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_read_3D.cpp b/test_conformance/images/kernel_read_write/test_read_3D.cpp index 860114fb..cec77bf0 100644 --- a/test_conformance/images/kernel_read_write/test_read_3D.cpp +++ b/test_conformance/images/kernel_read_write/test_read_3D.cpp @@ -36,24 +36,32 @@ static size_t reduceImageDepth(size_t maxDimSize, RandomSeed& seed) { const char *read3DKernelSourcePattern = -"__kernel void sample_kernel( read_only image3d_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s4 *results %s)\n" -"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, imageSampler, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_only image3d_t input,%s __global float " + "*xOffsets, __global float *yOffsets, __global float *zOffsets, __global " + "%s4 *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + "%s" + " results[offset] = read_image%s( input, imageSampler, coords %s);\n" + "}"; const char *read_write3DKernelSourcePattern = -"__kernel void sample_kernel( read_write image3d_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s4 *results %s)\n" -"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_write image3d_t input,%s __global float " + "*xOffsets, __global float *yOffsets, __global float *zOffsets, __global " + "%s4 *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + "%s" + " results[offset] = 
read_image%s( input, coords %s);\n" + "}"; const char *offset3DKernelSource = " int offset = tidZ*get_image_width(input)*get_image_height(input) + tidY*get_image_width(input) + tidX;\n"; @@ -137,15 +145,16 @@ int test_read_image_set_3D(cl_device_id device, cl_context context, KernelSourcePattern = read_write3DKernelSourcePattern; } - sprintf( programSrc, - KernelSourcePattern, - samplerArg, get_explicit_type_name( outputType ), - gTestMipmaps? ", float lod": " ", - samplerVar, - gTestMipmaps? offset3DLodKernelSource: offset3DKernelSource, - floatCoords ? float3DUnnormalizedCoordKernelSource : int3DCoordKernelSource, - readFormat, - gTestMipmaps? ",lod":" "); + sprintf(programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable" + : "", + samplerArg, get_explicit_type_name(outputType), + gTestMipmaps ? ", float lod" : " ", samplerVar, + gTestMipmaps ? offset3DLodKernelSource : offset3DKernelSource, + floatCoords ? float3DUnnormalizedCoordKernelSource + : int3DCoordKernelSource, + readFormat, gTestMipmaps ? 
",lod" : " "); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_write_1D.cpp b/test_conformance/images/kernel_read_write/test_write_1D.cpp index 1556a76a..5f726796 100644 --- a/test_conformance/images/kernel_read_write/test_write_1D.cpp +++ b/test_conformance/images/kernel_read_write/test_write_1D.cpp @@ -27,20 +27,24 @@ extern bool validate_float_write_results( float *expected, float *actual, image_ extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor* imageInfo ); const char *readwrite1DKernelSourcePattern = -"__kernel void sample_kernel( __global %s4 *input, read_write image1d_t output %s)\n" -"{\n" -" int tidX = get_global_id(0);\n" -" int offset = tidX;\n" -" write_image%s( output, tidX %s, input[ offset ]);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s4 *input, read_write image1d_t " + "output %s)\n" + "{\n" + " int tidX = get_global_id(0);\n" + " int offset = tidX;\n" + " write_image%s( output, tidX %s, input[ offset ]);\n" + "}"; const char *write1DKernelSourcePattern = -"__kernel void sample_kernel( __global %s4 *input, write_only image1d_t output %s)\n" -"{\n" -" int tidX = get_global_id(0);\n" -" int offset = tidX;\n" -" write_image%s( output, tidX %s, input[ offset ]);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s4 *input, write_only image1d_t " + "output %s)\n" + "{\n" + " int tidX = get_global_id(0);\n" + " int offset = tidX;\n" + " write_image%s( output, tidX %s, input[ offset ]);\n" + "}"; int test_write_image_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, image_descriptor *imageInfo, ExplicitType inputType, MTdata d ) @@ -614,12 +618,14 @@ int test_write_image_1D_set(cl_device_id device, cl_context context, KernelSourcePattern = readwrite1DKernelSourcePattern; } - sprintf( programSrc, - KernelSourcePattern, - get_explicit_type_name( 
inputType ), - gTestMipmaps ? ", int lod" : "", - readFormat, - gTestMipmaps ? ", lod" :"" ); + sprintf( + programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma " + "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable" + : "", + get_explicit_type_name(inputType), gTestMipmaps ? ", int lod" : "", + readFormat, gTestMipmaps ? ", lod" : ""); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp index e9aa8d2a..f9024405 100644 --- a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp @@ -27,20 +27,24 @@ extern bool validate_float_write_results( float *expected, float *actual, image_ extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo ); const char *readwrite1DArrayKernelSourcePattern = -"__kernel void sample_kernel( __global %s4 *input, read_write image1d_array_t output %s)\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -" write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ]);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s4 *input, read_write " + "image1d_array_t output %s)\n" + "{\n" + " int tidX = get_global_id(0), tidY = get_global_id(1);\n" + "%s" + " write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ]);\n" + "}"; const char *write1DArrayKernelSourcePattern = -"__kernel void sample_kernel( __global %s4 *input, write_only image1d_array_t output %s)\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -" write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s4 *input, write_only " + "image1d_array_t output %s)\n" + "{\n" + " int tidX 
= get_global_id(0), tidY = get_global_id(1);\n" + "%s" + " write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n" + "}"; const char *offset1DArraySource = " int offset = tidY*get_image_width(output) + tidX;\n"; @@ -637,13 +641,15 @@ int test_write_image_1D_array_set(cl_device_id device, cl_context context, } // Construct the source // Construct the source - sprintf( programSrc, - KernelSourcePattern, - get_explicit_type_name( inputType ), - gTestMipmaps ? ", int lod" : "", - gTestMipmaps ? offset1DArrayLodSource : offset1DArraySource, - readFormat, - gTestMipmaps ? ", lod" :"" ); + sprintf( + programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma " + "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable" + : "", + get_explicit_type_name(inputType), gTestMipmaps ? ", int lod" : "", + gTestMipmaps ? offset1DArrayLodSource : offset1DArraySource, readFormat, + gTestMipmaps ? ", lod" : ""); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp index 5bca7124..c1c56994 100644 --- a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp @@ -49,20 +49,28 @@ static size_t reduceImageDepth(size_t maxDepth) { } const char *write2DArrayKernelSourcePattern = -"__kernel void sample_kernel( __global %s%s *input, write_only %s output %s)\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s%s *input, write_only %s output " + "%s)\n" + "{\n" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + " write_image%s( output, 
(int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset " + "]);\n" + "}"; const char *readwrite2DArrayKernelSourcePattern = -"__kernel void sample_kernel( __global %s%s *input, read_write %s output %s)\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ] );\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s%s *input, read_write %s output " + "%s)\n" + "{\n" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset " + "] );\n" + "}"; const char *offset2DArrayKernelSource = " int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n"; @@ -671,15 +679,19 @@ int test_write_image_2D_array_set(cl_device_id device, cl_context context, } // Construct the source // Construct the source - sprintf( programSrc, - KernelSourcePattern, - get_explicit_type_name( inputType ), - (format->image_channel_order == CL_DEPTH) ? "" : "4", - (format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" : "image2d_array_t", - gTestMipmaps ? " , int lod" : "", - gTestMipmaps ? offset2DArrayLodKernelSource : offset2DArrayKernelSource, - readFormat, - gTestMipmaps ? ", lod" : "" ); + sprintf( + programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma " + "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable" + : "", + get_explicit_type_name(inputType), + (format->image_channel_order == CL_DEPTH) ? "" : "4", + (format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" + : "image2d_array_t", + gTestMipmaps ? " , int lod" : "", + gTestMipmaps ? offset2DArrayLodKernelSource : offset2DArrayKernelSource, + readFormat, gTestMipmaps ? 
", lod" : ""); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_write_3D.cpp b/test_conformance/images/kernel_read_write/test_write_3D.cpp index d9a69627..9da93695 100644 --- a/test_conformance/images/kernel_read_write/test_write_3D.cpp +++ b/test_conformance/images/kernel_read_write/test_write_3D.cpp @@ -46,22 +46,30 @@ static size_t reduceImageDepth(size_t maxDimSize, MTdata& seed) { const char *write3DKernelSourcePattern = -"%s" -"__kernel void sample_kernel( __global %s4 *input, write_only image3d_t output %s )\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n" -"}"; + "%s" + "%s\n" + "__kernel void sample_kernel( __global %s4 *input, write_only image3d_t " + "output %s )\n" + "{\n" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset " + "]);\n" + "}"; const char *readwrite3DKernelSourcePattern = -"%s" -"__kernel void sample_kernel( __global %s4 *input, read_write image3d_t output %s )\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n" -"}"; + "%s" + "%s\n" + "__kernel void sample_kernel( __global %s4 *input, read_write image3d_t " + "output %s )\n" + "{\n" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset " + "]);\n" + "}"; const char *khr3DWritesPragma = "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"; @@ -678,14 +686,15 @@ int test_write_image_3D_set(cl_device_id device, cl_context context, } // Construct the source - sprintf( programSrc, - 
KernelSourcePattern, - gTestMipmaps ? "" : khr3DWritesPragma, - get_explicit_type_name( inputType ), - gTestMipmaps ? ", int lod" : "", - gTestMipmaps ? offset3DLodSource : offset3DSource, - readFormat, - gTestMipmaps ? ", lod" : "" ); + sprintf( + programSrc, KernelSourcePattern, khr3DWritesPragma, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma " + "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable" + : "", + get_explicit_type_name(inputType), gTestMipmaps ? ", int lod" : "", + gTestMipmaps ? offset3DLodSource : offset3DSource, readFormat, + gTestMipmaps ? ", lod" : ""); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_write_image.cpp b/test_conformance/images/kernel_read_write/test_write_image.cpp index 9cc9698c..29626971 100644 --- a/test_conformance/images/kernel_read_write/test_write_image.cpp +++ b/test_conformance/images/kernel_read_write/test_write_image.cpp @@ -47,20 +47,24 @@ extern bool validate_float_write_results( float *expected, float *actual, image_ extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo ); const char *writeKernelSourcePattern = -"__kernel void sample_kernel( __global %s%s *input, write_only %s output %s)\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -" write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s%s *input, write_only %s output " + "%s)\n" + "{\n" + " int tidX = get_global_id(0), tidY = get_global_id(1);\n" + "%s" + " write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n" + "}"; const char *read_writeKernelSourcePattern = -"__kernel void sample_kernel( __global %s%s *input, read_write %s output %s)\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -" write_image%s( output, (int2)( tidX, tidY )%s, 
input[ offset ] );\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s%s *input, read_write %s output " + "%s)\n" + "{\n" + " int tidX = get_global_id(0), tidY = get_global_id(1);\n" + "%s" + " write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ] );\n" + "}"; const char *offset2DKernelSource = " int offset = tidY*get_image_width(output) + tidX;\n"; @@ -728,15 +732,19 @@ int test_write_image_set(cl_device_id device, cl_context context, } // Construct the source - sprintf( programSrc, - KernelSourcePattern, - get_explicit_type_name( inputType ), - (format->image_channel_order == CL_DEPTH) ? "" : "4", - (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t", - gTestMipmaps ? ", int lod" : "", - gTestMipmaps ? offset2DLodKernelSource : offset2DKernelSource, - readFormat, - gTestMipmaps ? ", lod" : "" ); + sprintf( + programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma " + "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable" + : "", + get_explicit_type_name(inputType), + (format->image_channel_order == CL_DEPTH) ? "" : "4", + (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" + : "image2d_t", + gTestMipmaps ? ", int lod" : "", + gTestMipmaps ? offset2DLodKernelSource : offset2DKernelSource, + readFormat, gTestMipmaps ? ", lod" : ""); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, -- cgit v1.2.3 From f32f1aeaa20d796210c4c7050695eb2062ddfba1 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 17 May 2022 16:54:39 +0100 Subject: Add content to README (#1427) Fill in the placeholder readme with some basic information on building and running the project. Information on the conformance submission process and contributing are also included. 
Should help close a few issues referenced in https://github.com/KhronosGroup/OpenCL-CTS/issues/1096 I don't think this is all the information we want, but is a starting point from which we can progress. For example, adding the android build instructions from https://github.com/KhronosGroup/OpenCL-CTS/pull/1021 --- README.md | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 115 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 796f7c86..3d410644 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,115 @@ -# OpenCL-CTS -The OpenCL Conformance Tests +# OpenCL Conformance Test Suite (CTS) + +This it the OpenCL CTS for all versions of the Khronos +[OpenCL](https://www.khronos.org/opencl/) standard. + +## Building the CTS + +The CTS supports Linux, Windows, macOS, and Android platforms. In particular, +GitHub Actions CI builds against Ubuntu 20.04, Windows-latest, and +macos-latest. + +Compiling the CTS requires the following CMake configuration options to be set: + +* `CL_INCLUDE_DIR` Points to the unified + [OpenCL-Headers](https://github.com/KhronosGroup/OpenCL-Headers). +* `CL_LIB_DIR` Directory containing the OpenCL library to build against. +* `OPENCL_LIBRARIES` Name of the OpenCL library to link. + +It is advised that the [OpenCL ICD-Loader](https://github.com/KhronosGroup/OpenCL-ICD-Loader) +is used as the OpenCL library to build against. Where `CL_LIB_DIR` points to a +build of the ICD loader and `OPENCL_LIBRARIES` is "OpenCL". + +### Example Build + +Steps on a Linux platform to clone dependencies from GitHub sources, configure +a build, and compile. 
+ +```sh +git clone https://github.com/KhronosGroup/OpenCL-CTS.git +git clone https://github.com/KhronosGroup/OpenCL-Headers.git +git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git + +mkdir OpenCL-ICD-Loader/build +cmake -S OpenCL-ICD-Loader -B OpenCL-ICD-Loader/build \ + -DOPENCL_ICD_LOADER_HEADERS_DIR=$PWD/OpenCL-Headers +cmake --build ./OpenCL-ICD-Loader/build --config Release + +mkdir OpenCL-CTS/build +cmake -S OpenCL-CTS -B OpenCL-CTS/build \ + -DCL_INCLUDE_DIR=$PWD/OpenCL-Headers \ + -DCL_LIB_DIR=$PWD/OpenCL-ICD-Loader/build \ + -DOPENCL_LIBRARIES=OpenCL +cmake --build OpenCL-CTS/build --config Release +``` + +## Running the CTS + +A build of the CTS contains multiple executables representing the directories in +the `test_conformance` folder. Each of these executables contains sub-tests, and +possibly smaller granularities of testing within the sub-tests. + +See the `--help` output on each executable for the list of sub-tests available, +as well as other options for configuring execution. + +If the OpenCL library built against is the ICD Loader, and the vendor library to +be tested is not registered in the +[default ICD Loader location](https://github.com/KhronosGroup/OpenCL-ICD-Loader#registering-icds) +then the [OCL_ICD_FILENAMES](https://github.com/KhronosGroup/OpenCL-ICD-Loader#table-of-debug-environment-variables) +environment variable will need to be set for the ICD Loader to detect the OpenCL +library to use at runtime. For example, to run the basic tests on a Linux +platform: + +```sh +OCL_ICD_FILENAMES=/path/to/vendor_lib.so ./test_basic +``` + +### Offline Compilation + +Testing OpenCL drivers which do not have a runtime compiler can be done by using +additional command line arguments provided by the test harness for tests which +require compilation, these are: + +* `--compilation-mode` Selects if OpenCL-C source code should be compiled using + an external tool before being passed on to the OpenCL driver in that form for + testing. 
Online is the default mode, but also accepts the values `spir-v`, and + `binary`. + +* `--compilation-cache-mode` Controls how the compiled OpenCL-C source code + should be cached on disk. + +* `--compilation-cache-path` Accepts a path to a directory where the compiled + binary cache should be stored on disk. + +* `--compilation-program` Accepts a path to an executable (default: + cl_offline_compiler) invoked by the test harness to perform offline + compilation of OpenCL-C source code. This executable must match the + [interface description](test_common/harness/cl_offline_compiler-interface.txt). + +## Generating a Conformance Report + +The Khronos [Conformance Process Document](https://members.khronos.org/document/dl/911) +details the steps required for a conformance submissions. +In this repository [opencl_conformance_tests_full.csv](test_conformance/submission_details_template.txt) +defines the full list of tests which must be run for conformance. The output log +of which must be included alongside a filled in +[submission details template](test_conformance/submission_details_template.txt). + +Utility script [run_conformance.py](test_conformance/run_conformance.py) can be +used to help generating the submission log, although it is not required. + +Git [tags](https://github.com/KhronosGroup/OpenCL-CTS/tags) are used to define +the version of the repository conformance submissions are made against. + +## Contributing + +Contributions are welcome to the project from Khronos members and non-members +alike via GitHub Pull Requests (PR). Alternatively, if you've found a bug or have +a questions please file an issue in the GitHub project. First time contributors +will be required to sign the Khronos Contributor License Agreement (CLA) before +their PR can be merged. + +PRs to the repository are required to be `clang-format` clean to pass CI. 
+Developers can either use the `git-clang-format` tool locally to verify this +before contributing, or update their PR based on the diff provided by a failing +CI job. -- cgit v1.2.3 From 3bf46004ef4f6308bc49b1e22b1c7824a7a0e626 Mon Sep 17 00:00:00 2001 From: paulfradgley <39525348+paulfradgley@users.noreply.github.com> Date: Tue, 31 May 2022 16:55:42 +0100 Subject: Fixes incorrect slice pitch calculation in clCopyImage 1Darray (#1258) The slice pitch/padding calculation assumed that the 'height' variable contained the pixel height of the image, which it doesn't for IMAGE1D_ARRAY. Fixes #1257 --- test_conformance/images/clCopyImage/test_copy_generic.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test_conformance/images/clCopyImage/test_copy_generic.cpp b/test_conformance/images/clCopyImage/test_copy_generic.cpp index bd935e7f..3bd1b6ef 100644 --- a/test_conformance/images/clCopyImage/test_copy_generic.cpp +++ b/test_conformance/images/clCopyImage/test_copy_generic.cpp @@ -228,6 +228,11 @@ cl_mem create_image( cl_context context, cl_command_queue queue, BufferOwningPtr } size_t mappedSlicePad = mappedSlice - (mappedRow * height); + // For 1Darray, the height variable actually contains the arraysize, + // so it can't be used for calculating the slice padding. + if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) + mappedSlicePad = mappedSlice - (mappedRow * 1); + // Copy the image. size_t scanlineSize = row_pitch_lod; size_t sliceSize = slice_pitch_lod - scanlineSize * height; -- cgit v1.2.3 From 7c65afc4e71b6b6b6023b598f9f675ebfeffc8a5 Mon Sep 17 00:00:00 2001 From: jansol Date: Tue, 7 Jun 2022 18:55:43 +0300 Subject: test_compiler_defines_for_extensions: fix overflow (#1430) GCC 11.2.0 warns about a possible string overflow (when num_not_supported_extensions+num_of_supported_extensions == 0) since no space would be allocated for the terminating null byte that string manipulation fns expect to find. 
This unconditionally adds an extra byte to the allocation to silence the warning and fix building with -Werror. --- .../compiler/test_compiler_defines_for_extensions.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 1519779a..84b7798f 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -322,8 +322,15 @@ int test_compiler_defines_for_extensions(cl_device_id device, cl_context context } // Build the kernel - char *kernel_code = (char*)malloc(1025*256*(num_not_supported_extensions+num_of_supported_extensions)); - memset(kernel_code, 0, 1025*256*(num_not_supported_extensions+num_of_supported_extensions)); + char *kernel_code = (char *)malloc( + 1 + + 1025 * 256 + * (num_not_supported_extensions + num_of_supported_extensions)); + memset( + kernel_code, 0, + 1 + + 1025 * 256 + * (num_not_supported_extensions + num_of_supported_extensions)); int i, index = 0; strcat(kernel_code, kernel_strings[0]); -- cgit v1.2.3 From c2aca7d8e6a6ec2162a1c68b127409aa9931974d Mon Sep 17 00:00:00 2001 From: paulfradgley <39525348+paulfradgley@users.noreply.github.com> Date: Tue, 14 Jun 2022 16:47:06 +0100 Subject: Fix local memory out of bounds issue in atomic_fence (replaces PR #1285) (#1437) * Fix local memory out of bounds in atomic_fence In the error condition, the atomic_fence kernel can illegally access local memory addresses. In this snippet, localValues is in the local address space and provided as a kernel argument. Its size is effectively get_local_size(0) * sizeof(int). The stores to localValues lead to OoB accesses. size_t myId = get_local_id(0); ... 
if(hisAtomicValue != hisValue) { // fail atomic_store(&destMemory[myId], myValue-1); hisId = (hisId+get_local_size(0)-1)%get_local_size(0); if(myValue+1 < 1) localValues[myId*1+myValue+1] = hisId; if(myValue+2 < 1) localValues[myId*1+myValue+2] = hisAtomicValue; if(myValue+3 < 1) localValues[myId*1+myValue+3] = hisValue; } * Fix formatting * Fix formatting again * Formatting --- test_conformance/c11_atomics/common.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 42fe32b6..5bb9e5b7 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -1360,8 +1360,10 @@ int CBasicTest::ExecuteSingleTest( { error = clSetKernelArg(kernel, argInd++, - LocalRefValues() ? typeSize * CurrentGroupSize() - * NumNonAtomicVariablesPerThread() + LocalRefValues() ? typeSize + * ((CurrentGroupSize() + * NumNonAtomicVariablesPerThread()) + + 4) : 1, NULL); test_error(error, "Unable to set indexed kernel argument"); -- cgit v1.2.3 From f1c051afb1484dca7cacdf66383f5e8a6e6bec32 Mon Sep 17 00:00:00 2001 From: Ahmed <36049290+AhmedAmraniAkdi@users.noreply.github.com> Date: Tue, 14 Jun 2022 16:48:59 +0100 Subject: Added missing tests for integer_dot_product_input_4x8bit and integer_dot_product_input_4x8bit_packed on feature_macro compiler test. (#1432) * Added integer_dot_product_input_4x8bit and integer_dot_product_input_4x8bit_packed tests to feature_macro_test * clang formatting * Now the test checks whether the array of optional features returned by clGetDeviceInfo contains the standard optional features we are testing. * Update test_conformance/compiler/test_feature_macro.cpp Added printing the missing standard feature it it is not found inside the optional features array returned by clGetDeviceInfo. 
Co-authored-by: Ben Ashbaugh Co-authored-by: Ben Ashbaugh --- test_conformance/compiler/test_feature_macro.cpp | 98 +++++++++++++++++++++--- 1 file changed, 89 insertions(+), 9 deletions(-) diff --git a/test_conformance/compiler/test_feature_macro.cpp b/test_conformance/compiler/test_feature_macro.cpp index ac355dd4..ef3c0028 100644 --- a/test_conformance/compiler/test_feature_macro.cpp +++ b/test_conformance/compiler/test_feature_macro.cpp @@ -579,6 +579,78 @@ int test_feature_macro_fp64(cl_device_id deviceID, cl_context context, compiler_status, supported); } +int test_feature_macro_integer_dot_product_input_4x8bit_packed( + cl_device_id deviceID, cl_context context, std::string test_macro_name, + cl_bool& supported) +{ + cl_int error = TEST_FAIL; + cl_bool api_status; + cl_bool compiler_status; + log_info("\n%s ...\n", test_macro_name.c_str()); + + if (!is_extension_available(deviceID, "cl_khr_integer_dot_product")) + { + supported = false; + return TEST_PASS; + } + + error = check_api_feature_info_capabilities< + cl_device_integer_dot_product_capabilities_khr>( + deviceID, context, api_status, + CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR, + CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR); + if (error != CL_SUCCESS) + { + return error; + } + + error = check_compiler_feature_info(deviceID, context, test_macro_name, + compiler_status); + if (error != CL_SUCCESS) + { + return error; + } + + return feature_macro_verify_results(test_macro_name, api_status, + compiler_status, supported); +} + +int test_feature_macro_integer_dot_product_input_4x8bit( + cl_device_id deviceID, cl_context context, std::string test_macro_name, + cl_bool& supported) +{ + cl_int error = TEST_FAIL; + cl_bool api_status; + cl_bool compiler_status; + log_info("\n%s ...\n", test_macro_name.c_str()); + + if (!is_extension_available(deviceID, "cl_khr_integer_dot_product")) + { + supported = false; + return TEST_PASS; + } + + error = check_api_feature_info_capabilities< + 
cl_device_integer_dot_product_capabilities_khr>( + deviceID, context, api_status, + CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR, + CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR); + if (error != CL_SUCCESS) + { + return error; + } + + error = check_compiler_feature_info(deviceID, context, test_macro_name, + compiler_status); + if (error != CL_SUCCESS) + { + return error; + } + + return feature_macro_verify_results(test_macro_name, api_status, + compiler_status, supported); +} + int test_feature_macro_int64(cl_device_id deviceID, cl_context context, std::string test_macro_name, cl_bool& supported) { @@ -686,15 +758,6 @@ int test_consistency_c_features_list(cl_device_id deviceID, sort(vec_to_cmp.begin(), vec_to_cmp.end()); sort(vec_device_feature_names.begin(), vec_device_feature_names.end()); - if (vec_device_feature_names == vec_to_cmp) - { - log_info("Comparison list of features - passed\n"); - } - else - { - log_info("Comparison list of features - failed\n"); - error = TEST_FAIL; - } log_info( "Supported features based on CL_DEVICE_OPENCL_C_FEATURES API query:\n"); for (auto each_f : vec_device_feature_names) @@ -703,11 +766,26 @@ int test_consistency_c_features_list(cl_device_id deviceID, } log_info("\nSupported features based on queries to API/compiler :\n"); + for (auto each_f : vec_to_cmp) { log_info("%s\n", each_f.c_str()); } + for (auto each_f : vec_to_cmp) + { + if (find(vec_device_feature_names.begin(), + vec_device_feature_names.end(), each_f) + == vec_device_feature_names.end()) + { + log_info("Comparison list of features - failed - missing %s\n", + each_f.c_str()); + return TEST_FAIL; + } + } + + log_info("Comparison list of features - passed\n"); + return error; } @@ -748,6 +826,8 @@ int test_features_macro(cl_device_id deviceID, cl_context context, NEW_FEATURE_MACRO_TEST(images); NEW_FEATURE_MACRO_TEST(fp64); NEW_FEATURE_MACRO_TEST(int64); + NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit); + 
NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit_packed); error |= test_consistency_c_features_list(deviceID, supported_features_vec); -- cgit v1.2.3 From 67ac6c8d2d1b2e8ee9d6b775be459759ec301bf9 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Tue, 14 Jun 2022 23:51:39 +0800 Subject: Fix test_half async_work_group_copy arguments (#1298) (#1299) Workitems in the last workgroup calls async_work_group_copy with different argument values depending on 'adjust'. According to spec, this results in undefined values. --- test_conformance/half/Test_vStoreHalf.cpp | 35 ++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp index 85824a9f..3ca5920b 100644 --- a/test_conformance/half/Test_vStoreHalf.cpp +++ b/test_conformance/half/Test_vStoreHalf.cpp @@ -422,7 +422,9 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR "__kernel void test( __global float *p, __global half *f,\n" " uint extra_last_thread )\n" "{\n" - " __local ushort data[3*(", local_buf_size, "+1)];\n" + " __local ushort data[3*(", + local_buf_size, + "+1)];\n" " size_t i = get_global_id(0);\n" " size_t lid = get_local_id(0);\n" " size_t last_i = get_global_size(0)-1;\n" @@ -432,9 +434,18 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR " if(last_i == i && extra_last_thread != 0) {\n" " adjust = 3-extra_last_thread;\n" " } " - " vstore_half3",roundName,"( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n" + " vstore_half3", + roundName, + "( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n" " barrier( CLK_LOCAL_MEM_FENCE ); \n" - " async_event = async_work_group_copy((__global ushort *)(f+3*(i-lid)), (__local ushort *)(&data[adjust]), lsize*3-adjust, 0);\n" // investigate later + " if (get_group_id(0) == (get_num_groups(0) - 1) &&\n" + " extra_last_thread != 0) {\n" + " adjust = 3-extra_last_thread;\n" + 
" }\n" + " async_event = async_work_group_copy(\n" + " (__global ushort*)(f+3*(i-lid)),\n" + " (__local ushort *)(&data[adjust]),\n" + " lsize*3-adjust, 0);\n" // investigate later " wait_group_events(1, &async_event);\n" "}\n" }; @@ -524,7 +535,9 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR "__kernel void test( __global double *p, __global half *f,\n" " uint extra_last_thread )\n" "{\n" - " __local ushort data[3*(", local_buf_size, "+1)];\n" + " __local ushort data[3*(", + local_buf_size, + "+1)];\n" " size_t i = get_global_id(0);\n" " size_t lid = get_local_id(0);\n" " size_t last_i = get_global_size(0)-1;\n" @@ -534,15 +547,23 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR " if(last_i == i && extra_last_thread != 0) {\n" " adjust = 3-extra_last_thread;\n" " }\n " - " vstore_half3",roundName,"( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n" + " vstore_half3", + roundName, + "( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n" " barrier( CLK_LOCAL_MEM_FENCE ); \n" - " async_event = async_work_group_copy((__global ushort *)(f+3*(i-lid)), (__local ushort *)(&data[adjust]), lsize*3-adjust, 0);\n" // investigate later + " if (get_group_id(0) == (get_num_groups(0) - 1) &&\n" + " extra_last_thread != 0) {\n" + " adjust = 3-extra_last_thread;\n" + " }\n" + " async_event = async_work_group_copy(\n" + " (__global ushort *)(f+3*(i-lid)),\n" + " (__local ushort *)(&data[adjust]),\n" + " lsize*3-adjust, 0);\n" // investigate later " wait_group_events(1, &async_event);\n" "}\n" }; - if(g_arrVecSizes[vectorSize] == 3) { programs[vectorSize][0] = MakeProgram( device, source_v3, sizeof(source_v3) / sizeof( source_v3[0]) ); } else { -- cgit v1.2.3 From 0b7118186af0f146dd044909c677bed7869c1363 Mon Sep 17 00:00:00 2001 From: Nikhil Joshi Date: Tue, 21 Jun 2022 21:51:47 +0530 Subject: Initial CTS for external semaphore and memory extensions (#1390) * Initial CTS for external sharing 
extensions Initial set of tests for below extensions with Vulkan as producer 1. cl_khr_external_memory 2. cl_khr_external_memory_win32 3. cl_khr_external_memory_opaque_fd 4. cl_khr_external_semaphore 5. cl_khr_external_semaphore_win32 6. cl_khr_external_semaphore_opaque_fd * Updates to external sharing CTS Updates to external sharing CTS 1. Fix some build issues to remove unnecessary, non-existent files 2. Add new tests for platform and device queries. 3. Some added checks for VK Support. * Update CTS build script for Vulkan Headers Update CTS build to clone Vulkan Headers repo and pass it to CTS build in preparation for external memory and semaphore tests * Fix Vulkan header path Fix Vulkan header include path. * Add Vulkan loader dependency Vulkan loader is required to build test_vulkan of OpenCL-CTS. Clone and build Vulkan loader as prerequisite to OpenCL-CTS. * Fix Vulkan loader path in test_vulkan Remove arch/os suffix in Vulkan loader path to match vulkan loader repo build. * Fix warnings around getHandle API. Return type of getHandle is defined differently based on win or linux builds. Use appropriate guards when using API at other places. While at it remove duplicate definition of ARRAY_SIZE. * Use ARRAY_SIZE in harness. Use already defined ARRAY_SIZE macro from test_harness. * Fix build issues for test_vulkan Fix build issues for test_vulkan 1. Add cl_ext.h in common files 2. Replace cl_mem_properties_khr with cl_mem_properties 3. Replace cl_external_mem_handle_type_khr with cl_external_memory_handle_type_khr 4. Type-cast malloc as required. * Fix code formatting. Fix code formatting to get CTS CI builds clean. * Fix formatting fixes part-2 Another set of formatting fixes. * Fix code formatting part-3 Some more code formatting fixes. * Fix code formatting issues part-4 More code formatting fixes. * Formatting fixes part-5 Some more formatting fixes * Fix formatting part-6 More formatting fixes continued. 
* Code formatting fixes part-7 Code formatting fixes for image * Code formatting fixes part-8 Fixes for platform and device query tests. * Code formatting fixes part-9 More formatting fixes for vulkan_wrapper * Code formatting fixes part-10 More fixes to wrapper header * Code formatting fixes part-11 Formatting fixes for api_list * Code formatting fixes part-12 Formatting fixes for api_list_map. * Code formatting changes part-13 Code formatting changes for utility. * Code formatting fixes part-15 Formatting fixes for wrapper. * Misc Code formatting fixes Some more misc code formatting fixes. * Fix build breaks due to code formatting Fix build issues arised with recent code formatting issues. * Fix presubmit script after merge Fix presubmit script after merge conflicts. * Fix Vulkan loader build in presubmit script. Use cmake ninja and appropriate toolchain for Vulkan loader dependency to fix linking issue on arm/aarch64. * Use static array sizes Use static array sizes to fix windows builds. * Some left-out formatting fixes. Fix remaining formatting issues. * Fix harness header path Fix harness header path While at it, remove Misc and test pragma. * Add/Fix license information Add Khronos License info for test_vulkan. Replace Apple license with Khronos as applicable. * Fix headers for Mac OSX builds. Use appropriate headers for Mac OSX builds * Fix Mac OSX builds. Use appropriate headers for Mac OSX builds. Also, fix some build issues due to type-casting. * Fix new code formatting issues Fix new code formatting issues with recent MacOS fixes. * Add back missing case statement Add back missing case statement that was accidentally removed. * Disable USE_GAS for Vulkan Loader build. Disable USE_GAS for Vulkan Loader build to fix aarch64 build. * Update Copyright Year. Update Copyright Year to 2022 for external memory sharing tests. * Android specific fixes Android specific fixes to external sharing tests. 
--- presubmit.sh | 18 +- test_common/harness/kernelHelpers.cpp | 12 +- test_conformance/CMakeLists.txt | 1 + test_conformance/subgroups/subhelpers.h | 1 + test_conformance/vulkan/CMakeLists.txt | 50 + test_conformance/vulkan/main.cpp | 344 ++++ test_conformance/vulkan/procs.h | 38 + .../vulkan/test_vulkan_api_consistency.cpp | 568 ++++++ .../vulkan/test_vulkan_interop_buffer.cpp | 1808 +++++++++++++++++ .../vulkan/test_vulkan_interop_image.cpp | 1648 ++++++++++++++++ .../vulkan/test_vulkan_platform_device_info.cpp | 146 ++ .../opencl_vulkan_wrapper.cpp | 818 ++++++++ .../opencl_vulkan_wrapper.hpp | 129 ++ .../vulkan_interop_common/vulkan_api_list.hpp | 195 ++ .../vulkan_interop_common.cpp | 22 + .../vulkan_interop_common.hpp | 50 + .../vulkan_interop_common/vulkan_list_map.cpp | 424 ++++ .../vulkan_interop_common/vulkan_list_map.hpp | 389 ++++ .../vulkan_interop_common/vulkan_utility.cpp | 693 +++++++ .../vulkan_interop_common/vulkan_utility.hpp | 69 + .../vulkan_interop_common/vulkan_wrapper.cpp | 2075 ++++++++++++++++++++ .../vulkan_interop_common/vulkan_wrapper.hpp | 579 ++++++ .../vulkan_interop_common/vulkan_wrapper_types.hpp | 463 +++++ 23 files changed, 10535 insertions(+), 5 deletions(-) create mode 100644 test_conformance/vulkan/CMakeLists.txt create mode 100644 test_conformance/vulkan/main.cpp create mode 100644 test_conformance/vulkan/procs.h create mode 100644 test_conformance/vulkan/test_vulkan_api_consistency.cpp create mode 100644 test_conformance/vulkan/test_vulkan_interop_buffer.cpp create mode 100644 test_conformance/vulkan/test_vulkan_interop_image.cpp create mode 100644 test_conformance/vulkan/test_vulkan_platform_device_info.cpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_api_list.hpp create mode 100644 
test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.cpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.hpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.cpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper_types.hpp diff --git a/presubmit.sh b/presubmit.sh index b63a4373..6c3a293e 100755 --- a/presubmit.sh +++ b/presubmit.sh @@ -40,6 +40,9 @@ if [[ ( ${JOB_ARCHITECTURE} == "" && ${JOB_ENABLE_GL} == "1" ) ]]; then BUILD_OPENGL_TEST="ON" fi +#Vulkan Headers +git clone https://github.com/KhronosGroup/Vulkan-Headers.git + # Get and build loader git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git cd ${TOP}/OpenCL-ICD-Loader @@ -48,6 +51,16 @@ cd build cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/ cmake --build . -j2 --config Release +#Vulkan Loader +cd ${TOP} +git clone https://github.com/KhronosGroup/Vulkan-Loader.git +cd Vulkan-Loader +mkdir build +cd build +python3 ../scripts/update_deps.py +cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DBUILD_WSI_XLIB_SUPPORT=OFF -DBUILD_WSI_XCB_SUPPORT=OFF -DBUILD_WSI_WAYLAND_SUPPORT=OFF -DUSE_GAS=OFF -C helper.cmake .. +cmake --build . -j2 --config Release + # Build CTS cd ${TOP} ls -l @@ -68,6 +81,9 @@ cmake .. 
-G Ninja \ -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin \ -DOPENCL_LIBRARIES="${CMAKE_OPENCL_LIBRARIES_OPTION}" \ -DUSE_CL_EXPERIMENTAL=ON \ - -DGL_IS_SUPPORTED=${BUILD_OPENGL_TEST} + -DGL_IS_SUPPORTED=${BUILD_OPENGL_TEST} \ + -DVULKAN_INCLUDE_DIR=${TOP}/Vulkan-Headers/include/ \ + -DVULKAN_LIB_DIR=${TOP}/Vulkan-Loader/build/loader/ cmake --build . -j3 --config Release + diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp index 1d1f8d8c..13ebcbc9 100644 --- a/test_common/harness/kernelHelpers.cpp +++ b/test_common/harness/kernelHelpers.cpp @@ -1661,8 +1661,10 @@ Version get_device_latest_cl_c_version(cl_device_id device) Version max_supported_cl_c_version{}; for (const auto &name_version : name_versions) { - Version current_version{ CL_VERSION_MAJOR(name_version.version), - CL_VERSION_MINOR(name_version.version) }; + Version current_version{ + static_cast(CL_VERSION_MAJOR(name_version.version)), + static_cast(CL_VERSION_MINOR(name_version.version)) + }; max_supported_cl_c_version = (current_version > max_supported_cl_c_version) ? 
current_version @@ -1745,8 +1747,10 @@ bool device_supports_cl_c_version(cl_device_id device, Version version) for (const auto &name_version : name_versions) { - Version current_version{ CL_VERSION_MAJOR(name_version.version), - CL_VERSION_MINOR(name_version.version) }; + Version current_version{ + static_cast(CL_VERSION_MAJOR(name_version.version)), + static_cast(CL_VERSION_MINOR(name_version.version)) + }; if (current_version == version) { return true; diff --git a/test_conformance/CMakeLists.txt b/test_conformance/CMakeLists.txt index 363ece86..f9514f1e 100644 --- a/test_conformance/CMakeLists.txt +++ b/test_conformance/CMakeLists.txt @@ -52,6 +52,7 @@ add_subdirectory( pipes ) add_subdirectory( device_timer ) add_subdirectory( spirv_new ) add_subdirectory( spir ) +add_subdirectory( vulkan ) file(GLOB CSV_FILES "opencl_conformance_tests_*.csv") diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index c73027dc..12704db8 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -55,6 +55,7 @@ static cl_uint4 bs128_to_cl_uint4(bs128 v) struct WorkGroupParams { + WorkGroupParams(size_t gws, size_t lws, int dm_arg = -1, int cs_arg = -1) : global_workgroup_size(gws), local_workgroup_size(lws), divergence_mask_arg(dm_arg), cluster_size_arg(cs_arg) diff --git a/test_conformance/vulkan/CMakeLists.txt b/test_conformance/vulkan/CMakeLists.txt new file mode 100644 index 00000000..4f43172a --- /dev/null +++ b/test_conformance/vulkan/CMakeLists.txt @@ -0,0 +1,50 @@ +set (MODULE_NAME VULKAN) + +if(WIN32) + list(APPEND CLConform_LIBRARIES vulkan-1) +else(WIN32) + list(APPEND CLConform_LIBRARIES vulkan dl) +endif(WIN32) +set(CMAKE_CXX_FLAGS "-fpermissive") +if(WIN32) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVK_USE_PLATFORM_WIN32_KHR") +endif(WIN32) + +set (CLConform_VULKAN_LIBRARIES_DIR "${VULKAN_LIB_DIR}") + +link_directories(${CLConform_VULKAN_LIBRARIES_DIR}) + +list(APPEND 
CLConform_INCLUDE_DIR ${VULKAN_INCLUDE_DIR}) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +include_directories (${CLConform_INCLUDE_DIR}) + +set (${MODULE_NAME}_SOURCES + main.cpp + test_vulkan_interop_buffer.cpp + test_vulkan_interop_image.cpp + test_vulkan_api_consistency.cpp + test_vulkan_platform_device_info.cpp + vulkan_interop_common/vulkan_wrapper.cpp + vulkan_interop_common/vulkan_interop_common.cpp + vulkan_interop_common/opencl_vulkan_wrapper.cpp + vulkan_interop_common/vulkan_utility.cpp + vulkan_interop_common/vulkan_list_map.cpp + ../../test_common/harness/genericThread.cpp + ../../test_common/harness/errorHelpers.cpp + ../../test_common/harness/testHarness.cpp + ../../test_common/harness/kernelHelpers.cpp + ../../test_common/harness/mt19937.cpp + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp + ../../test_common/harness/deviceInfo.cpp + ../../test_common/harness/crc32.cpp + ) + +set_source_files_properties( + ${${MODULE_NAME}_SOURCES} + PROPERTIES LANGUAGE CXX) +include_directories("./vulkan_interop_common/") + +include(../CMakeCommon.txt) diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp new file mode 100644 index 00000000..6cbde5cc --- /dev/null +++ b/test_conformance/vulkan/main.cpp @@ -0,0 +1,344 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include +#include + +#if !defined(_WIN32) +#include +#endif + +#include +#include + +#if !defined(__APPLE__) +#include +#else +#include +#endif + + +#include "procs.h" +#include "harness/testHarness.h" +#include "harness/parseParameters.h" +#include "harness/deviceInfo.h" + +#if !defined(_WIN32) +#include +#endif +#include +#include + +#define BUFFERSIZE 3000 + +static void params_reset() +{ + numCQ = 1; + multiImport = false; + multiCtx = false; +} + +extern int test_buffer_common(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_); +extern int test_image_common(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_); + +int test_buffer_single_queue(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + log_info("RUNNING TEST WITH ONE QUEUE...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_); +} +int test_buffer_multiple_queue(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + numCQ = 2; + log_info("RUNNING TEST WITH TWO QUEUE...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_); +} +int test_buffer_multiImport_sameCtx(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + multiImport = true; + log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " + "IN SAME CONTEXT...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_); +} +int test_buffer_multiImport_diffCtx(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + multiImport = true; + multiCtx = true; + log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " + "IN DIFFERENT CONTEXT...... 
\n\n"); + return test_buffer_common(device_, context_, queue_, numElements_); +} +int test_image_single_queue(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + log_info("RUNNING TEST WITH ONE QUEUE...... \n\n"); + return test_image_common(device_, context_, queue_, numElements_); +} +int test_image_multiple_queue(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + numCQ = 2; + log_info("RUNNING TEST WITH TWO QUEUE...... \n\n"); + return test_image_common(device_, context_, queue_, numElements_); +} + +test_definition test_list[] = { ADD_TEST(buffer_single_queue), + ADD_TEST(buffer_multiple_queue), + ADD_TEST(buffer_multiImport_sameCtx), + ADD_TEST(buffer_multiImport_diffCtx), + ADD_TEST(image_single_queue), + ADD_TEST(image_multiple_queue), + ADD_TEST(consistency_external_buffer), + ADD_TEST(consistency_external_image), + ADD_TEST(consistency_external_semaphore), + ADD_TEST(platform_info), + ADD_TEST(device_info) }; + +const int test_num = ARRAY_SIZE(test_list); + +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +char *choosen_platform_name = NULL; +cl_platform_id platform = NULL; +cl_int choosen_platform_index = -1; +char platform_name[1024] = ""; +cl_platform_id select_platform = NULL; +char *extensions = NULL; +size_t extensionSize = 0; +cl_uint num_devices = 0; +cl_uint device_no = 0; +cl_device_id *devices; +const size_t bufsize = BUFFERSIZE; +char buf[BUFFERSIZE]; +cl_uchar uuid[CL_UUID_SIZE_KHR]; +VulkanDevice vkDevice; +unsigned int numCQ; +bool multiImport; +bool multiCtx; +bool debug_trace = false; +bool useSingleImageKernel = false; +bool useDeviceLocal = false; +bool disableNTHandleType = false; +bool enableOffset = false; +bool non_dedicated = false; + +static void printUsage(const char *execName) +{ + const char *p = strrchr(execName, '/'); + if (p != NULL) execName = p + 1; + + log_info("Usage: %s [test_names] [options]\n", 
execName); + log_info("Test names:\n"); + for (int i = 0; i < test_num; i++) + { + log_info("\t%s\n", test_list[i].name); + } + log_info("\n"); + log_info("Options:\n"); + log_info("\t--debug_trace - Enables additional debug info logging\n"); + log_info("\t--non_dedicated - Choose dedicated Vs. non_dedicated \n"); +} + +size_t parseParams(int argc, const char *argv[], const char **argList) +{ + size_t argCount = 1; + for (int i = 1; i < argc; i++) + { + if (argv[i] == NULL) break; + if (argv[i][0] == '-') + { + if (!strcmp(argv[i], "--debug_trace")) + { + debug_trace = true; + } + if (!strcmp(argv[i], "--useSingleImageKernel")) + { + useSingleImageKernel = true; + } + if (!strcmp(argv[i], "--useDeviceLocal")) + { + useDeviceLocal = true; + } + if (!strcmp(argv[i], "--disableNTHandleType")) + { + disableNTHandleType = true; + } + if (!strcmp(argv[i], "--enableOffset")) + { + enableOffset = true; + } + if (!strcmp(argv[i], "--non_dedicated")) + { + non_dedicated = true; + } + if (strcmp(argv[i], "-h") == 0) + { + printUsage(argv[0]); + argCount = 0; // Returning argCount=0 to assert error in main() + break; + } + } + else + { + argList[argCount] = argv[i]; + argCount++; + } + } + return argCount; +} + +int main(int argc, const char *argv[]) +{ + int errNum = 0; + + test_start(); + params_reset(); + + if (!checkVkSupport()) + { + log_info("Vulkan supported GPU not found \n"); + return 0; + } + + cl_device_type requestedDeviceType = CL_DEVICE_TYPE_GPU; + char *force_cpu = getenv("CL_DEVICE_TYPE"); + if (force_cpu != NULL) + { + if (strcmp(force_cpu, "gpu") == 0 + || strcmp(force_cpu, "CL_DEVICE_TYPE_GPU") == 0) + requestedDeviceType = CL_DEVICE_TYPE_GPU; + else if (strcmp(force_cpu, "cpu") == 0 + || strcmp(force_cpu, "CL_DEVICE_TYPE_CPU") == 0) + requestedDeviceType = CL_DEVICE_TYPE_CPU; + else if (strcmp(force_cpu, "accelerator") == 0 + || strcmp(force_cpu, "CL_DEVICE_TYPE_ACCELERATOR") == 0) + requestedDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if 
(strcmp(force_cpu, "CL_DEVICE_TYPE_DEFAULT") == 0) + requestedDeviceType = CL_DEVICE_TYPE_DEFAULT; + } + + if (requestedDeviceType != CL_DEVICE_TYPE_GPU) + { + log_info("Vulkan tests can only run on a GPU device.\n"); + return 0; + } + gDeviceType = CL_DEVICE_TYPE_GPU; + + const char **argList = (const char **)calloc(argc, sizeof(char *)); + size_t argCount = parseParams(argc, argv, argList); + if (argCount == 0) return 0; + // get the platform ID + errNum = clGetPlatformIDs(1, &platform, NULL); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "Error: Failed to get platform\n"); + return errNum; + } + + errNum = + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "clGetDeviceIDs failed in returning of devices\n"); + return errNum; + } + devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id)); + if (NULL == devices) + { + print_error(errNum, "Unable to allocate memory for devices\n"); + return CL_OUT_OF_HOST_MEMORY; + } + errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, + NULL); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "Failed to get deviceID.\n"); + return errNum; + } + for (device_no = 0; device_no < num_devices; device_no++) + { + errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0, + NULL, &extensionSize); + if (CL_SUCCESS != errNum) + { + log_error("Error in clGetDeviceInfo for getting " + "device_extension size....\n"); + return errNum; + } + extensions = (char *)malloc(extensionSize); + if (NULL == extensions) + { + log_error("Unable to allocate memory for extensions\n"); + return CL_OUT_OF_HOST_MEMORY; + } + errNum = + clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, + extensionSize, extensions, NULL /*&extensionSize*/); + if (CL_SUCCESS != errNum) + { + print_error(errNum, + "Error in clGetDeviceInfo for getting " + "device_extension\n"); + return errNum; + } + errNum = 
clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR, + CL_UUID_SIZE_KHR, uuid, &extensionSize); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "clGetDeviceInfo failed with error\n "); + return errNum; + } + errNum = + memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE); + if (errNum == 0) + { + break; + } + } + if (device_no >= num_devices) + { + fprintf(stderr, + "OpenCL error: " + "No Vulkan-OpenCL Interop capable GPU found.\n"); + } + if (!(is_extension_available(devices[device_no], "cl_khr_external_memory") + && is_extension_available(devices[device_no], + "cl_khr_external_semaphore"))) + { + log_info("Device does not support cl_khr_external_memory " + "or cl_khr_external_semaphore\n"); + log_info(" TEST SKIPPED\n"); + return CL_SUCCESS; + } + init_cl_vk_ext(platform); + + // Execute tests. + // Note: don't use the entire harness, because we have a different way of + // obtaining the device (via the context) + errNum = parseAndCallCommandLineTests(argCount, argList, devices[device_no], + test_num, test_list, true, 0, 1024); + return errNum; +} diff --git a/test_conformance/vulkan/procs.h b/test_conformance/vulkan/procs.h new file mode 100644 index 00000000..37bf7869 --- /dev/null +++ b/test_conformance/vulkan/procs.h @@ -0,0 +1,38 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "harness/mt19937.h" + +extern int test_vulkan_interop_buffer(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_vulkan_interop_image(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_consistency_external_buffer(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_consistency_external_image(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_consistency_external_semaphore(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_platform_info(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_device_info(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp new file mode 100644 index 00000000..2987418f --- /dev/null +++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp @@ -0,0 +1,568 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include +#include +#include +#if !defined(__APPLE__) +#include +#include +#else +#include +#include +#endif + +#include +#include +#include +#include +#include "harness/testHarness.h" +#include "harness/typeWrappers.h" +#include "harness/deviceInfo.h" + +int test_consistency_external_buffer(cl_device_id deviceID, cl_context _context, + cl_command_queue _queue, int num_elements) +{ + cl_int errNum; + VulkanDevice vkDevice; + // Context and command queue creation + cl_platform_id platform = NULL; + cl_context context = NULL; + cl_command_queue cmd_queue = NULL; + + cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 }; + errNum = clGetPlatformIDs(1, &platform, NULL); + test_error(errNum, "Failed to get platform Id"); + + contextProperties[1] = (cl_context_properties)platform; + + context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, + NULL, NULL, &errNum); + test_error(errNum, "Unable to create context with properties"); + + cmd_queue = clCreateCommandQueue(context, deviceID, 0, &errNum); + test_error(errNum, "Unable to create command queue"); + + uint32_t bufferSize = 32; + cl_device_id devList[] = { deviceID, NULL }; + +#ifdef _WIN32 + if (!is_extension_available(devList[0], "cl_khr_external_memory_win32")) + { + throw std::runtime_error("Device does not support " + "cl_khr_external_memory_win32 extension \n"); + } +#else + if (!is_extension_available(devList[0], "cl_khr_external_memory_opaque_fd")) + { + throw std::runtime_error( + "Device does not support " + "cl_khr_external_memory_opaque_fd extension \n"); + } +#endif + + VulkanExternalMemoryHandleType vkExternalMemoryHandleType = + getSupportedVulkanExternalMemoryHandleTypeList()[0]; + + VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024, vkExternalMemoryHandleType); + const VulkanMemoryTypeList& memoryTypeList = + vkDummyBuffer.getMemoryTypeList(); + + VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory( + vkDevice, bufferSize, memoryTypeList[0], 
vkExternalMemoryHandleType); + VulkanBufferList vkBufferList(1, vkDevice, bufferSize, + vkExternalMemoryHandleType); + + vkDeviceMem->bindBuffer(vkBufferList[0], 0); + + void* handle = NULL; + int fd; + + std::vector extMemProperties{ + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR, + (cl_mem_properties)devList[0], + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR, + }; + cl_external_memory_handle_type_khr type; + switch (vkExternalMemoryHandleType) + { +#ifdef _WIN32 + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT: + handle = vkDeviceMem->getHandle(vkExternalMemoryHandleType); + type = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR; + errNum = check_external_memory_handle_type(devList[0], type); + extMemProperties.push_back((cl_mem_properties)type); + extMemProperties.push_back((cl_mem_properties)handle); + break; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT: + handle = vkDeviceMem->getHandle(vkExternalMemoryHandleType); + type = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR; + errNum = check_external_memory_handle_type(devList[0], type); + extMemProperties.push_back((cl_mem_properties)type); + extMemProperties.push_back((cl_mem_properties)handle); + break; +#else + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: + fd = (int)vkDeviceMem->getHandle(vkExternalMemoryHandleType); + type = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR; + errNum = check_external_memory_handle_type(devList[0], type); + extMemProperties.push_back((cl_mem_properties)type); + extMemProperties.push_back((cl_mem_properties)fd); + break; +#endif + default: + errNum = TEST_FAIL; + log_error("Unsupported external memory handle type \n"); + break; + } + if (errNum != CL_SUCCESS) + { + log_error("Checks failed for " + "CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR\n"); + return TEST_FAIL; + } + extMemProperties.push_back(0); + + clMemWrapper buffer; + + // Passing NULL properties and a valid extMem_desc size + buffer = clCreateBufferWithProperties(context, NULL, 1, 
bufferSize, NULL, + &errNum); + test_error(errNum, "Unable to create buffer with NULL properties"); + + buffer.reset(); + + // Passing valid extMemProperties and buffersize + buffer = clCreateBufferWithProperties(context, extMemProperties.data(), 1, + bufferSize, NULL, &errNum); + test_error(errNum, "Unable to create buffer with Properties"); + + buffer.reset(); + + // Not passing external memory handle + std::vector extMemProperties2{ +#ifdef _WIN32 + (cl_mem_properties)type, + NULL, // Passing NULL handle +#else + (cl_mem_properties)type, + (cl_mem_properties)-64, // Passing random invalid fd +#endif + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR, + (cl_mem_properties)devList[0], + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR, + 0 + }; + buffer = clCreateBufferWithProperties(context, extMemProperties2.data(), 1, + bufferSize, NULL, &errNum); + test_failure_error(errNum, CL_INVALID_VALUE, + "Should return CL_INVALID_VALUE "); + + buffer.reset(); + + // Passing extMem_desc size = 0 but valid memProperties, CL_INVALID_SIZE + // should be returned. 
+ buffer = clCreateBufferWithProperties(context, extMemProperties.data(), 1, + 0, NULL, &errNum); + test_failure_error(errNum, CL_INVALID_BUFFER_SIZE, + "Should return CL_INVALID_BUFFER_SIZE"); + + return TEST_PASS; +} + +int test_consistency_external_image(cl_device_id deviceID, cl_context _context, + cl_command_queue _queue, int num_elements) +{ + cl_int errNum; + VulkanDevice vkDevice; + + // Context and command queue creation + cl_platform_id platform = NULL; + cl_context context = NULL; + cl_command_queue cmd_queue = NULL; + + cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 }; + errNum = clGetPlatformIDs(1, &platform, NULL); + test_error(errNum, "Failed to get platform id"); + + contextProperties[1] = (cl_context_properties)platform; + + context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, + NULL, NULL, &errNum); + test_error(errNum, "Unable to create context with properties"); + + cmd_queue = clCreateCommandQueue(context, deviceID, 0, &errNum); + test_error(errNum, "Unable to create command queue"); + + cl_device_id devList[] = { deviceID, NULL }; + +#ifdef _WIN32 + if (!is_extension_available(devList[0], "cl_khr_external_memory_win32")) + { + throw std::runtime_error("Device does not support" + "cl_khr_external_memory_win32 extension \n"); + } +#else + if (!is_extension_available(devList[0], "cl_khr_external_memory_opaque_fd")) + { + throw std::runtime_error( + "Device does not support cl_khr_external_memory_opaque_fd " + "extension \n"); + } +#endif + uint32_t width = 256; + uint32_t height = 16; + cl_image_desc image_desc; + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + cl_image_format img_format = { 0 }; + + VulkanExternalMemoryHandleType vkExternalMemoryHandleType = + getSupportedVulkanExternalMemoryHandleTypeList()[0]; + VulkanImage2D* vkImage2D = + new VulkanImage2D(vkDevice, VULKAN_FORMAT_R8G8B8A8_UNORM, width, height, + 1, vkExternalMemoryHandleType); + + const VulkanMemoryTypeList& memoryTypeList = 
vkImage2D->getMemoryTypeList(); + uint64_t totalImageMemSize = vkImage2D->getSize(); + + log_info("Memory type index: %d\n", (uint32_t)memoryTypeList[0]); + log_info("Memory type property: %d\n", + memoryTypeList[0].getMemoryTypeProperty()); + log_info("Image size : %d\n", totalImageMemSize); + + VulkanDeviceMemory* vkDeviceMem = + new VulkanDeviceMemory(vkDevice, totalImageMemSize, memoryTypeList[0], + vkExternalMemoryHandleType); + vkDeviceMem->bindImage(*vkImage2D, 0); + + void* handle = NULL; + int fd; + std::vector extMemProperties{ + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR, + (cl_mem_properties)devList[0], + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR, + }; + switch (vkExternalMemoryHandleType) + { +#ifdef _WIN32 + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT: + handle = vkDeviceMem->getHandle(vkExternalMemoryHandleType); + errNum = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR); + extMemProperties.push_back( + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR); + extMemProperties.push_back((cl_mem_properties)handle); + break; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT: + handle = vkDeviceMem->getHandle(vkExternalMemoryHandleType); + errNum = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR); + extMemProperties.push_back( + (cl_mem_properties) + CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR); + extMemProperties.push_back((cl_mem_properties)handle); + break; +#else + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: + fd = (int)vkDeviceMem->getHandle(vkExternalMemoryHandleType); + errNum = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR); + extMemProperties.push_back( + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR); + extMemProperties.push_back((cl_mem_properties)fd); + break; +#endif + default: + errNum = TEST_FAIL; + log_error("Unsupported external memory handle 
type \n"); + break; + } + if (errNum != CL_SUCCESS) + { + log_error("Checks failed for " + "CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR\n"); + return TEST_FAIL; + } + extMemProperties.push_back(0); + + const VkImageCreateInfo VulkanImageCreateInfo = + vkImage2D->getVkImageCreateInfo(); + + errNum = getCLImageInfoFromVkImageInfo( + &VulkanImageCreateInfo, totalImageMemSize, &img_format, &image_desc); + if (errNum != CL_SUCCESS) + { + log_error("getCLImageInfoFromVkImageInfo failed!!!"); + return TEST_FAIL; + } + + clMemWrapper image; + + // Pass valid properties, image_desc and image_format + image = clCreateImageWithProperties( + context, extMemProperties.data(), CL_MEM_READ_WRITE, &img_format, + &image_desc, NULL /* host_ptr */, &errNum); + test_error(errNum, "Unable to create Image with Properties"); + image.reset(); + + // Passing properties, image_desc and image_format all as NULL + image = clCreateImageWithProperties(context, NULL, CL_MEM_READ_WRITE, NULL, + NULL, NULL, &errNum); + test_failure_error( + errNum, CL_INVALID_IMAGE_DESCRIPTOR, + "Image creation must fail with CL_INVALID_IMAGE_DESCRIPTOR " + "when all are passed as NULL"); + + image.reset(); + + // Passing NULL properties and a valid image_format and image_desc + image = + clCreateImageWithProperties(context, NULL, CL_MEM_READ_WRITE, + &img_format, &image_desc, NULL, &errNum); + test_error(errNum, + "Unable to create image with NULL properties " + "with valid image format and image desc"); + + image.reset(); + + // Passing image_format as NULL + image = clCreateImageWithProperties(context, extMemProperties.data(), + CL_MEM_READ_WRITE, NULL, &image_desc, + NULL, &errNum); + test_failure_error(errNum, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + "Image creation must fail with " + "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR" + "when image desc passed as NULL"); + + image.reset(); + + // Passing image_desc as NULL + image = clCreateImageWithProperties(context, extMemProperties.data(), + CL_MEM_READ_WRITE, 
&img_format, NULL, + NULL, &errNum); + test_failure_error(errNum, CL_INVALID_IMAGE_DESCRIPTOR, + "Image creation must fail with " + "CL_INVALID_IMAGE_DESCRIPTOR " + "when image desc passed as NULL"); + image.reset(); + + return TEST_PASS; +} + +int test_consistency_external_semaphore(cl_device_id deviceID, + cl_context _context, + cl_command_queue _queue, + int num_elements) +{ + cl_int errNum; + VulkanDevice vkDevice; + // Context and command queue creation + cl_platform_id platform = NULL; + cl_context context = NULL; + cl_command_queue cmd_queue = NULL; + + errNum = clGetPlatformIDs(1, &platform, NULL); + test_error(errNum, "Failed to get platform Id"); + + cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 }; + + contextProperties[1] = (cl_context_properties)platform; + + context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, + NULL, NULL, &errNum); + test_error(errNum, "Unable to create context with properties"); + + cmd_queue = clCreateCommandQueue(context, deviceID, 0, &errNum); + test_error(errNum, "Unable to create command queue"); + + cl_device_id devList[] = { deviceID, NULL }; + +#ifdef _WIN32 + if (!is_extension_available(devList[0], "cl_khr_external_semaphore_win32")) + { + throw std::runtime_error( + "Device does not support cl_khr_external_semaphore_win32 " + "extension \n"); + } +#else + if (!is_extension_available(devList[0], + "cl_khr_external_semaphore_opaque_fd")) + { + throw std::runtime_error( + "Device does not support " + "cl_khr_external_semaphore_opaque_fd extension \n"); + } +#endif + VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = + getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; + VulkanSemaphore vkVk2Clsemaphore(vkDevice, vkExternalSemaphoreHandleType); + VulkanSemaphore vkCl2Vksemaphore(vkDevice, vkExternalSemaphoreHandleType); + cl_semaphore_khr clCl2Vksemaphore; + cl_semaphore_khr clVk2Clsemaphore; + + void* handle1 = NULL; + void* handle2 = NULL; + int fd1, fd2; + 
std::vector sema_props1{ + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + }; + std::vector sema_props2{ + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + }; + switch (vkExternalSemaphoreHandleType) + { +#ifdef _WIN32 + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT: + log_info(" Opaque NT handles are only supported on Windows\n"); + handle1 = vkVk2Clsemaphore.getHandle(vkExternalSemaphoreHandleType); + handle2 = vkCl2Vksemaphore.getHandle(vkExternalSemaphoreHandleType); + errNum = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR); + sema_props1.push_back((cl_semaphore_properties_khr) + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR); + sema_props1.push_back((cl_semaphore_properties_khr)handle1); + sema_props2.push_back((cl_semaphore_properties_khr) + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR); + sema_props2.push_back((cl_semaphore_properties_khr)handle2); + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT: + log_info(" Opaque D3DKMT handles are only supported on Windows\n"); + handle1 = vkVk2Clsemaphore.getHandle(vkExternalSemaphoreHandleType); + handle2 = vkCl2Vksemaphore.getHandle(vkExternalSemaphoreHandleType); + errNum = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR); + sema_props1.push_back((cl_semaphore_properties_khr) + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR); + sema_props1.push_back((cl_semaphore_properties_khr)handle1); + sema_props2.push_back((cl_semaphore_properties_khr) + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR); + sema_props2.push_back((cl_semaphore_properties_khr)handle2); + break; +#else + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD: + log_info(" Opaque file descriptors are not supported on Windows\n"); + fd1 = + (int)vkVk2Clsemaphore.getHandle(vkExternalSemaphoreHandleType); + fd2 = + 
(int)vkCl2Vksemaphore.getHandle(vkExternalSemaphoreHandleType); + errNum = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR); + sema_props1.push_back( + (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR); + sema_props1.push_back((cl_semaphore_properties_khr)fd1); + sema_props2.push_back( + (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR); + sema_props2.push_back((cl_semaphore_properties_khr)fd2); + break; +#endif + default: log_error("Unsupported external memory handle type\n"); break; + } + if (CL_SUCCESS != errNum) + { + throw std::runtime_error( + "Unsupported external sempahore handle type\n "); + } + sema_props1.push_back( + (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_KHR); + sema_props1.push_back((cl_semaphore_properties_khr)devList[0]); + sema_props1.push_back( + (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_END_KHR); + sema_props2.push_back( + (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_KHR); + sema_props2.push_back((cl_semaphore_properties_khr)devList[0]); + sema_props2.push_back( + (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_END_KHR); + sema_props1.push_back(0); + sema_props2.push_back(0); + + // Pass NULL properties + cl_semaphore_khr cl_ext_semaphore = + clCreateSemaphoreWithPropertiesKHRptr(context, NULL, &errNum); + test_failure_error(errNum, CL_INVALID_VALUE, + "Semaphore creation must fail with CL_INVALID_VALUE " + " when properties are passed as NULL"); + + + // Pass invalid semaphore object to wait + errNum = + clEnqueueWaitSemaphoresKHRptr(cmd_queue, 1, NULL, NULL, 0, NULL, NULL); + test_failure_error(errNum, CL_INVALID_VALUE, + "clEnqueueWaitSemaphoresKHR fails with CL_INVALID_VALUE " + "when invalid semaphore object is passed"); + + + // Pass invalid semaphore object to signal + errNum = clEnqueueSignalSemaphoresKHRptr(cmd_queue, 1, NULL, NULL, 0, NULL, + NULL); + test_failure_error( + errNum, CL_INVALID_VALUE, + "clEnqueueSignalSemaphoresKHR fails with 
CL_INVALID_VALUE" + "when invalid semaphore object is passed"); + + + // Create two semaphore objects + clVk2Clsemaphore = clCreateSemaphoreWithPropertiesKHRptr( + context, sema_props1.data(), &errNum); + test_error(errNum, + "Unable to create semaphore with valid semaphore properties"); + + clCl2Vksemaphore = clCreateSemaphoreWithPropertiesKHRptr( + context, sema_props2.data(), &errNum); + test_error(errNum, + "Unable to create semaphore with valid semaphore properties"); + + + // Call Signal twice consecutively + errNum = clEnqueueSignalSemaphoresKHRptr(cmd_queue, 1, &clVk2Clsemaphore, + NULL, 0, NULL, NULL); + test_error(errNum, "clEnqueueSignalSemaphoresKHRptr failed"); + + errNum = clEnqueueSignalSemaphoresKHRptr(cmd_queue, 1, &clCl2Vksemaphore, + NULL, 0, NULL, NULL); + test_error(errNum, + "clEnqueueSignalSemaphoresKHRptr failed for two " + "consecutive wait events"); + + + // Call Wait twice consecutively + errNum = clEnqueueWaitSemaphoresKHRptr(cmd_queue, 1, &clVk2Clsemaphore, + NULL, 0, NULL, NULL); + test_error(errNum, "clEnqueueWaitSemaphoresKHRptr failed"); + + errNum = clEnqueueWaitSemaphoresKHRptr(cmd_queue, 1, &clCl2Vksemaphore, + NULL, 0, NULL, NULL); + test_error(errNum, + "clEnqueueWaitSemaphoresKHRptr failed for two " + " consecutive wait events"); + + + // Pass invalid object to release call + errNum = clReleaseSemaphoreObjectKHRptr(NULL); + test_failure_error(errNum, CL_INVALID_VALUE, + "clReleaseSemaphoreObjectKHRptr fails with " + "CL_INVALID_VALUE when NULL semaphore object is passed"); + + // Release both semaphore objects + errNum = clReleaseSemaphoreObjectKHRptr(clVk2Clsemaphore); + test_error(errNum, "clReleaseSemaphoreObjectKHRptr failed"); + + errNum = clReleaseSemaphoreObjectKHRptr(clCl2Vksemaphore); + test_error(errNum, "clReleaseSemaphoreObjectKHRptr failed"); + + return TEST_PASS; +} diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp new file mode 100644 
index 00000000..7daf96de --- /dev/null +++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp @@ -0,0 +1,1808 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include "harness/errorHelpers.h" + +#define MAX_BUFFERS 5 +#define MAX_IMPORTS 5 +#define BUFFERSIZE 3000 +static cl_uchar uuid[CL_UUID_SIZE_KHR]; +static cl_device_id deviceId = NULL; + +namespace { +struct Params +{ + uint32_t numBuffers; + uint32_t bufferSize; + uint32_t interBufferOffset; +}; +} + +static const char *vkBufferShader = + "#version 450\n" + "#extension GL_ARB_separate_shader_objects : enable\n" + "#extension GL_NV_gpu_shader5 : enable\n" + "layout(binding = 0) buffer Params\n" + "{\n" + " uint32_t numBuffers;\n" + " uint32_t bufferSize;\n" + " uint32_t interBufferOffset;\n" + "};\n" + "layout(binding = 1) buffer Buffer\n" + "{\n" + " uint8_t ptr[];\n" + "} bufferPtrList[" STRING( + MAX_BUFFERS) "];\n" + "layout(local_size_x = 512) in;\n" + "void main() {\n" + " for (uint32_t bufIdx = 0; bufIdx < numBuffers;" + " bufIdx++) {\n" + " uint32_t ptrIdx = gl_GlobalInvocationID.x;\n" + " uint32_t limit = bufferSize;\n" + " while (ptrIdx < limit) {\n" + " bufferPtrList[bufIdx].ptr[ptrIdx]++;\n" + " ptrIdx += (gl_NumWorkGroups.x * " + "gl_WorkGroupSize.x);\n" + " }\n" + " }\n" + "}\n"; + +const char *kernel_text_numbuffer_1 = " \ +__kernel void 
clUpdateBuffer(int bufferSize, __global unsigned char *a) { \n\ + int gid = get_global_id(0); \n\ + if (gid < bufferSize) { \n\ + a[gid]++; \n\ + } \n\ +}"; + +const char *kernel_text_numbuffer_2 = " \ +__kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a, __global unsigned char *b) { \n\ + int gid = get_global_id(0); \n\ + if (gid < bufferSize) { \n\ + a[gid]++; \n\ + b[gid]++;\n\ + } \n\ +}"; + +const char *kernel_text_numbuffer_4 = " \ +__kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a, __global unsigned char *b, __global unsigned char *c, __global unsigned char *d) { \n\ + int gid = get_global_id(0); \n\ + if (gid < bufferSize) { \n\ + a[gid]++;\n\ + b[gid]++; \n\ + c[gid]++; \n\ + d[gid]++; \n\ + } \n\ +}"; + + +const char *kernel_text_verify = " \ +__kernel void checkKernel(__global unsigned char *ptr, int size, int expVal, __global unsigned char *err) \n\ +{ \n\ + int idx = get_global_id(0); \n\ + if ((idx < size) && (*err == 0)) { \n\ + if (ptr[idx] != expVal){ \n\ + *err = 1; \n\ + } \n\ + } \n\ +}"; + +int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, + cl_command_queue &cmd_queue2, cl_kernel *kernel, + cl_kernel &verify_kernel, VulkanDevice &vkDevice, + uint32_t numBuffers, uint32_t bufferSize) +{ + int err = CL_SUCCESS; + size_t global_work_size[1]; + uint8_t *error_2; + cl_mem error_1; + cl_kernel update_buffer_kernel; + cl_kernel kernel_cq; + clExternalSemaphore *clVk2CLExternalSemaphore = NULL; + clExternalSemaphore *clCl2VkExternalSemaphore = NULL; + const char *program_source_const = kernel_text_numbuffer_2; + size_t program_source_length = strlen(program_source_const); + cl_program program = clCreateProgramWithSource( + context, 1, &program_source_const, &program_source_length, &err); + err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed to build program \n"); + return err; + } + // create the kernel + kernel_cq 
= clCreateKernel(program, "clUpdateBuffer", &err); + if (err != CL_SUCCESS) + { + print_error(err, "clCreateKernel failed \n"); + return err; + } + + const std::vector + vkExternalMemoryHandleTypeList = + getSupportedVulkanExternalMemoryHandleTypeList(); + VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = + getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; + VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); + VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + + VulkanQueue &vkQueue = vkDevice.getQueue(); + + VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( + MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); + VulkanDescriptorSetLayout vkDescriptorSetLayout( + vkDevice, vkDescriptorSetLayoutBindingList); + VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); + VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, + vkBufferShaderModule); + + VulkanDescriptorPool vkDescriptorPool(vkDevice, + vkDescriptorSetLayoutBindingList); + VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, + vkDescriptorSetLayout); + + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + + const uint32_t maxIter = innerIterations; + VulkanCommandPool vkCommandPool(vkDevice); + VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool); + + VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params)); + VulkanDeviceMemory vkParamsDeviceMemory( + vkDevice, vkParamsBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); + std::vector vkBufferListDeviceMemory; + std::vector externalMemory; + for 
(size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size(); + emhtIdx++) + { + VulkanExternalMemoryHandleType vkExternalMemoryHandleType = + vkExternalMemoryHandleTypeList[emhtIdx]; + log_info("External memory handle type: %d\n", + vkExternalMemoryHandleType); + + VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024, + vkExternalMemoryHandleType); + const VulkanMemoryTypeList &memoryTypeList = + vkDummyBuffer.getMemoryTypeList(); + + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) + { + const VulkanMemoryType &memoryType = memoryTypeList[mtIdx]; + + log_info("Memory type index: %d\n", (uint32_t)memoryType); + log_info("Memory type property: %d\n", + memoryType.getMemoryTypeProperty()); + + VulkanBufferList vkBufferList(numBuffers, vkDevice, bufferSize, + vkExternalMemoryHandleType); + + for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) + { + vkBufferListDeviceMemory.push_back( + new VulkanDeviceMemory(vkDevice, bufferSize, memoryType, + vkExternalMemoryHandleType)); + externalMemory.push_back(new clExternalMemory( + vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType, + 0, bufferSize, context, deviceId)); + } + cl_mem buffers[MAX_BUFFERS]; + clFinish(cmd_queue1); + Params *params = (Params *)vkParamsDeviceMemory.map(); + params->numBuffers = numBuffers; + params->bufferSize = bufferSize; + params->interBufferOffset = 0; + vkParamsDeviceMemory.unmap(); + vkDescriptorSet.update(0, vkParamsBuffer); + for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) + { + size_t buffer_size = vkBufferList[bIdx].getSize(); + vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], + 0); + buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer(); + vkDescriptorSet.update((uint32_t)bIdx + 1, vkBufferList[bIdx]); + } + vkCommandBuffer.begin(); + vkCommandBuffer.bindPipeline(vkComputePipeline); + vkCommandBuffer.bindDescriptorSets( + vkComputePipeline, vkPipelineLayout, vkDescriptorSet); + vkCommandBuffer.dispatch(512, 1, 1); + 
vkCommandBuffer.end(); + + if (vkBufferList.size() == 2) + { + update_buffer_kernel = kernel[0]; + } + else if (vkBufferList.size() == 3) + { + update_buffer_kernel = kernel[1]; + } + else if (vkBufferList.size() == 5) + { + update_buffer_kernel = kernel[2]; + } + // global work size should be less than or equal to + // bufferSizeList[i] + global_work_size[0] = bufferSize; + for (uint32_t iter = 0; iter < maxIter; iter++) + { + + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + clVk2CLExternalSemaphore->wait(cmd_queue1); + + err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t), + (void *)&bufferSize); + err |= clSetKernelArg(kernel_cq, 0, sizeof(uint32_t), + (void *)&bufferSize); + err |= clSetKernelArg(kernel_cq, 1, sizeof(cl_mem), + (void *)&(buffers[0])); + + for (int i = 0; i < vkBufferList.size() - 1; i++) + { + err |= + clSetKernelArg(update_buffer_kernel, i + 1, + sizeof(cl_mem), (void *)&(buffers[i])); + } + + err |= + clSetKernelArg(kernel_cq, 2, sizeof(cl_mem), + (void *)&(buffers[vkBufferList.size() - 1])); + + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for kernel\n"); + goto CLEANUP; + } + cl_event first_launch; + + err = clEnqueueNDRangeKernel(cmd_queue1, update_buffer_kernel, + 1, NULL, global_work_size, NULL, 0, + NULL, &first_launch); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch update_buffer_kernel," + "error\n"); + goto CLEANUP; + } + + err = clEnqueueNDRangeKernel(cmd_queue2, kernel_cq, 1, NULL, + global_work_size, NULL, 1, + &first_launch, NULL); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch update_buffer_kernel," + "error\n"); + goto CLEANUP; + } + + if (iter != (maxIter - 1)) + { + clCl2VkExternalSemaphore->signal(cmd_queue2); + } + } + error_2 = (uint8_t *)malloc(sizeof(uint8_t)); + if (NULL == error_2) + { + 
log_error("Not able to allocate memory\n"); + goto CLEANUP; + } + clFinish(cmd_queue2); + error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(uint8_t), NULL, &err); + if (CL_SUCCESS != err) + { + print_error(err, "Error: clCreateBuffer \n"); + goto CLEANUP; + } + uint8_t val = 0; + err = clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0, + sizeof(uint8_t), &val, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error\n"); + goto CLEANUP; + } + + int calc_max_iter; + for (int i = 0; i < vkBufferList.size(); i++) + { + if (i == 0) + calc_max_iter = (maxIter * 3); + else + calc_max_iter = (maxIter * 2); + err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem), + (void *)&(buffers[i])); + err |= + clSetKernelArg(verify_kernel, 1, sizeof(int), &bufferSize); + err |= clSetKernelArg(verify_kernel, 2, sizeof(int), + &calc_max_iter); + err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem), + (void *)&error_1); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for " + "verify_kernel \n"); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, NULL, + global_work_size, NULL, 0, NULL, + NULL); + + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch verify_kernel," + "error \n"); + goto CLEANUP; + } + err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0, + sizeof(uint8_t), error_2, 0, NULL, + NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error \n "); + goto CLEANUP; + } + if (*error_2 == 1) + { + log_error("&&&& vulkan_opencl_buffer test FAILED\n"); + goto CLEANUP; + } + } + for (size_t i = 0; i < vkBufferList.size(); i++) + { + delete vkBufferListDeviceMemory[i]; + delete externalMemory[i]; + } + vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(), + vkBufferListDeviceMemory.begin() + + numBuffers); + externalMemory.erase(externalMemory.begin(), + externalMemory.begin() + numBuffers); + } + 
} +CLEANUP: + for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++) + { + if (vkBufferListDeviceMemory[i]) + { + delete vkBufferListDeviceMemory[i]; + } + if (externalMemory[i]) + { + delete externalMemory[i]; + } + } + if (program) clReleaseProgram(program); + if (kernel_cq) clReleaseKernel(kernel_cq); + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + if (error_2) free(error_2); + if (error_1) clReleaseMemObject(error_1); + + return err; +} + +int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, + cl_kernel *kernel, cl_kernel &verify_kernel, + VulkanDevice &vkDevice, uint32_t numBuffers, + uint32_t bufferSize) +{ + log_info("RUNNING TEST WITH ONE QUEUE...... \n\n"); + size_t global_work_size[1]; + uint8_t *error_2; + cl_mem error_1; + cl_kernel update_buffer_kernel; + clExternalSemaphore *clVk2CLExternalSemaphore = NULL; + clExternalSemaphore *clCl2VkExternalSemaphore = NULL; + int err = CL_SUCCESS; + + const std::vector + vkExternalMemoryHandleTypeList = + getSupportedVulkanExternalMemoryHandleTypeList(); + VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = + getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; + VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); + VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + + VulkanQueue &vkQueue = vkDevice.getQueue(); + + VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( + MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); + VulkanDescriptorSetLayout vkDescriptorSetLayout( + vkDevice, vkDescriptorSetLayoutBindingList); + VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); + VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, + vkBufferShaderModule); + + VulkanDescriptorPool vkDescriptorPool(vkDevice, + 
vkDescriptorSetLayoutBindingList); + VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, + vkDescriptorSetLayout); + + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + const uint32_t maxIter = innerIterations; + VulkanCommandPool vkCommandPool(vkDevice); + VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool); + + VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params)); + VulkanDeviceMemory vkParamsDeviceMemory( + vkDevice, vkParamsBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); + std::vector vkBufferListDeviceMemory; + std::vector externalMemory; + + for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size(); + emhtIdx++) + { + VulkanExternalMemoryHandleType vkExternalMemoryHandleType = + vkExternalMemoryHandleTypeList[emhtIdx]; + log_info("External memory handle type: %d\n", + vkExternalMemoryHandleType); + + VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024, + vkExternalMemoryHandleType); + const VulkanMemoryTypeList &memoryTypeList = + vkDummyBuffer.getMemoryTypeList(); + + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) + { + const VulkanMemoryType &memoryType = memoryTypeList[mtIdx]; + + log_info("Memory type index: %d\n", (uint32_t)memoryType); + log_info("Memory type property: %d\n", + memoryType.getMemoryTypeProperty()); + + VulkanBufferList vkBufferList(numBuffers, vkDevice, bufferSize, + vkExternalMemoryHandleType); + + for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) + { + vkBufferListDeviceMemory.push_back( + new VulkanDeviceMemory(vkDevice, bufferSize, memoryType, + vkExternalMemoryHandleType)); + externalMemory.push_back(new clExternalMemory( + vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType, + 
0, bufferSize, context, deviceId)); + } + cl_mem buffers[4]; + clFinish(cmd_queue1); + Params *params = (Params *)vkParamsDeviceMemory.map(); + params->numBuffers = numBuffers; + params->bufferSize = bufferSize; + params->interBufferOffset = 0; + vkParamsDeviceMemory.unmap(); + vkDescriptorSet.update(0, vkParamsBuffer); + for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) + { + size_t buffer_size = vkBufferList[bIdx].getSize(); + vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], + 0); + buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer(); + vkDescriptorSet.update((uint32_t)bIdx + 1, vkBufferList[bIdx]); + } + vkCommandBuffer.begin(); + vkCommandBuffer.bindPipeline(vkComputePipeline); + vkCommandBuffer.bindDescriptorSets( + vkComputePipeline, vkPipelineLayout, vkDescriptorSet); + vkCommandBuffer.dispatch(512, 1, 1); + vkCommandBuffer.end(); + + if (vkBufferList.size() == 1) + { + update_buffer_kernel = kernel[0]; + } + else if (vkBufferList.size() == 2) + { + update_buffer_kernel = kernel[1]; + } + else if (vkBufferList.size() == 4) + { + update_buffer_kernel = kernel[2]; + } + + // global work size should be less than or equal to + // bufferSizeList[i] + global_work_size[0] = bufferSize; + + for (uint32_t iter = 0; iter < maxIter; iter++) + { + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + clVk2CLExternalSemaphore->wait(cmd_queue1); + + err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t), + (void *)&bufferSize); + for (int i = 0; i < vkBufferList.size(); i++) + { + err |= + clSetKernelArg(update_buffer_kernel, i + 1, + sizeof(cl_mem), (void *)&(buffers[i])); + } + + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for kernel\n"); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel(cmd_queue1, update_buffer_kernel, + 1, NULL, global_work_size, NULL, 0, + NULL, NULL); + if 
(err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch update_buffer_kernel," + " error\n"); + goto CLEANUP; + } + if (iter != (maxIter - 1)) + { + clCl2VkExternalSemaphore->signal(cmd_queue1); + } + } + error_2 = (uint8_t *)malloc(sizeof(uint8_t)); + if (NULL == error_2) + { + log_error("Not able to allocate memory\n"); + goto CLEANUP; + } + + error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(uint8_t), NULL, &err); + if (CL_SUCCESS != err) + { + print_error(err, "Error: clCreateBuffer \n"); + goto CLEANUP; + } + uint8_t val = 0; + err = clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0, + sizeof(uint8_t), &val, 0, NULL, NULL); + if (CL_SUCCESS != err) + { + print_error(err, "Error: clEnqueueWriteBuffer \n"); + goto CLEANUP; + } + + int calc_max_iter = (maxIter * 2); + for (int i = 0; i < vkBufferList.size(); i++) + { + err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem), + (void *)&(buffers[i])); + err |= + clSetKernelArg(verify_kernel, 1, sizeof(int), &bufferSize); + err |= clSetKernelArg(verify_kernel, 2, sizeof(int), + &calc_max_iter); + err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem), + (void *)&error_1); + if (err != CL_SUCCESS) + { + print_error( + err, + "Error: Failed to set arg values for verify_kernel \n"); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, NULL, + global_work_size, NULL, 0, NULL, + NULL); + if (err != CL_SUCCESS) + { + print_error( + err, "Error: Failed to launch verify_kernel, error\n"); + goto CLEANUP; + } + + err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0, + sizeof(uint8_t), error_2, 0, NULL, + NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error \n"); + goto CLEANUP; + } + if (*error_2 == 1) + { + log_error("&&&& vulkan_opencl_buffer test FAILED\n"); + goto CLEANUP; + } + } + for (size_t i = 0; i < vkBufferList.size(); i++) + { + delete vkBufferListDeviceMemory[i]; + delete externalMemory[i]; + } + 
vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(), + vkBufferListDeviceMemory.begin() + + numBuffers); + externalMemory.erase(externalMemory.begin(), + externalMemory.begin() + numBuffers); + } + } +CLEANUP: + for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++) + { + if (vkBufferListDeviceMemory[i]) + { + delete vkBufferListDeviceMemory[i]; + } + if (externalMemory[i]) + { + delete externalMemory[i]; + } + } + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + if (error_2) free(error_2); + if (error_1) clReleaseMemObject(error_1); + return err; +} + +int run_test_with_multi_import_same_ctx( + cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel, + cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, + uint32_t bufferSize, uint32_t bufferSizeForOffset) +{ + size_t global_work_size[1]; + uint8_t *error_2; + cl_mem error_1; + int numImports = numBuffers; + cl_kernel update_buffer_kernel[MAX_IMPORTS]; + clExternalSemaphore *clVk2CLExternalSemaphore = NULL; + clExternalSemaphore *clCl2VkExternalSemaphore = NULL; + int err = CL_SUCCESS; + int calc_max_iter; + bool withOffset; + uint32_t pBufferSize; + + const std::vector + vkExternalMemoryHandleTypeList = + getSupportedVulkanExternalMemoryHandleTypeList(); + VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = + getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; + VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); + VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + + VulkanQueue &vkQueue = vkDevice.getQueue(); + + VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( + MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); + VulkanDescriptorSetLayout vkDescriptorSetLayout( + vkDevice, vkDescriptorSetLayoutBindingList); + 
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); + VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, + vkBufferShaderModule); + + VulkanDescriptorPool vkDescriptorPool(vkDevice, + vkDescriptorSetLayoutBindingList); + VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, + vkDescriptorSetLayout); + + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + const uint32_t maxIter = innerIterations; + VulkanCommandPool vkCommandPool(vkDevice); + VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool); + + VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params)); + VulkanDeviceMemory vkParamsDeviceMemory( + vkDevice, vkParamsBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); + std::vector vkBufferListDeviceMemory; + std::vector> externalMemory; + + + for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size(); + emhtIdx++) + { + VulkanExternalMemoryHandleType vkExternalMemoryHandleType = + vkExternalMemoryHandleTypeList[emhtIdx]; + log_info("External memory handle type: %d\n", + vkExternalMemoryHandleType); + + VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024, + vkExternalMemoryHandleType); + const VulkanMemoryTypeList &memoryTypeList = + vkDummyBuffer.getMemoryTypeList(); + + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) + { + const VulkanMemoryType &memoryType = memoryTypeList[mtIdx]; + + log_info("Memory type index: %d\n", (uint32_t)memoryType); + log_info("Memory type property: %d\n", + memoryType.getMemoryTypeProperty()); + for (unsigned int withOffset = 0; + withOffset <= (unsigned int)enableOffset; withOffset++) + { + log_info("Running withOffset case %d\n", (uint32_t)withOffset); + if 
(withOffset) + { + pBufferSize = bufferSizeForOffset; + } + else + { + pBufferSize = bufferSize; + } + cl_mem buffers[MAX_BUFFERS][MAX_IMPORTS]; + VulkanBufferList vkBufferList(numBuffers, vkDevice, pBufferSize, + vkExternalMemoryHandleType); + uint32_t interBufferOffset = + (uint32_t)(vkBufferList[0].getSize()); + + for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) + { + if (withOffset == 0) + { + vkBufferListDeviceMemory.push_back( + new VulkanDeviceMemory(vkDevice, pBufferSize, + memoryType, + vkExternalMemoryHandleType)); + } + if (withOffset == 1) + { + uint32_t totalSize = + (uint32_t)(vkBufferList.size() * interBufferOffset); + vkBufferListDeviceMemory.push_back( + new VulkanDeviceMemory(vkDevice, totalSize, + memoryType, + vkExternalMemoryHandleType)); + } + std::vector pExternalMemory; + for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + { + pExternalMemory.push_back(new clExternalMemory( + vkBufferListDeviceMemory[bIdx], + vkExternalMemoryHandleType, + withOffset * bIdx * interBufferOffset, pBufferSize, + context, deviceId)); + } + externalMemory.push_back(pExternalMemory); + } + + clFinish(cmd_queue1); + Params *params = (Params *)vkParamsDeviceMemory.map(); + params->numBuffers = numBuffers; + params->bufferSize = pBufferSize; + params->interBufferOffset = interBufferOffset * withOffset; + vkParamsDeviceMemory.unmap(); + vkDescriptorSet.update(0, vkParamsBuffer); + for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) + { + size_t buffer_size = vkBufferList[bIdx].getSize(); + vkBufferListDeviceMemory[bIdx]->bindBuffer( + vkBufferList[bIdx], + bIdx * interBufferOffset * withOffset); + for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + { + buffers[bIdx][cl_bIdx] = + externalMemory[bIdx][cl_bIdx] + ->getExternalMemoryBuffer(); + } + vkDescriptorSet.update((uint32_t)bIdx + 1, + vkBufferList[bIdx]); + } + vkCommandBuffer.begin(); + vkCommandBuffer.bindPipeline(vkComputePipeline); + vkCommandBuffer.bindDescriptorSets( + vkComputePipeline, 
vkPipelineLayout, vkDescriptorSet); + vkCommandBuffer.dispatch(512, 1, 1); + vkCommandBuffer.end(); + for (int i = 0; i < numImports; i++) + { + update_buffer_kernel[i] = (numBuffers == 1) + ? kernel[0] + : ((numBuffers == 2) ? kernel[1] : kernel[2]); + } + // global work size should be less than or equal to + // bufferSizeList[i] + global_work_size[0] = pBufferSize; + + for (uint32_t iter = 0; iter < maxIter; iter++) + { + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + clVk2CLExternalSemaphore->wait(cmd_queue1); + for (uint8_t launchIter = 0; launchIter < numImports; + launchIter++) + { + err = clSetKernelArg(update_buffer_kernel[launchIter], + 0, sizeof(uint32_t), + (void *)&pBufferSize); + for (int i = 0; i < numBuffers; i++) + { + err |= clSetKernelArg( + update_buffer_kernel[launchIter], i + 1, + sizeof(cl_mem), + (void *)&(buffers[i][launchIter])); + } + + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for " + "kernel\n "); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel( + cmd_queue1, update_buffer_kernel[launchIter], 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch " + "update_buffer_kernel, error\n "); + goto CLEANUP; + } + } + if (iter != (maxIter - 1)) + { + clCl2VkExternalSemaphore->signal(cmd_queue1); + } + } + error_2 = (uint8_t *)malloc(sizeof(uint8_t)); + if (NULL == error_2) + { + log_error("Not able to allocate memory\n"); + goto CLEANUP; + } + + error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(uint8_t), NULL, &err); + if (CL_SUCCESS != err) + { + print_error(err, "Error: clCreateBuffer \n"); + goto CLEANUP; + } + uint8_t val = 0; + err = + clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0, + sizeof(uint8_t), &val, 0, NULL, NULL); + if (CL_SUCCESS != err) + { + print_error(err, "Error: 
clEnqueueWriteBuffer \n"); + goto CLEANUP; + } + calc_max_iter = maxIter * (numBuffers + 1); + + for (int i = 0; i < vkBufferList.size(); i++) + { + err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem), + (void *)&(buffers[i][0])); + err |= clSetKernelArg(verify_kernel, 1, sizeof(int), + &pBufferSize); + err |= clSetKernelArg(verify_kernel, 2, sizeof(int), + &calc_max_iter); + err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem), + (void *)&error_1); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for " + "verify_kernel \n"); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, + NULL, global_work_size, NULL, + 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error( + err, + "Error: Failed to launch verify_kernel, error\n"); + goto CLEANUP; + } + + err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0, + sizeof(uint8_t), error_2, 0, NULL, + NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error \n"); + goto CLEANUP; + } + if (*error_2 == 1) + { + log_error("&&&& vulkan_opencl_buffer test FAILED\n"); + goto CLEANUP; + } + } + for (size_t i = 0; i < vkBufferList.size(); i++) + { + for (size_t j = 0; j < numImports; j++) + { + delete externalMemory[i][j]; + } + } + for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++) + { + delete vkBufferListDeviceMemory[i]; + } + vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(), + vkBufferListDeviceMemory.end()); + for (size_t i = 0; i < externalMemory.size(); i++) + { + externalMemory[i].erase(externalMemory[i].begin(), + externalMemory[i].begin() + + numBuffers); + } + externalMemory.clear(); + } + } + } +CLEANUP: + for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++) + { + if (vkBufferListDeviceMemory[i]) + { + delete vkBufferListDeviceMemory[i]; + } + } + for (size_t i = 0; i < externalMemory.size(); i++) + { + for (size_t j = 0; j < externalMemory[i].size(); j++) + { + if 
(externalMemory[i][j]) + { + delete externalMemory[i][j]; + } + } + } + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + if (error_2) free(error_2); + if (error_1) clReleaseMemObject(error_1); + return err; +} + +int run_test_with_multi_import_diff_ctx( + cl_context &context, cl_context &context2, cl_command_queue &cmd_queue1, + cl_command_queue &cmd_queue2, cl_kernel *kernel1, cl_kernel *kernel2, + cl_kernel &verify_kernel, cl_kernel verify_kernel2, VulkanDevice &vkDevice, + uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset) +{ + size_t global_work_size[1]; + uint8_t *error_3; + cl_mem error_1; + cl_mem error_2; + int numImports = numBuffers; + cl_kernel update_buffer_kernel1[MAX_IMPORTS]; + cl_kernel update_buffer_kernel2[MAX_IMPORTS]; + clExternalSemaphore *clVk2CLExternalSemaphore = NULL; + clExternalSemaphore *clCl2VkExternalSemaphore = NULL; + clExternalSemaphore *clVk2CLExternalSemaphore2 = NULL; + clExternalSemaphore *clCl2VkExternalSemaphore2 = NULL; + int err = CL_SUCCESS; + int calc_max_iter; + bool withOffset; + uint32_t pBufferSize; + + const std::vector + vkExternalMemoryHandleTypeList = + getSupportedVulkanExternalMemoryHandleTypeList(); + VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = + getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; + VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); + VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + + VulkanQueue &vkQueue = vkDevice.getQueue(); + + VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( + MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); + VulkanDescriptorSetLayout vkDescriptorSetLayout( + vkDevice, vkDescriptorSetLayoutBindingList); + VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); + VulkanComputePipeline 
vkComputePipeline(vkDevice, vkPipelineLayout, + vkBufferShaderModule); + + VulkanDescriptorPool vkDescriptorPool(vkDevice, + vkDescriptorSetLayoutBindingList); + VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, + vkDescriptorSetLayout); + + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + + clVk2CLExternalSemaphore2 = new clExternalSemaphore( + vkVk2CLSemaphore, context2, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore2 = new clExternalSemaphore( + vkCl2VkSemaphore, context2, vkExternalSemaphoreHandleType, deviceId); + + const uint32_t maxIter = innerIterations; + VulkanCommandPool vkCommandPool(vkDevice); + VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool); + + VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params)); + VulkanDeviceMemory vkParamsDeviceMemory( + vkDevice, vkParamsBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); + std::vector vkBufferListDeviceMemory; + std::vector> externalMemory1; + std::vector> externalMemory2; + + for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size(); + emhtIdx++) + { + VulkanExternalMemoryHandleType vkExternalMemoryHandleType = + vkExternalMemoryHandleTypeList[emhtIdx]; + log_info("External memory handle type:%d\n", + vkExternalMemoryHandleType); + + VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024, + vkExternalMemoryHandleType); + const VulkanMemoryTypeList &memoryTypeList = + vkDummyBuffer.getMemoryTypeList(); + + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) + { + const VulkanMemoryType &memoryType = memoryTypeList[mtIdx]; + + log_info("Memory type index: %d\n", (uint32_t)memoryType); + log_info("Memory type property: %d\n", + 
memoryType.getMemoryTypeProperty()); + + for (unsigned int withOffset = 0; + withOffset <= (unsigned int)enableOffset; withOffset++) + { + log_info("Running withOffset case %d\n", (uint32_t)withOffset); + cl_mem buffers1[MAX_BUFFERS][MAX_IMPORTS]; + cl_mem buffers2[MAX_BUFFERS][MAX_IMPORTS]; + if (withOffset) + { + pBufferSize = bufferSizeForOffset; + } + else + { + pBufferSize = bufferSize; + } + VulkanBufferList vkBufferList(numBuffers, vkDevice, pBufferSize, + vkExternalMemoryHandleType); + uint32_t interBufferOffset = + (uint32_t)(vkBufferList[0].getSize()); + + for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) + { + if (withOffset == 0) + { + vkBufferListDeviceMemory.push_back( + new VulkanDeviceMemory(vkDevice, pBufferSize, + memoryType, + vkExternalMemoryHandleType)); + } + if (withOffset == 1) + { + uint32_t totalSize = + (uint32_t)(vkBufferList.size() * interBufferOffset); + vkBufferListDeviceMemory.push_back( + new VulkanDeviceMemory(vkDevice, totalSize, + memoryType, + vkExternalMemoryHandleType)); + } + std::vector pExternalMemory1; + std::vector pExternalMemory2; + for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + { + pExternalMemory1.push_back(new clExternalMemory( + vkBufferListDeviceMemory[bIdx], + vkExternalMemoryHandleType, + withOffset * bIdx * interBufferOffset, pBufferSize, + context, deviceId)); + pExternalMemory2.push_back(new clExternalMemory( + vkBufferListDeviceMemory[bIdx], + vkExternalMemoryHandleType, + withOffset * bIdx * interBufferOffset, pBufferSize, + context2, deviceId)); + } + externalMemory1.push_back(pExternalMemory1); + externalMemory2.push_back(pExternalMemory2); + } + + clFinish(cmd_queue1); + Params *params = (Params *)vkParamsDeviceMemory.map(); + params->numBuffers = numBuffers; + params->bufferSize = pBufferSize; + params->interBufferOffset = interBufferOffset * withOffset; + vkParamsDeviceMemory.unmap(); + vkDescriptorSet.update(0, vkParamsBuffer); + for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) + { 
+ size_t buffer_size = vkBufferList[bIdx].getSize(); + vkBufferListDeviceMemory[bIdx]->bindBuffer( + vkBufferList[bIdx], + bIdx * interBufferOffset * withOffset); + for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + { + buffers1[bIdx][cl_bIdx] = + externalMemory1[bIdx][cl_bIdx] + ->getExternalMemoryBuffer(); + buffers2[bIdx][cl_bIdx] = + externalMemory2[bIdx][cl_bIdx] + ->getExternalMemoryBuffer(); + } + vkDescriptorSet.update((uint32_t)bIdx + 1, + vkBufferList[bIdx]); + } + + vkCommandBuffer.begin(); + vkCommandBuffer.bindPipeline(vkComputePipeline); + vkCommandBuffer.bindDescriptorSets( + vkComputePipeline, vkPipelineLayout, vkDescriptorSet); + vkCommandBuffer.dispatch(512, 1, 1); + vkCommandBuffer.end(); + + for (int i = 0; i < numImports; i++) + { + update_buffer_kernel1[i] = (numBuffers == 1) + ? kernel1[0] + : ((numBuffers == 2) ? kernel1[1] : kernel1[2]); + update_buffer_kernel2[i] = (numBuffers == 1) + ? kernel2[0] + : ((numBuffers == 2) ? kernel2[1] : kernel2[2]); + } + + // global work size should be less than or equal + // to bufferSizeList[i] + global_work_size[0] = pBufferSize; + + for (uint32_t iter = 0; iter < maxIter; iter++) + { + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + clVk2CLExternalSemaphore->wait(cmd_queue1); + + for (uint8_t launchIter = 0; launchIter < numImports; + launchIter++) + { + err = clSetKernelArg(update_buffer_kernel1[launchIter], + 0, sizeof(uint32_t), + (void *)&pBufferSize); + for (int i = 0; i < numBuffers; i++) + { + err |= clSetKernelArg( + update_buffer_kernel1[launchIter], i + 1, + sizeof(cl_mem), + (void *)&(buffers1[i][launchIter])); + } + + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for " + "kernel\n "); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel( + cmd_queue1, update_buffer_kernel1[launchIter], 1, + NULL, global_work_size, NULL, 0, NULL, 
NULL); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch " + "update_buffer_kernel, error\n"); + goto CLEANUP; + } + } + if (iter != (maxIter - 1)) + { + clCl2VkExternalSemaphore->signal(cmd_queue1); + } + } + clFinish(cmd_queue1); + for (uint32_t iter = 0; iter < maxIter; iter++) + { + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + clVk2CLExternalSemaphore2->wait(cmd_queue2); + + for (uint8_t launchIter = 0; launchIter < numImports; + launchIter++) + { + err = clSetKernelArg(update_buffer_kernel2[launchIter], + 0, sizeof(uint32_t), + (void *)&bufferSize); + for (int i = 0; i < numBuffers; i++) + { + err |= clSetKernelArg( + update_buffer_kernel2[launchIter], i + 1, + sizeof(cl_mem), + (void *)&(buffers2[i][launchIter])); + } + + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for " + "kernel\n "); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel( + cmd_queue2, update_buffer_kernel2[launchIter], 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch " + "update_buffer_kernel, error\n "); + goto CLEANUP; + } + } + if (iter != (maxIter - 1)) + { + clCl2VkExternalSemaphore2->signal(cmd_queue2); + } + } + clFinish(cmd_queue2); + error_3 = (uint8_t *)malloc(sizeof(uint8_t)); + if (NULL == error_3) + { + log_error("Not able to allocate memory\n"); + goto CLEANUP; + } + + error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(uint8_t), NULL, &err); + if (CL_SUCCESS != err) + { + print_error(err, "Error: clCreateBuffer \n"); + goto CLEANUP; + } + error_2 = clCreateBuffer(context2, CL_MEM_WRITE_ONLY, + sizeof(uint8_t), NULL, &err); + if (CL_SUCCESS != err) + { + print_error(err, "Error: clCreateBuffer \n"); + goto CLEANUP; + } + uint8_t val = 0; + err = + clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0, + 
sizeof(uint8_t), &val, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error \n"); + goto CLEANUP; + } + + err = + clEnqueueWriteBuffer(cmd_queue2, error_2, CL_TRUE, 0, + sizeof(uint8_t), &val, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error \n"); + goto CLEANUP; + } + + calc_max_iter = maxIter * 2 * (numBuffers + 1); + for (int i = 0; i < numBuffers; i++) + { + err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem), + (void *)&(buffers1[i][0])); + err |= clSetKernelArg(verify_kernel, 1, sizeof(int), + &pBufferSize); + err |= clSetKernelArg(verify_kernel, 2, sizeof(int), + &calc_max_iter); + err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem), + (void *)&error_1); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for " + "verify_kernel \n"); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, + NULL, global_work_size, NULL, + 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch verify_kernel," + "error\n"); + goto CLEANUP; + } + + err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0, + sizeof(uint8_t), error_3, 0, NULL, + NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error\n"); + goto CLEANUP; + } + if (*error_3 == 1) + { + log_error("&&&& vulkan_opencl_buffer test FAILED\n"); + goto CLEANUP; + } + } + *error_3 = 0; + for (int i = 0; i < vkBufferList.size(); i++) + { + err = clSetKernelArg(verify_kernel2, 0, sizeof(cl_mem), + (void *)&(buffers2[i][0])); + err |= clSetKernelArg(verify_kernel2, 1, sizeof(int), + &pBufferSize); + err |= clSetKernelArg(verify_kernel2, 2, sizeof(int), + &calc_max_iter); + err |= clSetKernelArg(verify_kernel2, 3, sizeof(cl_mem), + (void *)&error_2); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for " + "verify_kernel \n"); + goto CLEANUP; + } + err = 
clEnqueueNDRangeKernel(cmd_queue2, verify_kernel2, 1, + NULL, global_work_size, NULL, + 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch verify_kernel," + "error\n"); + goto CLEANUP; + } + + err = clEnqueueReadBuffer(cmd_queue2, error_2, CL_TRUE, 0, + sizeof(uint8_t), error_3, 0, NULL, + NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error\n"); + goto CLEANUP; + } + if (*error_3 == 1) + { + log_error("&&&& vulkan_opencl_buffer test FAILED\n"); + goto CLEANUP; + } + } + for (size_t i = 0; i < vkBufferList.size(); i++) + { + for (size_t j = 0; j < numImports; j++) + { + delete externalMemory1[i][j]; + delete externalMemory2[i][j]; + } + } + for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++) + { + delete vkBufferListDeviceMemory[i]; + } + vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(), + vkBufferListDeviceMemory.end()); + for (size_t i = 0; i < externalMemory1.size(); i++) + { + externalMemory1[i].erase(externalMemory1[i].begin(), + externalMemory1[i].begin() + + numBuffers); + externalMemory2[i].erase(externalMemory2[i].begin(), + externalMemory2[i].begin() + + numBuffers); + } + externalMemory1.clear(); + externalMemory2.clear(); + } + } + } +CLEANUP: + for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++) + { + if (vkBufferListDeviceMemory[i]) + { + delete vkBufferListDeviceMemory[i]; + } + } + for (size_t i = 0; i < externalMemory1.size(); i++) + { + for (size_t j = 0; j < externalMemory1[i].size(); j++) + { + if (externalMemory1[i][j]) + { + delete externalMemory1[i][j]; + } + } + } + for (size_t i = 0; i < externalMemory2.size(); i++) + { + for (size_t j = 0; j < externalMemory2[i].size(); j++) + { + if (externalMemory2[i][j]) + { + delete externalMemory2[i][j]; + } + } + } + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + if (clVk2CLExternalSemaphore2) delete 
clVk2CLExternalSemaphore2; + if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2; + if (error_3) free(error_3); + if (error_1) clReleaseMemObject(error_1); + if (error_2) clReleaseMemObject(error_2); + return err; +} + +int test_buffer_common(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + + int current_device = 0; + int device_count = 0; + int devices_prohibited = 0; + cl_int errNum = CL_SUCCESS; + cl_platform_id platform = NULL; + size_t extensionSize = 0; + cl_uint num_devices = 0; + cl_uint device_no = 0; + const size_t bufsize = BUFFERSIZE; + char buf[BUFFERSIZE]; + cl_device_id *devices; + char *extensions = NULL; + cl_kernel verify_kernel; + cl_kernel verify_kernel2; + cl_kernel kernel[3] = { NULL, NULL, NULL }; + cl_kernel kernel2[3] = { NULL, NULL, NULL }; + const char *program_source_const[3] = { kernel_text_numbuffer_1, + kernel_text_numbuffer_2, + kernel_text_numbuffer_4 }; + const char *program_source_const_verify; + size_t program_source_length; + cl_command_queue cmd_queue1 = NULL; + cl_command_queue cmd_queue2 = NULL; + cl_command_queue cmd_queue3 = NULL; + cl_context context = NULL; + cl_program program[3] = { NULL, NULL, NULL }; + cl_program program_verify, program_verify2; + cl_context context2 = NULL; + + + VulkanDevice vkDevice; + uint32_t numBuffersList[] = { 1, 2, 4 }; + uint32_t bufferSizeList[] = { 4 * 1024, 64 * 1024, 2 * 1024 * 1024 }; + uint32_t bufferSizeListforOffset[] = { 256, 512, 1024 }; + + cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 }; + errNum = clGetPlatformIDs(1, &platform, NULL); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "Error: Failed to get platform\n"); + goto CLEANUP; + } + + errNum = + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "clGetDeviceIDs failed in returning of devices\n"); + goto CLEANUP; + } + devices = (cl_device_id *)malloc(num_devices * 
sizeof(cl_device_id)); + if (NULL == devices) + { + errNum = CL_OUT_OF_HOST_MEMORY; + print_error(errNum, "Unable to allocate memory for devices\n"); + goto CLEANUP; + } + errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, + NULL); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "Failed to get deviceID.\n"); + goto CLEANUP; + } + contextProperties[1] = (cl_context_properties)platform; + log_info("Assigned contextproperties for platform\n"); + for (device_no = 0; device_no < num_devices; device_no++) + { + errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0, + NULL, &extensionSize); + if (CL_SUCCESS != errNum) + { + print_error(errNum, + "Error in clGetDeviceInfo for getting device_extension " + "size....\n"); + goto CLEANUP; + } + extensions = (char *)malloc(extensionSize); + if (NULL == extensions) + { + print_error(errNum, "Unable to allocate memory for extensions\n"); + errNum = CL_OUT_OF_HOST_MEMORY; + goto CLEANUP; + } + errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, + extensionSize, extensions, NULL); + if (CL_SUCCESS != errNum) + { + print_error(errNum, + "Error in clGetDeviceInfo for device_extension\n"); + goto CLEANUP; + } + errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR, + CL_UUID_SIZE_KHR, uuid, &extensionSize); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "clGetDeviceInfo failed\n"); + goto CLEANUP; + } + errNum = + memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE); + if (errNum == 0) + { + break; + } + } + if (device_no >= num_devices) + { + errNum = EXIT_FAILURE; + print_error(errNum, + "OpenCL error: " + "No Vulkan-OpenCL Interop capable GPU found.\n"); + goto CLEANUP; + } + deviceId = devices[device_no]; + context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, + NULL, NULL, &errNum); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "error creating context\n"); + goto CLEANUP; + } + log_info("Successfully created 
context !!!\n"); + + cmd_queue1 = clCreateCommandQueue(context, devices[device_no], 0, &errNum); + if (CL_SUCCESS != errNum) + { + errNum = CL_INVALID_COMMAND_QUEUE; + print_error(errNum, "Error: Failed to create command queue!\n"); + goto CLEANUP; + } + cmd_queue2 = clCreateCommandQueue(context, devices[device_no], 0, &errNum); + if (CL_SUCCESS != errNum) + { + errNum = CL_INVALID_COMMAND_QUEUE; + print_error(errNum, "Error: Failed to create command queue!\n"); + goto CLEANUP; + } + log_info("clCreateCommandQueue successful\n"); + for (int i = 0; i < 3; i++) + { + program_source_length = strlen(program_source_const[i]); + program[i] = + clCreateProgramWithSource(context, 1, &program_source_const[i], + &program_source_length, &errNum); + errNum = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "Error: Failed to build program \n"); + return errNum; + } + // create the kernel + kernel[i] = clCreateKernel(program[i], "clUpdateBuffer", &errNum); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "clCreateKernel failed \n"); + return errNum; + } + } + + program_source_const_verify = kernel_text_verify; + program_source_length = strlen(program_source_const_verify); + program_verify = + clCreateProgramWithSource(context, 1, &program_source_const_verify, + &program_source_length, &errNum); + errNum = clBuildProgram(program_verify, 0, NULL, NULL, NULL, NULL); + if (errNum != CL_SUCCESS) + { + log_error("Error: Failed to build program2\n"); + return errNum; + } + verify_kernel = clCreateKernel(program_verify, "checkKernel", &errNum); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "clCreateKernel failed \n"); + return errNum; + } + + if (multiCtx) // different context guard + { + context2 = clCreateContextFromType( + contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "error creating context\n"); + goto CLEANUP; + } + cmd_queue3 = + 
clCreateCommandQueue(context2, devices[device_no], 0, &errNum); + if (CL_SUCCESS != errNum) + { + errNum = CL_INVALID_COMMAND_QUEUE; + print_error(errNum, "Error: Failed to create command queue!\n"); + goto CLEANUP; + } + for (int i = 0; i < 3; i++) + { + program_source_length = strlen(program_source_const[i]); + program[i] = + clCreateProgramWithSource(context2, 1, &program_source_const[i], + &program_source_length, &errNum); + errNum = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "Error: Failed to build program \n"); + return errNum; + } + // create the kernel + kernel2[i] = clCreateKernel(program[i], "clUpdateBuffer", &errNum); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "clCreateKernel failed \n"); + return errNum; + } + } + program_source_length = strlen(program_source_const_verify); + program_verify = + clCreateProgramWithSource(context2, 1, &program_source_const_verify, + &program_source_length, &errNum); + errNum = clBuildProgram(program_verify, 0, NULL, NULL, NULL, NULL); + if (errNum != CL_SUCCESS) + { + log_error("Error: Failed to build program2\n"); + return errNum; + } + verify_kernel2 = clCreateKernel(program_verify, "checkKernel", &errNum); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "clCreateKernel failed \n"); + return errNum; + } + } + + for (size_t numBuffersIdx = 0; numBuffersIdx < ARRAY_SIZE(numBuffersList); + numBuffersIdx++) + { + uint32_t numBuffers = numBuffersList[numBuffersIdx]; + log_info("Number of buffers: %d\n", numBuffers); + for (size_t sizeIdx = 0; sizeIdx < ARRAY_SIZE(bufferSizeList); + sizeIdx++) + { + uint32_t bufferSize = bufferSizeList[sizeIdx]; + uint32_t bufferSizeForOffset = bufferSizeListforOffset[sizeIdx]; + log_info("&&&& RUNNING vulkan_opencl_buffer test for Buffer size: " + "%d\n", + bufferSize); + if (multiImport && !multiCtx) + { + errNum = run_test_with_multi_import_same_ctx( + context, cmd_queue1, kernel, verify_kernel, vkDevice, + 
numBuffers, bufferSize, bufferSizeForOffset); + } + else if (multiImport && multiCtx) + { + errNum = run_test_with_multi_import_diff_ctx( + context, context2, cmd_queue1, cmd_queue3, kernel, kernel2, + verify_kernel, verify_kernel2, vkDevice, numBuffers, + bufferSize, bufferSizeForOffset); + } + else if (numCQ == 2) + { + errNum = run_test_with_two_queue( + context, cmd_queue1, cmd_queue2, kernel, verify_kernel, + vkDevice, numBuffers + 1, bufferSize); + } + else + { + errNum = run_test_with_one_queue(context, cmd_queue1, kernel, + verify_kernel, vkDevice, + numBuffers, bufferSize); + } + if (errNum != CL_SUCCESS) + { + print_error(errNum, "func_name failed \n"); + goto CLEANUP; + } + } + } + +CLEANUP: + for (int i = 0; i < 3; i++) + { + if (program[i]) clReleaseProgram(program[i]); + if (kernel[i]) clReleaseKernel(kernel[i]); + } + if (cmd_queue1) clReleaseCommandQueue(cmd_queue1); + if (cmd_queue2) clReleaseCommandQueue(cmd_queue2); + if (cmd_queue3) clReleaseCommandQueue(cmd_queue3); + if (context) clReleaseContext(context); + if (context2) clReleaseContext(context2); + + if (devices) free(devices); + if (extensions) free(extensions); + + return errNum; +} diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp new file mode 100644 index 00000000..f1d0af1f --- /dev/null +++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp @@ -0,0 +1,1648 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// + +#define NOMINMAX +#include +#include +#include "harness/errorHelpers.h" + +#define MAX_2D_IMAGES 5 +#define MAX_2D_IMAGE_WIDTH 1024 +#define MAX_2D_IMAGE_HEIGHT 1024 +#define MAX_2D_IMAGE_ELEMENT_SIZE 16 +#define MAX_2D_IMAGE_MIP_LEVELS 11 +#define MAX_2D_IMAGE_DESCRIPTORS MAX_2D_IMAGES *MAX_2D_IMAGE_MIP_LEVELS +#define GLSL_FORMAT_STRING "" +#define GLSL_TYPE_PREFIX_STRING "" +#define NUM_THREADS_PER_GROUP_X 32 +#define NUM_THREADS_PER_GROUP_Y 32 +#define NUM_BLOCKS(size, blockSize) \ + (ROUND_UP((size), (blockSize)) / (blockSize)) + +#define ASSERT(x) \ + if (!(x)) \ + { \ + fprintf(stderr, "Assertion \"%s\" failed at %s:%d\n", #x, __FILE__, \ + __LINE__); \ + exit(1); \ + } + +#define ASSERT_LEQ(x, y) \ + if (x > y) \ + { \ + ASSERT(0); \ + } + +namespace { +struct Params +{ + uint32_t numImage2DDescriptors; +}; +} +static cl_uchar uuid[CL_UUID_SIZE_KHR]; +static cl_device_id deviceId = NULL; + +static const char *vkImage2DShader = + "#version 450\n" + "#extension GL_ARB_separate_shader_objects : enable\n" + "#extension GL_NV_gpu_shader5 : enable\n" + "layout(binding = 0) buffer Params\n" + "{\n" + " uint32_t numImage2DDescriptors;\n" + "};\n" + "layout(binding = 1, " GLSL_FORMAT_STRING + ") uniform " GLSL_TYPE_PREFIX_STRING "image2D image2DList[" STRING( + MAX_2D_IMAGE_DESCRIPTORS) "];\n" + "layout(local_size_x = 32, local_size_y = " + "32) in;\n" + "void main() {\n" + " uvec3 numThreads = gl_NumWorkGroups * " + "gl_WorkGroupSize;\n" + " for (uint32_t image2DIdx = 0; " + "image2DIdx < numImage2DDescriptors; " + "image2DIdx++)" + " {\n" + " ivec2 imageDim = " + "imageSize(image2DList[image2DIdx]);\n" + " uint32_t heightBy2 = imageDim.y / " + "2;\n" + " for (uint32_t row = " + "gl_GlobalInvocationID.y; row < heightBy2; " + "row += numThreads.y)" + " {\n" + " for (uint32_t col = " + "gl_GlobalInvocationID.x; col < imageDim.x; " + "col += numThreads.x)" 
+ " {\n" + " ivec2 coordsA = ivec2(col, " + "row);\n" + " ivec2 coordsB = ivec2(col, " + "imageDim.y - row - 1);\n" + " " GLSL_TYPE_PREFIX_STRING + "vec4 dataA = " + "imageLoad(image2DList[image2DIdx], " + "coordsA);\n" + " " GLSL_TYPE_PREFIX_STRING + "vec4 dataB = " + "imageLoad(image2DList[image2DIdx], " + "coordsB);\n" + " " + "imageStore(image2DList[image2DIdx], " + "coordsA, dataB);\n" + " " + "imageStore(image2DList[image2DIdx], " + "coordsB, dataA);\n" + " }\n" + " }\n" + " }\n" + "}\n"; + +const char *kernel_text_numImage_1 = " \ +__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\ +__kernel void image2DKernel(read_only image2d_t InputImage, write_only image2d_t OutImage, int num2DImages, int baseWidth, int baseHeight, int numMipLevels)\n\ +{\n\ + int threadIdxX = get_global_id(0);\n\ + int threadIdxY = get_global_id(1);\n\ + int numThreadsX = get_global_size(0); \n\ + int numThreadsY = get_global_size(1);\n\ + if (threadIdxX >= baseWidth || threadIdxY >= baseHeight)\n\ + {\n\ + return;\n\ + }\n\ + %s dataA = read_image%s(InputImage, smpImg, (int2)(threadIdxX, threadIdxY)); \n\ + %s dataB = read_image%s(InputImage, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\ + write_image%s(OutImage, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\ + write_image%s(OutImage, (int2)( threadIdxX, threadIdxY), dataB);\n\ +\n\ +}"; + +const char *kernel_text_numImage_2 = " \ +__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\ +__kernel void image2DKernel(read_only image2d_t InputImage_1, write_only image2d_t OutImage_1, read_only image2d_t InputImage_2,write_only image2d_t OutImage_2,int num2DImages, int baseWidth, int baseHeight, int numMipLevels) \n\ +{\n\ + int threadIdxX = get_global_id(0);\n\ + int threadIdxY = get_global_id(1);\n\ + int numThreadsX = get_global_size(0);\n\ + int numThreadsY = get_global_size(1);\n\ + if (threadIdxX >= baseWidth || threadIdxY >= 
baseHeight) \n\ + {\n\ + return;\n\ + }\n\ + %s dataA = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, threadIdxY)); \n\ + %s dataB = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\ + %s dataC = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, threadIdxY)); \n\ + %s dataD = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\ + write_image%s(OutImage_1, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\ + write_image%s(OutImage_1, (int2)(threadIdxX, threadIdxY), dataB);\n\ + write_image%s(OutImage_2, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataC);\n\ + write_image%s(OutImage_2, (int2)(threadIdxX, threadIdxY), dataD);\n\ +\n\ +}"; + +const char *kernel_text_numImage_4 = " \ +__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\ +__kernel void image2DKernel(read_only image2d_t InputImage_1, write_only image2d_t OutImage_1, read_only image2d_t InputImage_2, write_only image2d_t OutImage_2, read_only image2d_t InputImage_3, write_only image2d_t OutImage_3, read_only image2d_t InputImage_4, write_only image2d_t OutImage_4, int num2DImages, int baseWidth, int baseHeight, int numMipLevels) \n\ +{\n\ + int threadIdxX = get_global_id(0);\n\ + int threadIdxY = get_global_id(1);\n\ + int numThreadsX = get_global_size(0);\n\ + int numThreadsY = get_global_size(1);\n\ + if (threadIdxX >= baseWidth || threadIdxY >= baseHeight) \n\ + {\n\ + return;\n\ + }\n\ + %s dataA = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, threadIdxY)); \n\ + %s dataB = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\ + %s dataC = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, threadIdxY)); \n\ + %s dataD = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\ + %s dataE = read_image%s(InputImage_3, smpImg, (int2)(threadIdxX, threadIdxY)); \n\ + %s dataF = read_image%s(InputImage_3, smpImg, 
(int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\ + %s dataG = read_image%s(InputImage_4, smpImg, (int2)(threadIdxX, threadIdxY)); \n\ + %s dataH = read_image%s(InputImage_4, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\ + write_image%s(OutImage_1, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\ + write_image%s(OutImage_1, (int2)(threadIdxX, threadIdxY), dataB);\n\ + write_image%s(OutImage_2, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataC);\n\ + write_image%s(OutImage_2, (int2)(threadIdxX, threadIdxY), dataD);\n\ + write_image%s(OutImage_3, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataE);\n\ + write_image%s(OutImage_3, (int2)(threadIdxX, threadIdxY), dataF);\n\ + write_image%s(OutImage_4, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataG);\n\ + write_image%s(OutImage_4, (int2)(threadIdxX, threadIdxY), dataH);\n\ +\n\ +}"; + +const uint32_t num2DImagesList[] = { 1, 2, 4 }; +const uint32_t widthList[] = { 4, 64, 183, 1024 }; +const uint32_t heightList[] = { 4, 64, 365 }; + +const cl_kernel getKernelType(VulkanFormat format, cl_kernel kernel_float, + cl_kernel kernel_signed, + cl_kernel kernel_unsigned) +{ + cl_kernel kernel; + switch (format) + { + case VULKAN_FORMAT_R32G32B32A32_SFLOAT: kernel = kernel_float; break; + + case VULKAN_FORMAT_R32G32B32A32_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R32G32B32A32_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R16G16B16A16_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R16G16B16A16_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R8G8B8A8_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R8G8B8A8_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R32G32_SFLOAT: kernel = kernel_float; break; + + case VULKAN_FORMAT_R32G32_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R32G32_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R16G16_UINT: kernel = kernel_unsigned; break; + + case 
VULKAN_FORMAT_R16G16_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R8G8_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R8G8_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R32_SFLOAT: kernel = kernel_float; break; + + case VULKAN_FORMAT_R32_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R32_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R16_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R16_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R8_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R8_SINT: kernel = kernel_signed; break; + + default: + log_error(" Unsupported format"); + ASSERT(0); + break; + } + return kernel; +} + +int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, + cl_command_queue &cmd_queue2, + cl_kernel *kernel_unsigned, + cl_kernel *kernel_signed, cl_kernel *kernel_float, + VulkanDevice &vkDevice) +{ + cl_int err = CL_SUCCESS; + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { 1, 1, 1 }; + + cl_kernel updateKernelCQ1, updateKernelCQ2; + std::vector vkFormatList = getSupportedVulkanFormatList(); + const std::vector + vkExternalMemoryHandleTypeList = + getSupportedVulkanExternalMemoryHandleTypeList(); + char magicValue = 0; + + VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params)); + VulkanDeviceMemory vkParamsDeviceMemory( + vkDevice, vkParamsBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); + + uint64_t maxImage2DSize = MAX_2D_IMAGE_WIDTH * MAX_2D_IMAGE_HEIGHT + * MAX_2D_IMAGE_ELEMENT_SIZE * 2; + VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize); + VulkanDeviceMemory vkSrcBufferDeviceMemory( + vkDevice, vkSrcBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkSrcBufferDeviceMemory.bindBuffer(vkSrcBuffer); + + char *srcBufferPtr, *dstBufferPtr; + 
srcBufferPtr = (char *)malloc(maxImage2DSize); + dstBufferPtr = (char *)malloc(maxImage2DSize); + + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( + VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, + VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS); + VulkanDescriptorSetLayout vkDescriptorSetLayout( + vkDevice, vkDescriptorSetLayoutBindingList); + VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); + + VulkanDescriptorPool vkDescriptorPool(vkDevice, + vkDescriptorSetLayoutBindingList); + VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, + vkDescriptorSetLayout); + + VulkanCommandPool vkCommandPool(vkDevice); + VulkanCommandBuffer vkCopyCommandBuffer(vkDevice, vkCommandPool); + VulkanCommandBuffer vkShaderCommandBuffer(vkDevice, vkCommandPool); + VulkanQueue &vkQueue = vkDevice.getQueue(); + + VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = + getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; + VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); + VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + clExternalSemaphore *clVk2CLExternalSemaphore = NULL; + clExternalSemaphore *clCl2VkExternalSemaphore = NULL; + + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + + for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++) + { + VulkanFormat vkFormat = vkFormatList[fIdx]; + log_info("Format: %d\n", vkFormat); + uint32_t elementSize = getVulkanFormatElementSize(vkFormat); + ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE); + log_info("elementSize= %d\n", elementSize); + std::map patternToSubstituteMap; + patternToSubstituteMap[GLSL_FORMAT_STRING] = + getVulkanFormatGLSLFormat(vkFormat); + 
patternToSubstituteMap[GLSL_TYPE_PREFIX_STRING] = + getVulkanFormatGLSLTypePrefix(vkFormat); + + VulkanShaderModule vkImage2DShaderModule( + vkDevice, + prepareVulkanShader(vkImage2DShader, patternToSubstituteMap)); + VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, + vkImage2DShaderModule); + + for (size_t wIdx = 0; wIdx < ARRAY_SIZE(widthList); wIdx++) + { + uint32_t width = widthList[wIdx]; + log_info("Width: %d\n", width); + ASSERT_LEQ(width, (uint32_t)MAX_2D_IMAGE_WIDTH); + region[0] = width; + for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++) + { + uint32_t height = heightList[hIdx]; + log_info("Height: %d", height); + ASSERT_LEQ(height, (uint32_t)MAX_2D_IMAGE_HEIGHT); + region[1] = height; + + uint32_t numMipLevels = 1; + log_info("Number of mipmap levels: %d\n", numMipLevels); + + magicValue++; + char *vkSrcBufferDeviceMemoryPtr = + (char *)vkSrcBufferDeviceMemory.map(); + uint64_t srcBufSize = 0; + memset(vkSrcBufferDeviceMemoryPtr, 0, maxImage2DSize); + memset(srcBufferPtr, 0, maxImage2DSize); + uint32_t mipLevel = 0; + for (uint32_t row = 0; + row < std::max(height >> mipLevel, uint32_t(1)); row++) + { + for (uint32_t col = 0; + col < std::max(width >> mipLevel, uint32_t(1)); col++) + { + for (uint32_t elementByte = 0; + elementByte < elementSize; elementByte++) + { + vkSrcBufferDeviceMemoryPtr[srcBufSize] = + (char)(magicValue + mipLevel + row + col); + srcBufferPtr[srcBufSize] = + (char)(magicValue + mipLevel + row + col); + srcBufSize++; + } + } + } + srcBufSize = ROUND_UP( + srcBufSize, + std::max( + elementSize, + (uint32_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT)); + vkSrcBufferDeviceMemory.unmap(); + + for (size_t niIdx = 0; niIdx < ARRAY_SIZE(num2DImagesList); + niIdx++) + { + uint32_t num2DImages = num2DImagesList[niIdx] + 1; + // added one image for cross-cq case for updateKernelCQ2 + log_info("Number of images: %d\n", num2DImages); + ASSERT_LEQ(num2DImages, (uint32_t)MAX_2D_IMAGES); + uint32_t num_2D_image; + if 
(useSingleImageKernel) + { + num_2D_image = 1; + } + else + { + num_2D_image = num2DImages; + } + Params *params = (Params *)vkParamsDeviceMemory.map(); + params->numImage2DDescriptors = num_2D_image * numMipLevels; + vkParamsDeviceMemory.unmap(); + vkDescriptorSet.update(0, vkParamsBuffer); + for (size_t emhtIdx = 0; + emhtIdx < vkExternalMemoryHandleTypeList.size(); + emhtIdx++) + { + VulkanExternalMemoryHandleType + vkExternalMemoryHandleType = + vkExternalMemoryHandleTypeList[emhtIdx]; + log_info("External memory handle type: %d \n", + vkExternalMemoryHandleType); + if ((true == disableNTHandleType) + && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT + == vkExternalMemoryHandleType)) + { + // Skip running for WIN32 NT handle. + continue; + } + VulkanImage2D vkDummyImage2D( + vkDevice, vkFormatList[0], widthList[0], + heightList[0], 1, vkExternalMemoryHandleType); + const VulkanMemoryTypeList &memoryTypeList = + vkDummyImage2D.getMemoryTypeList(); + + std::vector + vkNonDedicatedImage2DListDeviceMemory1; + std::vector + vkNonDedicatedImage2DListDeviceMemory2; + std::vector + nonDedicatedExternalMemory1; + std::vector + nonDedicatedExternalMemory2; + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); + mtIdx++) + { + const VulkanMemoryType &memoryType = + memoryTypeList[mtIdx]; + log_info("Memory type index: %d\n", + (uint32_t)memoryType); + log_info("Memory type property: %d\n", + memoryType.getMemoryTypeProperty()); + if (!useDeviceLocal) + { + if (VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL + == memoryType.getMemoryTypeProperty()) + { + continue; + } + } + + size_t totalImageMemSize = 0; + uint64_t interImageOffset = 0; + { + VulkanImage2D vkImage2D( + vkDevice, vkFormat, width, height, + numMipLevels, vkExternalMemoryHandleType); + ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize); + totalImageMemSize = + ROUND_UP(vkImage2D.getSize(), + vkImage2D.getAlignment()); + } + VulkanImage2DList vkNonDedicatedImage2DList( + num2DImages, vkDevice, vkFormat, width, 
height, + numMipLevels, vkExternalMemoryHandleType); + for (size_t bIdx = 0; bIdx < num2DImages; bIdx++) + { + if (non_dedicated) + { + vkNonDedicatedImage2DListDeviceMemory1 + .push_back(new VulkanDeviceMemory( + vkDevice, totalImageMemSize, + memoryType, + vkExternalMemoryHandleType)); + } + else + { + vkNonDedicatedImage2DListDeviceMemory1 + .push_back(new VulkanDeviceMemory( + vkDevice, + vkNonDedicatedImage2DList[bIdx], + memoryType, + vkExternalMemoryHandleType)); + } + vkNonDedicatedImage2DListDeviceMemory1[bIdx] + ->bindImage(vkNonDedicatedImage2DList[bIdx], + 0); + nonDedicatedExternalMemory1.push_back( + new clExternalMemoryImage( + *vkNonDedicatedImage2DListDeviceMemory1 + [bIdx], + vkExternalMemoryHandleType, context, + totalImageMemSize, width, height, 0, + vkNonDedicatedImage2DList[bIdx], + deviceId)); + } + VulkanImageViewList vkNonDedicatedImage2DViewList( + vkDevice, vkNonDedicatedImage2DList); + VulkanImage2DList vkNonDedicatedImage2DList2( + num2DImages, vkDevice, vkFormat, width, height, + numMipLevels, vkExternalMemoryHandleType); + for (size_t bIdx = 0; bIdx < num2DImages; bIdx++) + { + if (non_dedicated) + { + vkNonDedicatedImage2DListDeviceMemory2 + .push_back(new VulkanDeviceMemory( + vkDevice, totalImageMemSize, + memoryType, + vkExternalMemoryHandleType)); + } + else + { + vkNonDedicatedImage2DListDeviceMemory2 + .push_back(new VulkanDeviceMemory( + vkDevice, + vkNonDedicatedImage2DList2[bIdx], + memoryType, + vkExternalMemoryHandleType)); + } + vkNonDedicatedImage2DListDeviceMemory2[bIdx] + ->bindImage( + vkNonDedicatedImage2DList2[bIdx], 0); + nonDedicatedExternalMemory2.push_back( + new clExternalMemoryImage( + *vkNonDedicatedImage2DListDeviceMemory2 + [bIdx], + vkExternalMemoryHandleType, context, + totalImageMemSize, width, height, 0, + vkNonDedicatedImage2DList2[bIdx], + deviceId)); + } + VulkanImageViewList vkDedicatedImage2DViewList( + vkDevice, vkNonDedicatedImage2DList2); + + cl_mem external_mem_image1[5]; + cl_mem 
external_mem_image2[5]; + for (int i = 0; i < num2DImages; i++) + { + external_mem_image1[i] = + nonDedicatedExternalMemory1[i] + ->getExternalMemoryImage(); + external_mem_image2[i] = + nonDedicatedExternalMemory2[i] + ->getExternalMemoryImage(); + } + VulkanImage2DList &vkImage2DList = + vkNonDedicatedImage2DList; + VulkanImageViewList &vkImage2DViewList = + vkNonDedicatedImage2DViewList; + + clCl2VkExternalSemaphore->signal(cmd_queue1); + if (!useSingleImageKernel) + { + for (size_t i2DIdx = 0; + i2DIdx < vkImage2DList.size(); i2DIdx++) + { + for (uint32_t mipLevel = 0; + mipLevel < numMipLevels; mipLevel++) + { + uint32_t i2DvIdx = + (uint32_t)(i2DIdx * numMipLevels) + + mipLevel; + vkDescriptorSet.update( + 1 + i2DvIdx, + vkImage2DViewList[i2DvIdx]); + } + } + vkCopyCommandBuffer.begin(); + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_UNDEFINED, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + for (size_t i2DIdx = 0; + i2DIdx < vkImage2DList.size(); i2DIdx++) + { + vkCopyCommandBuffer.copyBufferToImage( + vkSrcBuffer, vkImage2DList[i2DIdx], + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + } + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VULKAN_IMAGE_LAYOUT_GENERAL); + vkCopyCommandBuffer.end(); + memset(dstBufferPtr, 0, srcBufSize); + vkQueue.submit(vkCopyCommandBuffer); + vkShaderCommandBuffer.begin(); + vkShaderCommandBuffer.bindPipeline( + vkComputePipeline); + vkShaderCommandBuffer.bindDescriptorSets( + vkComputePipeline, vkPipelineLayout, + vkDescriptorSet); + vkShaderCommandBuffer.dispatch( + NUM_BLOCKS(width, NUM_THREADS_PER_GROUP_X), + NUM_BLOCKS(height, + NUM_THREADS_PER_GROUP_Y / 2), + 1); + vkShaderCommandBuffer.end(); + } + for (uint32_t iter = 0; iter < innerIterations; + iter++) + { + if (useSingleImageKernel) + { + for (size_t i2DIdx = 0; + i2DIdx < vkImage2DList.size(); + i2DIdx++) + { + vkDescriptorSet.update( + 1, vkImage2DViewList[i2DIdx]); + 
vkCopyCommandBuffer.begin(); + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_UNDEFINED, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + vkCopyCommandBuffer.copyBufferToImage( + vkSrcBuffer, vkImage2DList[i2DIdx], + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VULKAN_IMAGE_LAYOUT_GENERAL); + vkCopyCommandBuffer.end(); + memset(dstBufferPtr, 0, srcBufSize); + vkQueue.submit(vkCopyCommandBuffer); + vkShaderCommandBuffer.begin(); + vkShaderCommandBuffer.bindPipeline( + vkComputePipeline); + vkShaderCommandBuffer + .bindDescriptorSets( + vkComputePipeline, + vkPipelineLayout, + vkDescriptorSet); + vkShaderCommandBuffer.dispatch( + NUM_BLOCKS(width, + NUM_THREADS_PER_GROUP_X), + NUM_BLOCKS(height, + NUM_THREADS_PER_GROUP_Y + / 2), + 1); + vkShaderCommandBuffer.end(); + if (i2DIdx < vkImage2DList.size() - 1) + { + vkQueue.submit( + vkShaderCommandBuffer); + } + } + } + vkQueue.submit(vkCl2VkSemaphore, + vkShaderCommandBuffer, + vkVk2CLSemaphore); + clVk2CLExternalSemaphore->wait(cmd_queue1); + switch (num2DImages) + { + case 2: + updateKernelCQ1 = getKernelType( + vkFormat, kernel_float[0], + kernel_signed[0], + kernel_unsigned[0]); + break; + case 3: + updateKernelCQ1 = getKernelType( + vkFormat, kernel_float[1], + kernel_signed[1], + kernel_unsigned[1]); + break; + case 5: + updateKernelCQ1 = getKernelType( + vkFormat, kernel_float[2], + kernel_signed[2], + kernel_unsigned[2]); + break; + } + updateKernelCQ2 = getKernelType( + vkFormat, kernel_float[3], kernel_signed[3], + kernel_unsigned[3]); + // similar kernel-type based on vkFormat + int j = 0; + // Setting arguments of updateKernelCQ2 + + err = clSetKernelArg(updateKernelCQ2, 0, + sizeof(cl_mem), + &external_mem_image1[0]); + err |= clSetKernelArg(updateKernelCQ2, 1, + sizeof(cl_mem), + &external_mem_image2[0]); + err |= clSetKernelArg( + updateKernelCQ2, 2, sizeof(cl_mem), + 
&external_mem_image1[num2DImages - 1]); + err |= clSetKernelArg( + updateKernelCQ2, 3, sizeof(cl_mem), + &external_mem_image2[num2DImages - 1]); + err |= clSetKernelArg(updateKernelCQ2, 4, + sizeof(unsigned int), + &num2DImages); + err |= clSetKernelArg(updateKernelCQ2, 5, + sizeof(unsigned int), + &width); + err |= clSetKernelArg(updateKernelCQ2, 6, + sizeof(unsigned int), + &height); + err |= clSetKernelArg(updateKernelCQ2, 7, + sizeof(unsigned int), + &numMipLevels); + for (int i = 0; i < num2DImages - 1; i++, ++j) + { + err = clSetKernelArg( + updateKernelCQ1, j, sizeof(cl_mem), + &external_mem_image1[i]); + err |= clSetKernelArg( + updateKernelCQ1, ++j, sizeof(cl_mem), + &external_mem_image2[i]); + } + err |= clSetKernelArg(updateKernelCQ1, j, + sizeof(unsigned int), + &num2DImages); + err |= clSetKernelArg(updateKernelCQ1, ++j, + sizeof(unsigned int), + &width); + err |= clSetKernelArg(updateKernelCQ1, ++j, + sizeof(unsigned int), + &height); + err |= clSetKernelArg(updateKernelCQ1, ++j, + sizeof(unsigned int), + &numMipLevels); + + if (err != CL_SUCCESS) + { + print_error( + err, + "Error: Failed to set arg values \n"); + goto CLEANUP; + } + // clVk2CLExternalSemaphore->wait(cmd_queue1); + size_t global_work_size[3] = { width, height, + 1 }; + cl_event first_launch; + err = clEnqueueNDRangeKernel( + cmd_queue1, updateKernelCQ1, 2, NULL, + global_work_size, NULL, 0, NULL, + &first_launch); + if (err != CL_SUCCESS) + { + goto CLEANUP; + } + err = clEnqueueNDRangeKernel( + cmd_queue2, updateKernelCQ2, 2, NULL, + global_work_size, NULL, 1, &first_launch, + NULL); + if (err != CL_SUCCESS) + { + goto CLEANUP; + } + + clFinish(cmd_queue2); + clCl2VkExternalSemaphore->signal(cmd_queue2); + } + + unsigned int flags = 0; + size_t mipmapLevelOffset = 0; + cl_event eventReadImage = NULL; + clFinish(cmd_queue2); + for (int i = 0; i < num2DImages; i++) + { + err = clEnqueueReadImage( + cmd_queue1, external_mem_image2[i], CL_TRUE, + origin, region, 0, 0, dstBufferPtr, 0, 
NULL, + &eventReadImage); + + if (err != CL_SUCCESS) + { + print_error(err, + "clEnqueueReadImage failed with" + "error\n"); + } + + if (memcmp(srcBufferPtr, dstBufferPtr, + srcBufSize)) + { + log_info("Source and destination buffers " + "don't match\n"); + if (debug_trace) + { + log_info("Source buffer contents: \n"); + for (uint64_t sIdx = 0; + sIdx < srcBufSize; sIdx++) + { + log_info( + "%d ", + (int)vkSrcBufferDeviceMemoryPtr + [sIdx]); + } + log_info("Destination buffer contents:" + "\n"); + for (uint64_t dIdx = 0; + dIdx < srcBufSize; dIdx++) + { + log_info("%d ", + (int)dstBufferPtr[dIdx]); + } + } + err = -1; + break; + } + } + for (int i = 0; i < num2DImages; i++) + { + delete vkNonDedicatedImage2DListDeviceMemory1 + [i]; + delete vkNonDedicatedImage2DListDeviceMemory2 + [i]; + delete nonDedicatedExternalMemory1[i]; + delete nonDedicatedExternalMemory2[i]; + } + vkNonDedicatedImage2DListDeviceMemory1.erase( + vkNonDedicatedImage2DListDeviceMemory1.begin(), + vkNonDedicatedImage2DListDeviceMemory1.begin() + + num2DImages); + vkNonDedicatedImage2DListDeviceMemory2.erase( + vkNonDedicatedImage2DListDeviceMemory2.begin(), + vkNonDedicatedImage2DListDeviceMemory2.begin() + + num2DImages); + nonDedicatedExternalMemory1.erase( + nonDedicatedExternalMemory1.begin(), + nonDedicatedExternalMemory1.begin() + + num2DImages); + nonDedicatedExternalMemory2.erase( + nonDedicatedExternalMemory2.begin(), + nonDedicatedExternalMemory2.begin() + + num2DImages); + if (CL_SUCCESS != err) + { + goto CLEANUP; + } + } + } + } + } + } + } +CLEANUP: + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + + if (srcBufferPtr) free(srcBufferPtr); + if (dstBufferPtr) free(dstBufferPtr); + return err; +} + +int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, + cl_kernel *kernel_unsigned, + cl_kernel *kernel_signed, cl_kernel *kernel_float, + VulkanDevice &vkDevice) +{ + cl_int err = 
CL_SUCCESS; + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { 1, 1, 1 }; + cl_kernel updateKernelCQ1; + std::vector vkFormatList = getSupportedVulkanFormatList(); + const std::vector + vkExternalMemoryHandleTypeList = + getSupportedVulkanExternalMemoryHandleTypeList(); + char magicValue = 0; + + VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params)); + VulkanDeviceMemory vkParamsDeviceMemory( + vkDevice, vkParamsBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); + + uint64_t maxImage2DSize = MAX_2D_IMAGE_WIDTH * MAX_2D_IMAGE_HEIGHT + * MAX_2D_IMAGE_ELEMENT_SIZE * 2; + VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize); + VulkanDeviceMemory vkSrcBufferDeviceMemory( + vkDevice, vkSrcBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkSrcBufferDeviceMemory.bindBuffer(vkSrcBuffer); + + char *srcBufferPtr, *dstBufferPtr; + srcBufferPtr = (char *)malloc(maxImage2DSize); + dstBufferPtr = (char *)malloc(maxImage2DSize); + + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( + VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, + VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS); + VulkanDescriptorSetLayout vkDescriptorSetLayout( + vkDevice, vkDescriptorSetLayoutBindingList); + VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); + + VulkanDescriptorPool vkDescriptorPool(vkDevice, + vkDescriptorSetLayoutBindingList); + VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, + vkDescriptorSetLayout); + + VulkanCommandPool vkCommandPool(vkDevice); + VulkanCommandBuffer vkCopyCommandBuffer(vkDevice, vkCommandPool); + VulkanCommandBuffer vkShaderCommandBuffer(vkDevice, vkCommandPool); + VulkanQueue &vkQueue = vkDevice.getQueue(); + + VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = + getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; + 
VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); + VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + clExternalSemaphore *clVk2CLExternalSemaphore = NULL; + clExternalSemaphore *clCl2VkExternalSemaphore = NULL; + + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + + for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++) + { + VulkanFormat vkFormat = vkFormatList[fIdx]; + log_info("Format: %d\n", vkFormat); + uint32_t elementSize = getVulkanFormatElementSize(vkFormat); + ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE); + log_info("elementSize= %d\n", elementSize); + std::map patternToSubstituteMap; + patternToSubstituteMap[GLSL_FORMAT_STRING] = + getVulkanFormatGLSLFormat(vkFormat); + patternToSubstituteMap[GLSL_TYPE_PREFIX_STRING] = + getVulkanFormatGLSLTypePrefix(vkFormat); + + VulkanShaderModule vkImage2DShaderModule( + vkDevice, + prepareVulkanShader(vkImage2DShader, patternToSubstituteMap)); + VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, + vkImage2DShaderModule); + + for (size_t wIdx = 0; wIdx < ARRAY_SIZE(widthList); wIdx++) + { + uint32_t width = widthList[wIdx]; + log_info("Width: %d\n", width); + ASSERT_LEQ(width, (uint32_t)MAX_2D_IMAGE_WIDTH); + region[0] = width; + for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++) + { + uint32_t height = heightList[hIdx]; + log_info("Height: %d\n", height); + ASSERT_LEQ(height, (uint32_t)MAX_2D_IMAGE_HEIGHT); + region[1] = height; + + uint32_t numMipLevels = 1; + log_info("Number of mipmap levels: %d\n", numMipLevels); + + magicValue++; + char *vkSrcBufferDeviceMemoryPtr = + (char *)vkSrcBufferDeviceMemory.map(); + uint64_t srcBufSize = 0; + memset(vkSrcBufferDeviceMemoryPtr, 0, maxImage2DSize); + memset(srcBufferPtr, 0, 
maxImage2DSize); + uint32_t mipLevel = 0; + for (uint32_t row = 0; + row < std::max(height >> mipLevel, uint32_t(1)); row++) + { + for (uint32_t col = 0; + col < std::max(width >> mipLevel, uint32_t(1)); col++) + { + for (uint32_t elementByte = 0; + elementByte < elementSize; elementByte++) + { + vkSrcBufferDeviceMemoryPtr[srcBufSize] = + (char)(magicValue + mipLevel + row + col); + srcBufferPtr[srcBufSize] = + (char)(magicValue + mipLevel + row + col); + srcBufSize++; + } + } + } + srcBufSize = ROUND_UP( + srcBufSize, + std::max( + elementSize, + (uint32_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT)); + vkSrcBufferDeviceMemory.unmap(); + + for (size_t niIdx = 0; niIdx < ARRAY_SIZE(num2DImagesList); + niIdx++) + { + uint32_t num2DImages = num2DImagesList[niIdx]; + log_info("Number of images: %d\n", num2DImages); + ASSERT_LEQ(num2DImages, (uint32_t)MAX_2D_IMAGES); + + Params *params = (Params *)vkParamsDeviceMemory.map(); + uint32_t num_2D_image; + if (useSingleImageKernel) + { + num_2D_image = 1; + } + else + { + num_2D_image = num2DImages; + } + params->numImage2DDescriptors = num_2D_image * numMipLevels; + vkParamsDeviceMemory.unmap(); + vkDescriptorSet.update(0, vkParamsBuffer); + for (size_t emhtIdx = 0; + emhtIdx < vkExternalMemoryHandleTypeList.size(); + emhtIdx++) + { + VulkanExternalMemoryHandleType + vkExternalMemoryHandleType = + vkExternalMemoryHandleTypeList[emhtIdx]; + log_info("External memory handle type: %d \n", + vkExternalMemoryHandleType); + if ((true == disableNTHandleType) + && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT + == vkExternalMemoryHandleType)) + { + // Skip running for WIN32 NT handle. 
+ continue; + } + VulkanImage2D vkDummyImage2D( + vkDevice, vkFormatList[0], widthList[0], + heightList[0], 1, vkExternalMemoryHandleType); + const VulkanMemoryTypeList &memoryTypeList = + vkDummyImage2D.getMemoryTypeList(); + + std::vector + vkNonDedicatedImage2DListDeviceMemory1; + std::vector + vkNonDedicatedImage2DListDeviceMemory2; + std::vector + nonDedicatedExternalMemory1; + std::vector + nonDedicatedExternalMemory2; + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); + mtIdx++) + { + const VulkanMemoryType &memoryType = + memoryTypeList[mtIdx]; + log_info("Memory type index: %d\n", + (uint32_t)memoryType); + log_info("Memory type property: %d\n", + memoryType.getMemoryTypeProperty()); + if (!useDeviceLocal) + { + if (VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL + == memoryType.getMemoryTypeProperty()) + { + continue; + } + } + size_t totalImageMemSize = 0; + uint64_t interImageOffset = 0; + { + VulkanImage2D vkImage2D( + vkDevice, vkFormat, width, height, + numMipLevels, vkExternalMemoryHandleType); + ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize); + totalImageMemSize = + ROUND_UP(vkImage2D.getSize(), + vkImage2D.getAlignment()); + } + VulkanImage2DList vkNonDedicatedImage2DList( + num2DImages, vkDevice, vkFormat, width, height, + numMipLevels, vkExternalMemoryHandleType); + for (size_t bIdx = 0; + bIdx < vkNonDedicatedImage2DList.size(); + bIdx++) + { + // Create list of Vulkan device memories and + // bind the list of Vulkan images. 
+ vkNonDedicatedImage2DListDeviceMemory1 + .push_back(new VulkanDeviceMemory( + vkDevice, totalImageMemSize, memoryType, + vkExternalMemoryHandleType)); + vkNonDedicatedImage2DListDeviceMemory1[bIdx] + ->bindImage(vkNonDedicatedImage2DList[bIdx], + 0); + nonDedicatedExternalMemory1.push_back( + new clExternalMemoryImage( + *vkNonDedicatedImage2DListDeviceMemory1 + [bIdx], + vkExternalMemoryHandleType, context, + totalImageMemSize, width, height, 0, + vkNonDedicatedImage2DList[bIdx], + deviceId)); + } + VulkanImageViewList vkNonDedicatedImage2DViewList( + vkDevice, vkNonDedicatedImage2DList); + + VulkanImage2DList vkNonDedicatedImage2DList2( + num2DImages, vkDevice, vkFormat, width, height, + numMipLevels, vkExternalMemoryHandleType); + for (size_t bIdx = 0; + bIdx < vkNonDedicatedImage2DList2.size(); + bIdx++) + { + vkNonDedicatedImage2DListDeviceMemory2 + .push_back(new VulkanDeviceMemory( + vkDevice, totalImageMemSize, memoryType, + vkExternalMemoryHandleType)); + vkNonDedicatedImage2DListDeviceMemory2[bIdx] + ->bindImage( + vkNonDedicatedImage2DList2[bIdx], 0); + nonDedicatedExternalMemory2.push_back( + new clExternalMemoryImage( + *vkNonDedicatedImage2DListDeviceMemory2 + [bIdx], + vkExternalMemoryHandleType, context, + totalImageMemSize, width, height, 0, + vkNonDedicatedImage2DList2[bIdx], + deviceId)); + } + VulkanImageViewList vkDedicatedImage2DViewList( + vkDevice, vkNonDedicatedImage2DList2); + cl_mem external_mem_image1[4]; + cl_mem external_mem_image2[4]; + for (int i = 0; i < num2DImages; i++) + { + external_mem_image1[i] = + nonDedicatedExternalMemory1[i] + ->getExternalMemoryImage(); + external_mem_image2[i] = + nonDedicatedExternalMemory2[i] + ->getExternalMemoryImage(); + } + VulkanImage2DList &vkImage2DList = + vkNonDedicatedImage2DList; + VulkanImageViewList &vkImage2DViewList = + vkNonDedicatedImage2DViewList; + + clCl2VkExternalSemaphore->signal(cmd_queue1); + if (!useSingleImageKernel) + { + for (size_t i2DIdx = 0; + i2DIdx < 
vkImage2DList.size(); i2DIdx++) + { + for (uint32_t mipLevel = 0; + mipLevel < numMipLevels; mipLevel++) + { + uint32_t i2DvIdx = + (uint32_t)(i2DIdx * numMipLevels) + + mipLevel; + vkDescriptorSet.update( + 1 + i2DvIdx, + vkImage2DViewList[i2DvIdx]); + } + } + vkCopyCommandBuffer.begin(); + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_UNDEFINED, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + for (size_t i2DIdx = 0; + i2DIdx < vkImage2DList.size(); i2DIdx++) + { + vkCopyCommandBuffer.copyBufferToImage( + vkSrcBuffer, vkImage2DList[i2DIdx], + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + } + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VULKAN_IMAGE_LAYOUT_GENERAL); + vkCopyCommandBuffer.end(); + memset(dstBufferPtr, 0, srcBufSize); + vkQueue.submit(vkCopyCommandBuffer); + vkShaderCommandBuffer.begin(); + vkShaderCommandBuffer.bindPipeline( + vkComputePipeline); + vkShaderCommandBuffer.bindDescriptorSets( + vkComputePipeline, vkPipelineLayout, + vkDescriptorSet); + vkShaderCommandBuffer.dispatch( + NUM_BLOCKS(width, NUM_THREADS_PER_GROUP_X), + NUM_BLOCKS(height, + NUM_THREADS_PER_GROUP_Y / 2), + 1); + vkShaderCommandBuffer.end(); + } + for (uint32_t iter = 0; iter < innerIterations; + iter++) + { + if (useSingleImageKernel) + { + for (size_t i2DIdx = 0; + i2DIdx < vkImage2DList.size(); + i2DIdx++) + { + vkDescriptorSet.update( + 1, vkImage2DViewList[i2DIdx]); + vkCopyCommandBuffer.begin(); + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_UNDEFINED, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + vkCopyCommandBuffer.copyBufferToImage( + vkSrcBuffer, vkImage2DList[i2DIdx], + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VULKAN_IMAGE_LAYOUT_GENERAL); + vkCopyCommandBuffer.end(); + memset(dstBufferPtr, 0, srcBufSize); + vkQueue.submit(vkCopyCommandBuffer); + 
vkShaderCommandBuffer.begin(); + vkShaderCommandBuffer.bindPipeline( + vkComputePipeline); + vkShaderCommandBuffer + .bindDescriptorSets( + vkComputePipeline, + vkPipelineLayout, + vkDescriptorSet); + vkShaderCommandBuffer.dispatch( + NUM_BLOCKS(width, + NUM_THREADS_PER_GROUP_X), + NUM_BLOCKS(height, + NUM_THREADS_PER_GROUP_Y + / 2), + 1); + vkShaderCommandBuffer.end(); + if (i2DIdx < vkImage2DList.size() - 1) + { + vkQueue.submit( + vkShaderCommandBuffer); + } + } + } + vkQueue.submit(vkCl2VkSemaphore, + vkShaderCommandBuffer, + vkVk2CLSemaphore); + clVk2CLExternalSemaphore->wait(cmd_queue1); + switch (num2DImages) + { + case 1: + updateKernelCQ1 = getKernelType( + vkFormat, kernel_float[0], + kernel_signed[0], + kernel_unsigned[0]); + break; + case 2: + updateKernelCQ1 = getKernelType( + vkFormat, kernel_float[1], + kernel_signed[1], + kernel_unsigned[1]); + break; + case 4: + updateKernelCQ1 = getKernelType( + vkFormat, kernel_float[2], + kernel_signed[2], + kernel_unsigned[2]); + break; + } + int j = 0; + for (int i = 0; i < num2DImages; i++, ++j) + { + err = clSetKernelArg( + updateKernelCQ1, j, sizeof(cl_mem), + &external_mem_image1[i]); + err |= clSetKernelArg( + updateKernelCQ1, ++j, sizeof(cl_mem), + &external_mem_image2[i]); + } + err |= clSetKernelArg(updateKernelCQ1, j, + sizeof(unsigned int), + &num2DImages); + err |= clSetKernelArg(updateKernelCQ1, ++j, + sizeof(unsigned int), + &width); + err |= clSetKernelArg(updateKernelCQ1, ++j, + sizeof(unsigned int), + &height); + err |= clSetKernelArg(updateKernelCQ1, ++j, + sizeof(unsigned int), + &numMipLevels); + + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg " + "values for kernel-1\n"); + goto CLEANUP; + } + + size_t global_work_size[3] = { width, height, + 1 }; + err = clEnqueueNDRangeKernel( + cmd_queue1, updateKernelCQ1, 2, NULL, + global_work_size, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + goto CLEANUP; + } + clCl2VkExternalSemaphore->signal(cmd_queue1); + } + + 
unsigned int flags = 0; + size_t mipmapLevelOffset = 0; + cl_event eventReadImage = NULL; + for (int i = 0; i < num2DImages; i++) + { + err = clEnqueueReadImage( + cmd_queue1, external_mem_image2[i], CL_TRUE, + origin, region, 0, 0, dstBufferPtr, 0, NULL, + &eventReadImage); + + if (err != CL_SUCCESS) + { + print_error(err, + "clEnqueueReadImage failed with" + "error\n"); + } + + if (memcmp(srcBufferPtr, dstBufferPtr, + srcBufSize)) + { + log_info("Source and destination buffers " + "don't match\n"); + if (debug_trace) + { + log_info("Source buffer contents: \n"); + for (uint64_t sIdx = 0; + sIdx < srcBufSize; sIdx++) + { + log_info( + "%d", + (int)vkSrcBufferDeviceMemoryPtr + [sIdx]); + } + log_info( + "Destination buffer contents:"); + for (uint64_t dIdx = 0; + dIdx < srcBufSize; dIdx++) + { + log_info("%d", + (int)dstBufferPtr[dIdx]); + } + } + err = -1; + break; + } + } + for (int i = 0; i < num2DImages; i++) + { + delete vkNonDedicatedImage2DListDeviceMemory1 + [i]; + delete vkNonDedicatedImage2DListDeviceMemory2 + [i]; + delete nonDedicatedExternalMemory1[i]; + delete nonDedicatedExternalMemory2[i]; + } + vkNonDedicatedImage2DListDeviceMemory1.erase( + vkNonDedicatedImage2DListDeviceMemory1.begin(), + vkNonDedicatedImage2DListDeviceMemory1.begin() + + num2DImages); + vkNonDedicatedImage2DListDeviceMemory2.erase( + vkNonDedicatedImage2DListDeviceMemory2.begin(), + vkNonDedicatedImage2DListDeviceMemory2.begin() + + num2DImages); + nonDedicatedExternalMemory1.erase( + nonDedicatedExternalMemory1.begin(), + nonDedicatedExternalMemory1.begin() + + num2DImages); + nonDedicatedExternalMemory2.erase( + nonDedicatedExternalMemory2.begin(), + nonDedicatedExternalMemory2.begin() + + num2DImages); + if (CL_SUCCESS != err) + { + goto CLEANUP; + } + } + } + } + } + } + } +CLEANUP: + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + + if (srcBufferPtr) free(srcBufferPtr); + if (dstBufferPtr) 
free(dstBufferPtr); + return err; +} + +int test_image_common(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + int current_device = 0; + int device_count = 0; + int devices_prohibited = 0; + cl_int err = CL_SUCCESS; + cl_platform_id platform = NULL; + size_t extensionSize = 0; + cl_uint num_devices = 0; + cl_uint device_no = 0; + cl_device_id *devices; + char *extensions = NULL; + const char *program_source_const; + cl_command_queue cmd_queue1 = NULL; + cl_command_queue cmd_queue2 = NULL; + cl_context context = NULL; + const uint32_t num_kernels = ARRAY_SIZE(num2DImagesList) + 1; + // One kernel for Cross-CQ case + const uint32_t num_kernel_types = 3; + const char *kernel_source[num_kernels] = { kernel_text_numImage_1, + kernel_text_numImage_2, + kernel_text_numImage_4 }; + char source_1[4096]; + char source_2[4096]; + char source_3[4096]; + size_t program_source_length; + cl_program program[num_kernel_types]; + cl_kernel kernel_float[num_kernels] = { NULL, NULL, NULL, NULL }; + cl_kernel kernel_signed[num_kernels] = { NULL, NULL, NULL, NULL }; + cl_kernel kernel_unsigned[num_kernels] = { NULL, NULL, NULL, NULL }; + cl_mem external_mem_image1; + cl_mem external_mem_image2; + + VulkanDevice vkDevice; + + cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 }; + // get the platform ID + err = clGetPlatformIDs(1, &platform, NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed to get platform\n"); + goto CLEANUP; + } + + err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices); + if (CL_SUCCESS != err) + { + print_error(err, "clGetDeviceIDs failed in returning no. 
of devices\n"); + goto CLEANUP; + } + devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id)); + if (NULL == devices) + { + err = CL_OUT_OF_HOST_MEMORY; + print_error(err, "Unable to allocate memory for devices\n"); + goto CLEANUP; + } + err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, + NULL); + if (CL_SUCCESS != err) + { + print_error(err, "Failed to get deviceID.\n"); + goto CLEANUP; + } + contextProperties[1] = (cl_context_properties)platform; + log_info("Assigned contextproperties for platform\n"); + for (device_no = 0; device_no < num_devices; device_no++) + { + err = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0, NULL, + &extensionSize); + if (CL_SUCCESS != err) + { + print_error( + err, + "Error in clGetDeviceInfo for getting device_extension size\n"); + goto CLEANUP; + } + extensions = (char *)malloc(extensionSize); + if (NULL == extensions) + { + err = CL_OUT_OF_HOST_MEMORY; + print_error(err, "Unable to allocate memory for extensions\n"); + goto CLEANUP; + } + err = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, + extensionSize, extensions, NULL); + if (CL_SUCCESS != err) + { + print_error( + err, "Error in clGetDeviceInfo for getting device_extension\n"); + goto CLEANUP; + } + err = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR, + CL_UUID_SIZE_KHR, uuid, &extensionSize); + if (CL_SUCCESS != err) + { + print_error(err, "clGetDeviceInfo failed with error"); + goto CLEANUP; + } + err = + memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE); + if (err == 0) + { + break; + } + } + if (device_no >= num_devices) + { + err = EXIT_FAILURE; + print_error(err, + "OpenCL error:" + "No Vulkan-OpenCL Interop capable GPU found.\n"); + goto CLEANUP; + } + deviceId = devices[device_no]; + context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, + NULL, NULL, &err); + if (CL_SUCCESS != err) + { + print_error(err, "error creating context"); + goto CLEANUP; + } + 
log_info("Successfully created context !!!\n"); + + cmd_queue1 = clCreateCommandQueue(context, devices[device_no], 0, &err); + if (CL_SUCCESS != err) + { + err = CL_INVALID_COMMAND_QUEUE; + print_error(err, "Error: Failed to create command queue!\n"); + goto CLEANUP; + } + log_info("clCreateCommandQueue successfull \n"); + + cmd_queue2 = clCreateCommandQueue(context, devices[device_no], 0, &err); + if (CL_SUCCESS != err) + { + err = CL_INVALID_COMMAND_QUEUE; + print_error(err, "Error: Failed to create command queue!\n"); + goto CLEANUP; + } + log_info("clCreateCommandQueue2 successful \n"); + + for (int i = 0; i < num_kernels; i++) + { + switch (i) + { + case 0: + sprintf(source_1, kernel_source[i], "float4", "f", "float4", + "f", "f", "f"); + sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i", + "i", "i"); + sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4", + "ui", "ui", "ui"); + break; + case 1: + sprintf(source_1, kernel_source[i], "float4", "f", "float4", + "f", "float4", "f", "float4", "f", "f", "f", "f", "f"); + sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i", + "int4", "i", "int4", "i", "i", "i", "i", "i"); + sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4", + "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", "ui", + "ui"); + break; + case 2: + sprintf(source_1, kernel_source[i], "float4", "f", "float4", + "f", "float4", "f", "float4", "f", "float4", "f", + "float4", "f", "float4", "f", "float4", "f", "f", "f", + "f", "f", "f", "f", "f", "f"); + sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i", + "int4", "i", "int4", "i", "int4", "i", "int4", "i", + "int4", "i", "int4", "i", "i", "i", "i", "i", "i", "i", + "i", "i"); + sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4", + "ui", "uint4", "ui", "uint4", "ui", "uint4", "ui", + "uint4", "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", + "ui", "ui", "ui", "ui", "ui", "ui"); + break; + case 3: + // Addtional case for creating updateKernelCQ2 which takes 
two + // images + sprintf(source_1, kernel_source[1], "float4", "f", "float4", + "f", "float4", "f", "float4", "f", "f", "f", "f", "f"); + sprintf(source_2, kernel_source[1], "int4", "i", "int4", "i", + "int4", "i", "int4", "i", "i", "i", "i", "i"); + sprintf(source_3, kernel_source[1], "uint4", "ui", "uint4", + "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", "ui", + "ui"); + break; + } + const char *sourceTexts[num_kernel_types] = { source_1, source_2, + source_3 }; + for (int k = 0; k < num_kernel_types; k++) + { + program_source_length = strlen(sourceTexts[k]); + program[k] = clCreateProgramWithSource( + context, 1, &sourceTexts[k], &program_source_length, &err); + err |= clBuildProgram(program[k], 0, NULL, NULL, NULL, NULL); + } + + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed to build program"); + goto CLEANUP; + } + // create the kernel + kernel_float[i] = clCreateKernel(program[0], "image2DKernel", &err); + if (err != CL_SUCCESS) + { + print_error(err, "clCreateKernel failed"); + goto CLEANUP; + } + kernel_signed[i] = clCreateKernel(program[1], "image2DKernel", &err); + if (err != CL_SUCCESS) + { + print_error(err, "clCreateKernel failed"); + goto CLEANUP; + } + kernel_unsigned[i] = clCreateKernel(program[2], "image2DKernel", &err); + if (err != CL_SUCCESS) + { + print_error(err, "clCreateKernel failed "); + goto CLEANUP; + } + } + if (numCQ == 2) + { + err = run_test_with_two_queue(context, cmd_queue1, cmd_queue2, + kernel_unsigned, kernel_signed, + kernel_float, vkDevice); + } + else + { + err = run_test_with_one_queue(context, cmd_queue1, kernel_unsigned, + kernel_signed, kernel_float, vkDevice); + } +CLEANUP: + for (int i = 0; i < num_kernels; i++) + { + if (kernel_float[i]) + { + clReleaseKernel(kernel_float[i]); + } + if (kernel_unsigned[i]) + { + clReleaseKernel(kernel_unsigned[i]); + } + if (kernel_signed[i]) + { + clReleaseKernel(kernel_signed[i]); + } + } + for (int i = 0; i < num_kernel_types; i++) + { + if (program[i]) + { + 
clReleaseProgram(program[i]); + } + } + if (cmd_queue1) clReleaseCommandQueue(cmd_queue1); + if (cmd_queue2) clReleaseCommandQueue(cmd_queue2); + if (context) clReleaseContext(context); + + if (extensions) free(extensions); + if (devices) free(devices); + + return err; +} diff --git a/test_conformance/vulkan/test_vulkan_platform_device_info.cpp b/test_conformance/vulkan/test_vulkan_platform_device_info.cpp new file mode 100644 index 00000000..12f373b5 --- /dev/null +++ b/test_conformance/vulkan/test_vulkan_platform_device_info.cpp @@ -0,0 +1,146 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include +#include +#include "harness/testHarness.h" +#include +#include + +typedef struct +{ + cl_uint info; + const char *name; +} _info; + +_info platform_info_table[] = { +#define STRING(x) \ + { \ + x, #x \ + } + STRING(CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR), + STRING(CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR), + STRING(CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR) +#undef STRING +}; + +_info device_info_table[] = { +#define STRING(x) \ + { \ + x, #x \ + } + STRING(CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR), + STRING(CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR), + STRING(CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR) +#undef STRING +}; + +int test_platform_info(cl_device_id deviceID, cl_context _context, + cl_command_queue _queue, int num_elements) +{ + cl_uint num_platforms; + cl_uint i, j; + cl_platform_id *platforms; + cl_int errNum; + cl_uint *handle_type; + size_t handle_type_size = 0; + cl_uint num_handles = 0; + + // get total # of platforms + errNum = clGetPlatformIDs(0, NULL, &num_platforms); + test_error(errNum, "clGetPlatformIDs (getting count) failed"); + + platforms = + (cl_platform_id *)malloc(num_platforms * sizeof(cl_platform_id)); + if (!platforms) + { + printf("error allocating memory\n"); + exit(1); + } + log_info("%d platforms available\n", num_platforms); + errNum = clGetPlatformIDs(num_platforms, platforms, NULL); + test_error(errNum, "clGetPlatformIDs (getting IDs) failed"); + + for (i = 0; i < num_platforms; i++) + { + log_info("Platform%d (id %lu) info:\n", i, (unsigned long)platforms[i]); + for (j = 0; + j < sizeof(platform_info_table) / sizeof(platform_info_table[0]); + j++) + { + errNum = + clGetPlatformInfo(platforms[i], platform_info_table[j].info, 0, + NULL, &handle_type_size); + test_error(errNum, "clGetPlatformInfo failed"); + num_handles = handle_type_size / sizeof(cl_uint); + handle_type = (cl_uint *)malloc(handle_type_size); + errNum = + clGetPlatformInfo(platforms[i], 
platform_info_table[j].info, + handle_type_size, handle_type, NULL); + test_error(errNum, "clGetPlatformInfo failed"); + + log_info("%s: \n", platform_info_table[j].name); + while (num_handles--) + { + log_info("%x \n", handle_type[num_handles]); + } + if (handle_type) + { + free(handle_type); + } + } + } + if (platforms) + { + free(platforms); + } + return TEST_PASS; +} + +int test_device_info(cl_device_id deviceID, cl_context _context, + cl_command_queue _queue, int num_elements) +{ + cl_uint j; + cl_uint *handle_type; + size_t handle_type_size = 0; + cl_uint num_handles = 0; + cl_int errNum = CL_SUCCESS; + for (j = 0; j < sizeof(device_info_table) / sizeof(device_info_table[0]); + j++) + { + errNum = clGetDeviceInfo(deviceID, device_info_table[j].info, 0, NULL, + &handle_type_size); + test_error(errNum, "clGetDeviceInfo failed"); + + num_handles = handle_type_size / sizeof(cl_uint); + handle_type = (cl_uint *)malloc(handle_type_size); + + errNum = clGetDeviceInfo(deviceID, device_info_table[j].info, + handle_type_size, handle_type, NULL); + test_error(errNum, "clGetDeviceInfo failed"); + + log_info("%s: \n", device_info_table[j].name); + while (num_handles--) + { + log_info("%x \n", handle_type[num_handles]); + } + if (handle_type) + { + free(handle_type); + } + } + return TEST_PASS; +} diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp new file mode 100644 index 00000000..136818f6 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp @@ -0,0 +1,818 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#include "opencl_vulkan_wrapper.hpp" +#include "vulkan_wrapper.hpp" +#include "harness/errorHelpers.h" +#include "harness/deviceInfo.h" +#include +#include +#include + +#define ASSERT(x) assert((x)) + +pfnclCreateSemaphoreWithPropertiesKHR clCreateSemaphoreWithPropertiesKHRptr; +pfnclEnqueueWaitSemaphoresKHR clEnqueueWaitSemaphoresKHRptr; +pfnclEnqueueSignalSemaphoresKHR clEnqueueSignalSemaphoresKHRptr; +pfnclEnqueueAcquireExternalMemObjectsKHR + clEnqueueAcquireExternalMemObjectsKHRptr; +pfnclEnqueueReleaseExternalMemObjectsKHR + clEnqueueReleaseExternalMemObjectsKHRptr; +pfnclReleaseSemaphoreObjectKHR clReleaseSemaphoreObjectKHRptr; + +void init_cl_vk_ext(cl_platform_id opencl_platform) +{ + clEnqueueWaitSemaphoresKHRptr = + (pfnclEnqueueWaitSemaphoresKHR)clGetExtensionFunctionAddressForPlatform( + opencl_platform, "clEnqueueWaitSemaphoresKHR"); + if (NULL == clEnqueueWaitSemaphoresKHRptr) + { + throw std::runtime_error("Failed to get the function pointer of " + "clEnqueueWaitSemaphoresKHRptr!"); + } + clEnqueueSignalSemaphoresKHRptr = (pfnclEnqueueSignalSemaphoresKHR) + clGetExtensionFunctionAddressForPlatform( + opencl_platform, "clEnqueueSignalSemaphoresKHR"); + if (NULL == clEnqueueSignalSemaphoresKHRptr) + { + throw std::runtime_error("Failed to get the function pointer of " + "clEnqueueSignalSemaphoresKHRptr!"); + } + clReleaseSemaphoreObjectKHRptr = (pfnclReleaseSemaphoreObjectKHR) + clGetExtensionFunctionAddressForPlatform(opencl_platform, + "clReleaseSemaphoreObjectKHR"); + if (NULL == clReleaseSemaphoreObjectKHRptr) + { + 
throw std::runtime_error("Failed to get the function pointer of " + "clReleaseSemaphoreObjectKHRptr!"); + } + clCreateSemaphoreWithPropertiesKHRptr = + (pfnclCreateSemaphoreWithPropertiesKHR) + clGetExtensionFunctionAddressForPlatform( + opencl_platform, "clCreateSemaphoreWithPropertiesKHR"); + if (NULL == clCreateSemaphoreWithPropertiesKHRptr) + { + throw std::runtime_error("Failed to get the function pointer of " + "clCreateSemaphoreWithPropertiesKHRptr!"); + } +} + +cl_int getCLFormatFromVkFormat(VkFormat vkFormat, + cl_image_format *clImageFormat) +{ + cl_int result = CL_SUCCESS; + switch (vkFormat) + { + case VK_FORMAT_R8G8B8A8_UNORM: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_UNORM_INT8; + break; + case VK_FORMAT_B8G8R8A8_UNORM: + clImageFormat->image_channel_order = CL_BGRA; + clImageFormat->image_channel_data_type = CL_UNORM_INT8; + break; + case VK_FORMAT_R16G16B16A16_UNORM: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_UNORM_INT16; + break; + case VK_FORMAT_R8G8B8A8_SINT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_SIGNED_INT8; + break; + case VK_FORMAT_R16G16B16A16_SINT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_SIGNED_INT16; + break; + case VK_FORMAT_R32G32B32A32_SINT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_SIGNED_INT32; + break; + case VK_FORMAT_R8G8B8A8_UINT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT8; + break; + case VK_FORMAT_R16G16B16A16_UINT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT16; + break; + case VK_FORMAT_R32G32B32A32_UINT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + break; + case 
VK_FORMAT_R16G16B16A16_SFLOAT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_HALF_FLOAT; + break; + case VK_FORMAT_R32G32B32A32_SFLOAT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_FLOAT; + break; + case VK_FORMAT_R8_SNORM: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_SNORM_INT8; + break; + case VK_FORMAT_R16_SNORM: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_SNORM_INT16; + break; + case VK_FORMAT_R8_UNORM: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_UNORM_INT8; + break; + case VK_FORMAT_R16_UNORM: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_UNORM_INT16; + break; + case VK_FORMAT_R8_SINT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_SIGNED_INT8; + break; + case VK_FORMAT_R16_SINT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_SIGNED_INT16; + break; + case VK_FORMAT_R32_SINT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_SIGNED_INT32; + break; + case VK_FORMAT_R8_UINT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT8; + break; + case VK_FORMAT_R16_UINT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT16; + break; + case VK_FORMAT_R32_UINT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + break; + case VK_FORMAT_R16_SFLOAT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_HALF_FLOAT; + break; + case VK_FORMAT_R32_SFLOAT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_FLOAT; + break; + case 
VK_FORMAT_R8G8_SNORM: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_SNORM_INT8; + break; + case VK_FORMAT_R16G16_SNORM: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_SNORM_INT16; + break; + case VK_FORMAT_R8G8_UNORM: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_UNORM_INT8; + break; + case VK_FORMAT_R16G16_UNORM: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_UNORM_INT16; + break; + case VK_FORMAT_R8G8_SINT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_SIGNED_INT8; + break; + case VK_FORMAT_R16G16_SINT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_SIGNED_INT16; + break; + case VK_FORMAT_R32G32_SINT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_SIGNED_INT32; + break; + case VK_FORMAT_R8G8_UINT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT8; + break; + case VK_FORMAT_R16G16_UINT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT16; + break; + case VK_FORMAT_R32G32_UINT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + break; + case VK_FORMAT_R16G16_SFLOAT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_HALF_FLOAT; + break; + case VK_FORMAT_R32G32_SFLOAT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_FLOAT; + break; + case VK_FORMAT_R5G6B5_UNORM_PACK16: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_UNORM_SHORT_565; + break; + case VK_FORMAT_R5G5B5A1_UNORM_PACK16: + clImageFormat->image_channel_order = CL_RGBA; + 
clImageFormat->image_channel_data_type = CL_UNORM_SHORT_555; + break; + case VK_FORMAT_R8G8B8A8_SNORM: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_SNORM_INT8; + break; + case VK_FORMAT_R16G16B16A16_SNORM: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_SNORM_INT16; + break; + case VK_FORMAT_B8G8R8A8_SNORM: + clImageFormat->image_channel_order = CL_BGRA; + clImageFormat->image_channel_data_type = CL_SNORM_INT8; + break; + case VK_FORMAT_B5G6R5_UNORM_PACK16: + clImageFormat->image_channel_order = CL_BGRA; + clImageFormat->image_channel_data_type = CL_UNORM_SHORT_565; + break; + case VK_FORMAT_B5G5R5A1_UNORM_PACK16: + clImageFormat->image_channel_order = CL_BGRA; + clImageFormat->image_channel_data_type = CL_UNORM_SHORT_555; + break; + case VK_FORMAT_B8G8R8A8_SINT: + clImageFormat->image_channel_order = CL_BGRA; + clImageFormat->image_channel_data_type = CL_SIGNED_INT8; + break; + case VK_FORMAT_B8G8R8A8_UINT: + clImageFormat->image_channel_order = CL_BGRA; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT8; + break; + case VK_FORMAT_A8B8G8R8_SNORM_PACK32: result = CL_INVALID_VALUE; break; + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: result = CL_INVALID_VALUE; break; + case VK_FORMAT_A8B8G8R8_SINT_PACK32: result = CL_INVALID_VALUE; break; + case VK_FORMAT_A8B8G8R8_UINT_PACK32: result = CL_INVALID_VALUE; break; + default: + log_error("Unsupported format\n"); + ASSERT(0); + break; + } + return result; +} + +cl_mem_object_type getImageTypeFromVk(VkImageType imageType) +{ + cl_mem_object_type cl_image_type = CL_INVALID_VALUE; + switch (imageType) + { + case VK_IMAGE_TYPE_1D: cl_image_type = CL_MEM_OBJECT_IMAGE1D; break; + case VK_IMAGE_TYPE_2D: cl_image_type = CL_MEM_OBJECT_IMAGE2D; break; + case VK_IMAGE_TYPE_3D: cl_image_type = CL_MEM_OBJECT_IMAGE3D; break; + default: break; + } + return cl_image_type; +} + +size_t GetElementNBytes(const cl_image_format *format) +{ + 
size_t result; + + switch (format->image_channel_order) + { + case CL_R: + case CL_A: + case CL_INTENSITY: + case CL_LUMINANCE: + case CL_DEPTH: result = 1; break; + case CL_RG: + case CL_RA: result = 2; break; + case CL_RGB: result = 3; break; + case CL_RGBA: + case CL_ARGB: + case CL_BGRA: + case CL_sRGBA: result = 4; break; + default: result = 0; break; + } + + switch (format->image_channel_data_type) + { + case CL_SNORM_INT8: + case CL_UNORM_INT8: + case CL_SIGNED_INT8: + case CL_UNSIGNED_INT8: + // result *= 1; + break; + + case CL_SNORM_INT16: + case CL_UNORM_INT16: + case CL_SIGNED_INT16: + case CL_UNSIGNED_INT16: + case CL_HALF_FLOAT: result *= 2; break; + + case CL_SIGNED_INT32: + case CL_UNSIGNED_INT32: + case CL_FLOAT: result *= 4; break; + + case CL_UNORM_SHORT_565: + case CL_UNORM_SHORT_555: + if (result == 3) + { + result = 2; + } + else + { + result = 0; + } + break; + + case CL_UNORM_INT_101010: + if (result == 3) + { + result = 4; + } + else + { + result = 0; + } + break; + + default: result = 0; break; + } + + return result; +} + +cl_int get2DImageDimensions(const VkImageCreateInfo *VulkanImageCreateInfo, + cl_image_format *img_fmt, size_t totalImageSize, + size_t &width, size_t &height) +{ + cl_int result = CL_SUCCESS; + if (totalImageSize == 0) + { + result = CL_INVALID_VALUE; + } + size_t element_size = GetElementNBytes(img_fmt); + size_t row_pitch = element_size * VulkanImageCreateInfo->extent.width; + row_pitch = row_pitch % 64 == 0 ? 
row_pitch : ((row_pitch / 64) + 1) * 64; + + width = row_pitch / element_size; + height = totalImageSize / row_pitch; + + return result; +} + +cl_int +getCLImageInfoFromVkImageInfo(const VkImageCreateInfo *VulkanImageCreateInfo, + size_t totalImageSize, cl_image_format *img_fmt, + cl_image_desc *img_desc) +{ + cl_int result = CL_SUCCESS; + + cl_image_format clImgFormat = { 0 }; + result = + getCLFormatFromVkFormat(VulkanImageCreateInfo->format, &clImgFormat); + if (CL_SUCCESS != result) + { + return result; + } + memcpy(img_fmt, &clImgFormat, sizeof(cl_image_format)); + + img_desc->image_type = getImageTypeFromVk(VulkanImageCreateInfo->imageType); + if (CL_INVALID_VALUE == img_desc->image_type) + { + return CL_INVALID_VALUE; + } + + result = + get2DImageDimensions(VulkanImageCreateInfo, img_fmt, totalImageSize, + img_desc->image_width, img_desc->image_height); + if (CL_SUCCESS != result) + { + throw std::runtime_error("get2DImageDimensions failed!!!"); + } + + img_desc->image_depth = 0; // VulkanImageCreateInfo->extent.depth; + img_desc->image_array_size = 0; + img_desc->image_row_pitch = 0; // Row pitch set to zero as host_ptr is NULL + img_desc->image_slice_pitch = + img_desc->image_row_pitch * img_desc->image_height; + img_desc->num_mip_levels = 1; + img_desc->num_samples = 0; + img_desc->buffer = NULL; + + return result; +} + +cl_int check_external_memory_handle_type( + cl_device_id deviceID, + cl_external_memory_handle_type_khr requiredHandleType) +{ + unsigned int i; + cl_external_memory_handle_type_khr *handle_type; + size_t handle_type_size = 0; + + cl_int errNum = CL_SUCCESS; + + errNum = clGetDeviceInfo(deviceID, + CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, + 0, NULL, &handle_type_size); + handle_type = + (cl_external_memory_handle_type_khr *)malloc(handle_type_size); + + errNum = clGetDeviceInfo(deviceID, + CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, + handle_type_size, handle_type, NULL); + + test_error( + errNum, + "Unable to query 
CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR \n"); + + for (i = 0; i < handle_type_size; i++) + { + if (requiredHandleType == handle_type[i]) + { + return CL_SUCCESS; + } + } + log_error("cl_khr_external_memory extension is missing support for %d\n", + requiredHandleType); + + return CL_INVALID_VALUE; +} + +cl_int check_external_semaphore_handle_type( + cl_device_id deviceID, + cl_external_semaphore_handle_type_khr requiredHandleType) +{ + unsigned int i; + cl_external_semaphore_handle_type_khr *handle_type; + size_t handle_type_size = 0; + cl_int errNum = CL_SUCCESS; + + errNum = + clGetDeviceInfo(deviceID, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, + 0, NULL, &handle_type_size); + handle_type = + (cl_external_semaphore_handle_type_khr *)malloc(handle_type_size); + + errNum = + clGetDeviceInfo(deviceID, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, + handle_type_size, handle_type, NULL); + + test_error( + errNum, + "Unable to query CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR \n"); + + for (i = 0; i < handle_type_size; i++) + { + if (requiredHandleType == handle_type[i]) + { + return CL_SUCCESS; + } + } + log_error("cl_khr_external_semaphore extension is missing support for %d\n", + requiredHandleType); + + return CL_INVALID_VALUE; +} +clExternalMemory::clExternalMemory() {} + +clExternalMemory::clExternalMemory(const clExternalMemory &externalMemory) + : m_externalMemory(externalMemory.m_externalMemory) +{} + +clExternalMemory::clExternalMemory( + const VulkanDeviceMemory *deviceMemory, + VulkanExternalMemoryHandleType externalMemoryHandleType, uint64_t offset, + uint64_t size, cl_context context, cl_device_id deviceId) +{ + int err = 0; + m_externalMemory = NULL; + cl_device_id devList[] = { deviceId, NULL }; + std::vector extMemProperties; +#ifdef _WIN32 + if (!is_extension_available(devList[0], "cl_khr_external_memory_win32")) + { + throw std::runtime_error( + "Device does not support cl_khr_external_memory_win32 extension\n"); + } +#else + if 
(!is_extension_available(devList[0], "cl_khr_external_memory_opaque_fd")) + { + throw std::runtime_error( + "Device does not support cl_khr_external_memory_opaque_fd " + "extension \n"); + } +#endif + + switch (externalMemoryHandleType) + { + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: +#ifdef _WIN32 + ASSERT(0); +#endif + log_info("Opaque file descriptors are not supported on Windows\n"); + fd = (int)deviceMemory->getHandle(externalMemoryHandleType); + err = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR); + extMemProperties.push_back( + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR); + extMemProperties.push_back((cl_mem_properties)fd); + break; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT: +#ifndef _WIN32 + ASSERT(0); +#else + log_info(" Opaque NT handles are only supported on Windows\n"); + handle = deviceMemory->getHandle(externalMemoryHandleType); + err = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR); + extMemProperties.push_back( + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR); + extMemProperties.push_back((cl_mem_properties)handle); +#endif + break; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT: +#ifndef _WIN32 + ASSERT(0); +#else + log_info("Opaque D3DKMT handles are only supported on Windows\n"); + handle = deviceMemory->getHandle(externalMemoryHandleType); + err = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR); + extMemProperties.push_back( + (cl_mem_properties) + CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR); + extMemProperties.push_back((cl_mem_properties)handle); +#endif + break; + default: + ASSERT(0); + log_error("Unsupported external memory handle type\n"); + break; + } + if (CL_SUCCESS != err) + { + throw std::runtime_error("Unsupported external memory type\n "); + } + + extMemProperties.push_back((cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR); + 
extMemProperties.push_back((cl_mem_properties)devList[0]); + extMemProperties.push_back( + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR); + extMemProperties.push_back(0); + + m_externalMemory = clCreateBufferWithProperties( + context, extMemProperties.data(), 1, size, NULL, &err); + if (CL_SUCCESS != err) + { + log_error("clCreateBufferWithProperties failed with %d\n", err); + throw std::runtime_error("clCreateBufferWithProperties failed "); + } +} +clExternalMemoryImage::clExternalMemoryImage( + const VulkanDeviceMemory &deviceMemory, + VulkanExternalMemoryHandleType externalMemoryHandleType, cl_context context, + size_t totalImageMemSize, size_t imageWidth, size_t imageHeight, + size_t totalSize, const VulkanImage2D &image2D, cl_device_id deviceId) +{ + cl_int errcode_ret = 0; + std::vector extMemProperties1; + cl_device_id devList[] = { deviceId, NULL }; + +#ifdef _WIN32 + if (!is_extension_available(devList[0], "cl_khr_external_memory_win32")) + { + throw std::runtime_error("Device does not support " + "cl_khr_external_memory_win32 extension \n"); + } +#elif !defined(__APPLE__) + if (!is_extension_available(devList[0], "cl_khr_external_memory_opaque_fd")) + { + throw std::runtime_error( + "Device does not support cl_khr_external_memory_opaque_fd " + "extension\n"); + } +#endif + + switch (externalMemoryHandleType) + { +#ifdef _WIN32 + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT: + log_info("Opaque NT handles are only supported on Windows\n"); + handle = deviceMemory.getHandle(externalMemoryHandleType); + errcode_ret = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR); + extMemProperties1.push_back( + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR); + extMemProperties1.push_back((cl_mem_properties)handle); + break; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT: + log_info("Opaque D3DKMT handles are only supported on Windows\n"); + handle = 
deviceMemory.getHandle(externalMemoryHandleType); + errcode_ret = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR); + extMemProperties1.push_back( + (cl_mem_properties) + CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR); + extMemProperties1.push_back((cl_mem_properties)handle); + break; +#elif !defined(__APPLE__) + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: + log_info(" Opaque file descriptors are not supported on Windows\n"); + fd = (int)deviceMemory.getHandle(externalMemoryHandleType); + errcode_ret = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR); + extMemProperties1.push_back( + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR); + extMemProperties1.push_back((cl_mem_properties)fd); + break; +#endif + default: + ASSERT(0); + log_error("Unsupported external memory handle type\n"); + break; + } + if (CL_SUCCESS != errcode_ret) + { + throw std::runtime_error("Unsupported external memory type\n "); + } + // Set cl_image_desc + size_t clImageFormatSize; + cl_image_desc image_desc; + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + cl_image_format img_format = { 0 }; + const VkImageCreateInfo VulkanImageCreateInfo = + image2D.getVkImageCreateInfo(); + + errcode_ret = getCLImageInfoFromVkImageInfo( + &VulkanImageCreateInfo, image2D.getSize(), &img_format, &image_desc); + if (CL_SUCCESS != errcode_ret) + { + throw std::runtime_error("getCLImageInfoFromVkImageInfo failed!!!"); + } + + extMemProperties1.push_back((cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR); + extMemProperties1.push_back((cl_mem_properties)devList[0]); + extMemProperties1.push_back( + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR); + extMemProperties1.push_back(0); + m_externalMemory = clCreateImageWithProperties( + context, extMemProperties1.data(), CL_MEM_READ_WRITE, &img_format, + &image_desc, NULL, &errcode_ret); + if (CL_SUCCESS != errcode_ret) + { + throw 
std::runtime_error("clCreateImageWithProperties failed!!!"); + } +} + +cl_mem clExternalMemory::getExternalMemoryBuffer() { return m_externalMemory; } + +cl_mem clExternalMemoryImage::getExternalMemoryImage() +{ + return m_externalMemory; +} + +clExternalMemoryImage::~clExternalMemoryImage() +{ + clReleaseMemObject(m_externalMemory); +} + +clExternalMemory::~clExternalMemory() { clReleaseMemObject(m_externalMemory); } + +clExternalMemoryImage::clExternalMemoryImage() {} + + +////////////////////////////////////////// +// clExternalSemaphore implementation // +////////////////////////////////////////// + +clExternalSemaphore::clExternalSemaphore( + const clExternalSemaphore &externalSemaphore) + : m_externalSemaphore(externalSemaphore.m_externalSemaphore) +{} + +clExternalSemaphore::clExternalSemaphore( + const VulkanSemaphore &semaphore, cl_context context, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType, + cl_device_id deviceId) +{ + + cl_int err = 0; + cl_device_id devList[] = { deviceId, NULL }; + +#ifdef _WIN32 + if (!is_extension_available(devList[0], "cl_khr_external_semaphore_win32")) + { + throw std::runtime_error("Device does not support " + "cl_khr_external_semaphore_win32 extension\n"); + } +#elif !defined(__APPLE__) + if (!is_extension_available(devList[0], + "cl_khr_external_semaphore_opaque_fd")) + { + throw std::runtime_error( + "Device does not support cl_khr_external_semaphore_opaque_fd " + "extension \n"); + } +#endif + + std::vector sema_props{ + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + }; + switch (externalSemaphoreHandleType) + { + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD: +#ifdef _WIN32 + ASSERT(0); +#else + log_info(" Opaque file descriptors are not supported on Windows\n"); + fd = (int)semaphore.getHandle(externalSemaphoreHandleType); + err = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR); + 
sema_props.push_back( + (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR); + sema_props.push_back((cl_semaphore_properties_khr)fd); +#endif + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT: +#ifndef _WIN32 + ASSERT(0); +#else + log_info(" Opaque NT handles are only supported on Windows\n"); + handle = semaphore.getName().size() + ? NULL + : semaphore.getHandle(externalSemaphoreHandleType); + err = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR); + sema_props.push_back((cl_semaphore_properties_khr) + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR); + sema_props.push_back((cl_semaphore_properties_khr)handle); +#endif + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT: +#ifndef _WIN32 + ASSERT(0); +#else + log_info(" Opaque D3DKMT handles are only supported on Windows\n"); + handle = semaphore.getHandle(externalSemaphoreHandleType); + err = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR); + sema_props.push_back((cl_semaphore_properties_khr) + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR); + sema_props.push_back((cl_semaphore_properties_khr)handle); +#endif + break; + default: + ASSERT(0); + log_error("Unsupported external memory handle type\n"); + break; + } + if (CL_SUCCESS != err) + { + throw std::runtime_error( + "Unsupported external sempahore handle type\n "); + } + + sema_props.push_back( + (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_KHR); + sema_props.push_back((cl_semaphore_properties_khr)devList[0]); + sema_props.push_back( + (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_END_KHR); + sema_props.push_back(0); + m_externalSemaphore = + clCreateSemaphoreWithPropertiesKHRptr(context, sema_props.data(), &err); + if (CL_SUCCESS != err) + { + log_error("clCreateSemaphoreWithPropertiesKHRptr failed with %d\n", + err); + throw std::runtime_error( + "clCreateSemaphoreWithPropertiesKHRptr failed! 
"); + } +} + +clExternalSemaphore::~clExternalSemaphore() +{ + cl_int err = clReleaseSemaphoreObjectKHRptr(m_externalSemaphore); + if (err != CL_SUCCESS) + { + throw std::runtime_error("clReleaseSemaphoreObjectKHR failed!"); + } +} + +void clExternalSemaphore::signal(cl_command_queue cmd_queue) +{ + clEnqueueSignalSemaphoresKHRptr(cmd_queue, 1, &m_externalSemaphore, NULL, 0, + NULL, NULL); +} + +void clExternalSemaphore::wait(cl_command_queue cmd_queue) +{ + clEnqueueWaitSemaphoresKHRptr(cmd_queue, 1, &m_externalSemaphore, NULL, 0, + NULL, NULL); +} diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp new file mode 100644 index 00000000..c1d2a766 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp @@ -0,0 +1,129 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _opencl_vulkan_wrapper_hpp_ +#define _opencl_vulkan_wrapper_hpp_ + +#include "vulkan_wrapper.hpp" + +#if !defined(__APPLE__) +#include +#include +#else +#include +#include +#endif + +typedef cl_semaphore_khr (*pfnclCreateSemaphoreWithPropertiesKHR)( + cl_context context, cl_semaphore_properties_khr *sema_props, + cl_int *errcode_ret); +typedef cl_int (*pfnclEnqueueWaitSemaphoresKHR)( + cl_command_queue command_queue, cl_uint num_semaphores, + const cl_semaphore_khr *sema_list, + const cl_semaphore_payload_khr *sema_payload_list, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event); +typedef cl_int (*pfnclEnqueueSignalSemaphoresKHR)( + cl_command_queue command_queue, cl_uint num_semaphores, + const cl_semaphore_khr *sema_list, + const cl_semaphore_payload_khr *sema_payload_list, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event); +typedef cl_int (*pfnclEnqueueAcquireExternalMemObjectsKHR)( + cl_command_queue command_queue, cl_uint num_mem_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); +typedef cl_int (*pfnclEnqueueReleaseExternalMemObjectsKHR)( + cl_command_queue command_queue, cl_uint num_mem_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); +typedef cl_int (*pfnclReleaseSemaphoreObjectKHR)(cl_semaphore_khr sema_object); + +extern pfnclCreateSemaphoreWithPropertiesKHR + clCreateSemaphoreWithPropertiesKHRptr; +extern pfnclEnqueueWaitSemaphoresKHR clEnqueueWaitSemaphoresKHRptr; +extern pfnclEnqueueSignalSemaphoresKHR clEnqueueSignalSemaphoresKHRptr; +extern pfnclEnqueueAcquireExternalMemObjectsKHR + clEnqueueAcquireExternalMemObjectsKHRptr; +extern pfnclEnqueueReleaseExternalMemObjectsKHR + clEnqueueReleaseExternalMemObjectsKHRptr; +extern pfnclReleaseSemaphoreObjectKHR clReleaseSemaphoreObjectKHRptr; + +cl_int 
getCLImageInfoFromVkImageInfo(const VkImageCreateInfo *, size_t, + cl_image_format *, cl_image_desc *); +cl_int check_external_memory_handle_type( + cl_device_id deviceID, + cl_external_memory_handle_type_khr requiredHandleType); +cl_int check_external_semaphore_handle_type( + cl_device_id deviceID, + cl_external_semaphore_handle_type_khr requiredHandleType); + +class clExternalMemory { +protected: + cl_mem m_externalMemory; + int fd; + void *handle; + clExternalMemory(const clExternalMemory &externalMemory); + +public: + clExternalMemory(); + clExternalMemory(const VulkanDeviceMemory *deviceMemory, + VulkanExternalMemoryHandleType externalMemoryHandleType, + uint64_t offset, uint64_t size, cl_context context, + cl_device_id deviceId); + + virtual ~clExternalMemory(); + cl_mem getExternalMemoryBuffer(); +}; +class clExternalMemoryImage { +protected: + cl_mem m_externalMemory; + int fd; + void *handle; + cl_command_queue cmd_queue; + clExternalMemoryImage(); + +public: + clExternalMemoryImage( + const VulkanDeviceMemory &deviceMemory, + VulkanExternalMemoryHandleType externalMemoryHandleType, + cl_context context, size_t totalImageMemSize, size_t imageWidth, + size_t imageHeight, size_t totalSize, const VulkanImage2D &image2D, + cl_device_id deviceId); + virtual ~clExternalMemoryImage(); + cl_mem getExternalMemoryImage(); +}; + +class clExternalSemaphore { +protected: + cl_semaphore_khr m_externalSemaphore; + int fd; + void *handle; + clExternalSemaphore(const clExternalSemaphore &externalSemaphore); + +public: + clExternalSemaphore( + const VulkanSemaphore &deviceSemaphore, cl_context context, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType, + cl_device_id deviceId); + virtual ~clExternalSemaphore(); + void signal(cl_command_queue command_queue); + void wait(cl_command_queue command_queue); + // operator openclExternalSemaphore_t() const; +}; + +extern void init_cl_vk_ext(cl_platform_id); + +#endif // _opencl_vulkan_wrapper_hpp_ diff --git 
//
// Copyright (c) 2022 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#ifndef _vulkan_api_list_hpp_
#define _vulkan_api_list_hpp_

// X-macro list of every Vulkan entry point the interop tests resolve at
// runtime.  Expand by defining VK_FUNC_DECL(name) before using the list.
#define VK_FUNC_LIST \
    VK_FUNC_DECL(vkEnumerateInstanceVersion) \
    VK_FUNC_DECL(vkEnumerateInstanceExtensionProperties) \
    VK_FUNC_DECL(vkEnumerateInstanceLayerProperties) \
    VK_FUNC_DECL(vkCreateInstance) \
    VK_FUNC_DECL(vkGetInstanceProcAddr) \
    VK_FUNC_DECL(vkGetDeviceProcAddr) \
    VK_FUNC_DECL(vkEnumeratePhysicalDevices) \
    VK_FUNC_DECL(vkGetPhysicalDeviceProperties) \
    VK_FUNC_DECL(vkCreateDevice) \
    VK_FUNC_DECL(vkDestroyDevice) \
    VK_FUNC_DECL(vkGetDeviceQueue) \
    VK_FUNC_DECL(vkQueueWaitIdle) \
    VK_FUNC_DECL(vkCreateDescriptorSetLayout) \
    VK_FUNC_DECL(vkCreatePipelineLayout) \
    VK_FUNC_DECL(vkCreateShaderModule) \
    VK_FUNC_DECL(vkCreateComputePipelines) \
    VK_FUNC_DECL(vkCreateDescriptorPool) \
    VK_FUNC_DECL(vkAllocateDescriptorSets) \
    VK_FUNC_DECL(vkFreeDescriptorSets) \
    VK_FUNC_DECL(vkAllocateCommandBuffers) \
    VK_FUNC_DECL(vkBeginCommandBuffer) \
    VK_FUNC_DECL(vkCmdBindPipeline) \
    VK_FUNC_DECL(vkCmdBindDescriptorSets) \
    VK_FUNC_DECL(vkCmdPipelineBarrier) \
    VK_FUNC_DECL(vkCmdDispatch) \
    VK_FUNC_DECL(vkCmdFillBuffer) \
    VK_FUNC_DECL(vkCmdCopyBuffer) \
    VK_FUNC_DECL(vkCmdUpdateBuffer) \
    VK_FUNC_DECL(vkCmdCopyBufferToImage) \
    VK_FUNC_DECL(vkCmdCopyImageToBuffer) \
    VK_FUNC_DECL(vkEndCommandBuffer) \
    VK_FUNC_DECL(vkCreateBuffer) \
    VK_FUNC_DECL(vkCreateImageView) \
    VK_FUNC_DECL(vkAllocateMemory) \
    VK_FUNC_DECL(vkMapMemory) \
    VK_FUNC_DECL(vkBindBufferMemory) \
    VK_FUNC_DECL(vkBindImageMemory) \
    VK_FUNC_DECL(vkUnmapMemory) \
    VK_FUNC_DECL(vkFreeMemory) \
    VK_FUNC_DECL(vkCreateCommandPool) \
    VK_FUNC_DECL(vkResetCommandPool) \
    VK_FUNC_DECL(vkDestroyCommandPool) \
    VK_FUNC_DECL(vkResetCommandBuffer) \
    VK_FUNC_DECL(vkFreeCommandBuffers) \
    VK_FUNC_DECL(vkQueueSubmit) \
    VK_FUNC_DECL(vkCmdExecuteCommands) \
    VK_FUNC_DECL(vkCreateFence) \
    VK_FUNC_DECL(vkDestroyFence) \
    VK_FUNC_DECL(vkGetFenceStatus) \
    VK_FUNC_DECL(vkResetFences) \
    VK_FUNC_DECL(vkWaitForFences) \
    VK_FUNC_DECL(vkCreateSemaphore) \
    VK_FUNC_DECL(vkDestroySemaphore) \
    VK_FUNC_DECL(vkCreateEvent) \
    VK_FUNC_DECL(vkDestroyImageView) \
    VK_FUNC_DECL(vkCreateImage) \
    VK_FUNC_DECL(vkGetImageMemoryRequirements) \
    VK_FUNC_DECL(vkDestroyImage) \
    VK_FUNC_DECL(vkDestroyBuffer) \
    VK_FUNC_DECL(vkDestroyPipeline) \
    VK_FUNC_DECL(vkDestroyShaderModule) \
    VK_FUNC_DECL(vkGetPhysicalDeviceMemoryProperties) \
    VK_FUNC_DECL(vkDestroyInstance) \
    VK_FUNC_DECL(vkUpdateDescriptorSets) \
    VK_FUNC_DECL(vkDestroyDescriptorPool) \
    VK_FUNC_DECL(vkDestroyPipelineLayout) \
    VK_FUNC_DECL(vkDestroyDescriptorSetLayout) \
    VK_FUNC_DECL(vkGetPhysicalDeviceQueueFamilyProperties) \
    VK_FUNC_DECL(vkGetPhysicalDeviceFeatures) \
    VK_FUNC_DECL(vkGetPhysicalDeviceProperties2KHR) \
    VK_FUNC_DECL(vkGetBufferMemoryRequirements) \
    VK_FUNC_DECL(vkGetMemoryFdKHR) \
    VK_FUNC_DECL(vkGetSemaphoreFdKHR) \
    VK_FUNC_DECL(vkEnumeratePhysicalDeviceGroups) \
    VK_FUNC_DECL(vkGetPhysicalDeviceSurfaceCapabilitiesKHR) \
    VK_FUNC_DECL(vkGetPhysicalDeviceSurfaceFormatsKHR) \
    VK_FUNC_DECL(vkGetPhysicalDeviceSurfacePresentModesKHR) \
    VK_FUNC_DECL(vkEnumerateDeviceExtensionProperties) \
    VK_FUNC_DECL(vkGetPhysicalDeviceSurfaceSupportKHR)

// Entry points that only exist on Windows.
#define VK_WINDOWS_FUNC_LIST \
    VK_FUNC_DECL(vkGetMemoryWin32HandleKHR) \
    VK_FUNC_DECL(vkGetSemaphoreWin32HandleKHR)

// Redirect each vk* name to the dynamically resolved _vk* pointer so the
// wrapper never links against the loader's exported symbols directly.
#define vkEnumerateInstanceVersion _vkEnumerateInstanceVersion
#define vkEnumerateInstanceExtensionProperties \
    _vkEnumerateInstanceExtensionProperties
#define vkEnumerateInstanceLayerProperties _vkEnumerateInstanceLayerProperties
#define vkCreateInstance _vkCreateInstance
#define vkGetInstanceProcAddr _vkGetInstanceProcAddr
#define vkGetDeviceProcAddr _vkGetDeviceProcAddr
#define vkEnumeratePhysicalDevices _vkEnumeratePhysicalDevices
#define vkGetPhysicalDeviceProperties _vkGetPhysicalDeviceProperties
#define vkCreateDevice _vkCreateDevice
#define vkDestroyDevice _vkDestroyDevice
#define vkGetDeviceQueue _vkGetDeviceQueue
#define vkQueueWaitIdle _vkQueueWaitIdle
#define vkCreateDescriptorSetLayout _vkCreateDescriptorSetLayout
#define vkCreatePipelineLayout _vkCreatePipelineLayout
#define vkCreateShaderModule _vkCreateShaderModule
#define vkCreateComputePipelines _vkCreateComputePipelines
#define vkCreateDescriptorPool _vkCreateDescriptorPool
#define vkAllocateDescriptorSets _vkAllocateDescriptorSets
#define vkFreeDescriptorSets _vkFreeDescriptorSets
#define vkAllocateCommandBuffers _vkAllocateCommandBuffers
#define vkBeginCommandBuffer _vkBeginCommandBuffer
#define vkCmdBindPipeline _vkCmdBindPipeline
#define vkCmdBindDescriptorSets _vkCmdBindDescriptorSets
#define vkCmdPipelineBarrier _vkCmdPipelineBarrier
#define vkCmdDispatch _vkCmdDispatch
#define vkCmdFillBuffer _vkCmdFillBuffer
#define vkCmdCopyBuffer _vkCmdCopyBuffer
#define vkCmdUpdateBuffer _vkCmdUpdateBuffer
#define vkCmdCopyBufferToImage _vkCmdCopyBufferToImage
#define vkCmdCopyImageToBuffer _vkCmdCopyImageToBuffer
#define vkEndCommandBuffer _vkEndCommandBuffer
#define vkCreateBuffer _vkCreateBuffer
#define vkCreateImageView _vkCreateImageView
#define vkAllocateMemory _vkAllocateMemory
#define vkMapMemory _vkMapMemory
#define vkBindBufferMemory _vkBindBufferMemory
#define vkBindImageMemory _vkBindImageMemory
#define vkUnmapMemory _vkUnmapMemory
#define vkFreeMemory _vkFreeMemory
#define vkCreateCommandPool _vkCreateCommandPool
#define vkResetCommandPool _vkResetCommandPool
#define vkDestroyCommandPool _vkDestroyCommandPool
#define vkResetCommandBuffer _vkResetCommandBuffer
#define vkFreeCommandBuffers _vkFreeCommandBuffers
#define vkQueueSubmit _vkQueueSubmit
#define vkCmdExecuteCommands _vkCmdExecuteCommands
#define vkCreateFence _vkCreateFence
#define vkDestroyFence _vkDestroyFence
#define vkGetFenceStatus _vkGetFenceStatus
#define vkResetFences _vkResetFences
#define vkWaitForFences _vkWaitForFences
#define vkCreateSemaphore _vkCreateSemaphore
#define vkDestroySemaphore _vkDestroySemaphore
#define vkCreateEvent _vkCreateEvent
#define vkDestroyImageView _vkDestroyImageView
#define vkCreateImage _vkCreateImage
#define vkGetImageMemoryRequirements _vkGetImageMemoryRequirements
#define vkDestroyImage _vkDestroyImage
// BUG FIX: was "#define vkDestroyBuffe _vkDestroyBuffer" -- the misspelled
// macro name meant vkDestroyBuffer calls were never redirected to the
// resolved pointer.
#define vkDestroyBuffer _vkDestroyBuffer
#define vkDestroyPipeline _vkDestroyPipeline
#define vkDestroyShaderModule _vkDestroyShaderModule
#define vkGetPhysicalDeviceMemoryProperties _vkGetPhysicalDeviceMemoryProperties
#define vkDestroyInstance _vkDestroyInstance
#define vkUpdateDescriptorSets _vkUpdateDescriptorSets
#define vkDestroyDescriptorPool _vkDestroyDescriptorPool
#define vkDestroyPipelineLayout _vkDestroyPipelineLayout
#define vkDestroyDescriptorSetLayout _vkDestroyDescriptorSetLayout
#define vkGetPhysicalDeviceQueueFamilyProperties \
    _vkGetPhysicalDeviceQueueFamilyProperties
#define vkGetPhysicalDeviceFeatures _vkGetPhysicalDeviceFeatures
#define vkGetPhysicalDeviceProperties2KHR _vkGetPhysicalDeviceProperties2KHR
#define vkGetBufferMemoryRequirements _vkGetBufferMemoryRequirements
#define vkGetMemoryFdKHR _vkGetMemoryFdKHR
#define vkGetSemaphoreFdKHR _vkGetSemaphoreFdKHR
#define vkEnumeratePhysicalDeviceGroups _vkEnumeratePhysicalDeviceGroups
#define vkGetPhysicalDeviceSurfaceCapabilitiesKHR \
    _vkGetPhysicalDeviceSurfaceCapabilitiesKHR
#define vkGetPhysicalDeviceSurfaceFormatsKHR \
    _vkGetPhysicalDeviceSurfaceFormatsKHR
#define vkGetPhysicalDeviceSurfacePresentModesKHR \
    _vkGetPhysicalDeviceSurfacePresentModesKHR
#define vkEnumerateDeviceExtensionProperties \
    _vkEnumerateDeviceExtensionProperties
#define vkGetPhysicalDeviceSurfaceSupportKHR \
    _vkGetPhysicalDeviceSurfaceSupportKHR

#define vkGetMemoryWin32HandleKHR _vkGetMemoryWin32HandleKHR
#define vkGetSemaphoreWin32HandleKHR _vkGetSemaphoreWin32HandleKHR

#endif //_vulkan_api_list_hpp_
+// + +#include "vulkan_interop_common.hpp" + +uint32_t innerIterations(5); +uint32_t perfIterations(100); +uint32_t stressIterations(1000); +size_t cpuThreadsPerGpu(3); diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.hpp new file mode 100644 index 00000000..18d84f09 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.hpp @@ -0,0 +1,50 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef _vulkan_interop_common_hpp_ +#define _vulkan_interop_common_hpp_ + +#include "vulkan_wrapper_types.hpp" +#include "vulkan_wrapper.hpp" +#include "vulkan_list_map.hpp" +#include "vulkan_utility.hpp" +#include "opencl_vulkan_wrapper.hpp" + +// Number of iterations for loops within tests (default value 5) +extern unsigned int innerIterations; +// Number of iterations for loops within perf tests (default value 100) +extern unsigned int perfIterations; +// Number of iterations for loops within stress tests (default value 1000) +extern unsigned int stressIterations; +// Number of CPU threads per GPU (default value 3) +extern size_t cpuThreadsPerGpu; +// Number of command queues (default value 1) +extern unsigned int numCQ; +// Enable Multi-import of vulkan device memory +extern bool multiImport; +// Enable Multi-import of vulkan device memory under different context +extern bool multiCtx; +// Enable additional debug info logging +extern bool debug_trace; + +extern bool useSingleImageKernel; +extern bool useDeviceLocal; +extern bool disableNTHandleType; +// Enable offset for multiImport of vulkan device memory +extern bool enableOffset; +extern bool non_dedicated; + +#endif // _vulkan_interop_common_hpp_ diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.cpp new file mode 100644 index 00000000..bdae5d22 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.cpp @@ -0,0 +1,424 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifdef _WIN32 +#define NOMINMAX +#endif +#include "vulkan_list_map.hpp" +#include "vulkan_utility.hpp" +#include "vulkan_wrapper.hpp" + +///////////////////////////////////////////// +// VulkanPhysicalDeviceList implementation // +///////////////////////////////////////////// + +VulkanPhysicalDeviceList::VulkanPhysicalDeviceList( + const VulkanPhysicalDeviceList &physicalDeviceList) +{} + +VulkanPhysicalDeviceList::VulkanPhysicalDeviceList() {} + +VulkanPhysicalDeviceList::~VulkanPhysicalDeviceList() {} + +///////////////////////////////////////// +// VulkanMemoryHeapList implementation // +///////////////////////////////////////// + +VulkanMemoryHeapList::VulkanMemoryHeapList( + const VulkanMemoryHeapList &memoryHeapList) +{} + +VulkanMemoryHeapList::VulkanMemoryHeapList() {} + +VulkanMemoryHeapList::~VulkanMemoryHeapList() {} + +///////////////////////////////////////// +// VulkanMemoryTypeList implementation // +///////////////////////////////////////// + +VulkanMemoryTypeList::VulkanMemoryTypeList( + const VulkanMemoryTypeList &memoryTypeList) +{} + +VulkanMemoryTypeList::VulkanMemoryTypeList() {} + +VulkanMemoryTypeList::~VulkanMemoryTypeList() {} + +////////////////////////////////////////// +// VulkanQueueFamilyList implementation // +////////////////////////////////////////// + +VulkanQueueFamilyList::VulkanQueueFamilyList( + const VulkanQueueFamilyList &queueFamilyList) +{} + +VulkanQueueFamilyList::VulkanQueueFamilyList() {} + +VulkanQueueFamilyList::~VulkanQueueFamilyList() {} + 
+///////////////////////////////////////////////////// +// VulkanQueueFamilyToQueueCountMap implementation // +///////////////////////////////////////////////////// + +VulkanQueueFamilyToQueueCountMap::VulkanQueueFamilyToQueueCountMap( + const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap) +{} + +VulkanQueueFamilyToQueueCountMap::VulkanQueueFamilyToQueueCountMap( + uint32_t numQueuesPerFamily) +{ + uint32_t maxQueueFamilyCount = 0; + const VulkanPhysicalDeviceList &physicalDeviceList = + getVulkanInstance().getPhysicalDeviceList(); + for (size_t pdIdx = 0; pdIdx < physicalDeviceList.size(); pdIdx++) + { + maxQueueFamilyCount = std::max( + maxQueueFamilyCount, + (uint32_t)physicalDeviceList[pdIdx].getQueueFamilyList().size()); + } + + for (uint32_t qfIdx = 0; qfIdx < maxQueueFamilyCount; qfIdx++) + { + insert(qfIdx, numQueuesPerFamily); + } +} + +VulkanQueueFamilyToQueueCountMap::~VulkanQueueFamilyToQueueCountMap() {} + +//////////////////////////////////////////////////// +// VulkanQueueFamilyToQueueListMap implementation // +//////////////////////////////////////////////////// + +VulkanQueueFamilyToQueueListMap::VulkanQueueFamilyToQueueListMap( + const VulkanQueueFamilyToQueueListMap &queueFamilyToQueueMap) +{} + +VulkanQueueFamilyToQueueListMap::VulkanQueueFamilyToQueueListMap() {} + +VulkanQueueFamilyToQueueListMap::~VulkanQueueFamilyToQueueListMap() {} + +void VulkanQueueFamilyToQueueListMap::insert(uint32_t key, + VulkanQueueList &queueList) +{ + m_map.insert(std::pair>( + key, std::reference_wrapper(queueList))); +} + +VulkanQueueList &VulkanQueueFamilyToQueueListMap::operator[](uint32_t key) +{ + return m_map.at(key).get(); +} + +//////////////////////////////////// +// VulkanQueueList implementation // +//////////////////////////////////// + +VulkanQueueList::VulkanQueueList(const VulkanQueueList &queueList) {} + +VulkanQueueList::VulkanQueueList() {} + +VulkanQueueList::~VulkanQueueList() {} + 
+///////////////////////////////////////////////////////// +// VulkanDescriptorSetLayoutBindingList implementation // +///////////////////////////////////////////////////////// + +VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList( + const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList) +{} + +VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList() {} + +VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList( + size_t numDescriptorSetLayoutBindings, VulkanDescriptorType descriptorType, + uint32_t descriptorCount, VulkanShaderStage shaderStage) +{ + for (size_t idx = 0; idx < numDescriptorSetLayoutBindings; idx++) + { + VulkanDescriptorSetLayoutBinding *descriptorSetLayoutBinding = + new VulkanDescriptorSetLayoutBinding((uint32_t)idx, descriptorType, + descriptorCount, shaderStage); + add(*descriptorSetLayoutBinding); + } +} + +VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList( + VulkanDescriptorType descriptorType0, uint32_t descriptorCount0, + VulkanDescriptorType descriptorType1, uint32_t descriptorCount1, + VulkanShaderStage shaderStage) +{ + for (uint32_t idx = 0; idx < descriptorCount0; idx++) + { + VulkanDescriptorSetLayoutBinding *descriptorSetLayoutBinding0 = + new VulkanDescriptorSetLayoutBinding(idx, descriptorType0, 1, + shaderStage); + add(*descriptorSetLayoutBinding0); + } + for (uint32_t idx = 0; idx < descriptorCount1; idx++) + { + VulkanDescriptorSetLayoutBinding *descriptorSetLayoutBinding1 = + new VulkanDescriptorSetLayoutBinding( + descriptorCount0 + idx, descriptorType1, 1, shaderStage); + add(*descriptorSetLayoutBinding1); + } +} + +VulkanDescriptorSetLayoutBindingList::~VulkanDescriptorSetLayoutBindingList() +{ + for (size_t idx = 0; idx < m_wrapperList.size(); idx++) + { + VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding = + m_wrapperList[idx]; + delete &descriptorSetLayoutBinding; + } +} + 
+////////////////////////////////////////////////// +// VulkanDescriptorSetLayoutList implementation // +////////////////////////////////////////////////// + +VulkanDescriptorSetLayoutList::VulkanDescriptorSetLayoutList( + const VulkanDescriptorSetLayoutList &descriptorSetLayoutList) +{} + +VulkanDescriptorSetLayoutList::VulkanDescriptorSetLayoutList() {} + +VulkanDescriptorSetLayoutList::~VulkanDescriptorSetLayoutList() {} + +//////////////////////////////////////////// +// VulkanCommandBufferList implementation // +//////////////////////////////////////////// + +VulkanCommandBufferList::VulkanCommandBufferList( + const VulkanCommandBufferList &commandBufferList) +{} + +VulkanCommandBufferList::VulkanCommandBufferList() {} + +VulkanCommandBufferList::VulkanCommandBufferList( + size_t numCommandBuffers, const VulkanDevice &device, + const VulkanCommandPool &commandPool) +{ + for (size_t idx = 0; idx < numCommandBuffers; idx++) + { + VulkanCommandBuffer *commandBuffer = + new VulkanCommandBuffer(device, commandPool); + add(*commandBuffer); + } +} + +VulkanCommandBufferList::~VulkanCommandBufferList() +{ + for (size_t idx = 0; idx < m_wrapperList.size(); idx++) + { + VulkanCommandBuffer &commandBuffer = m_wrapperList[idx]; + delete &commandBuffer; + } +} + +///////////////////////////////////// +// VulkanBufferList implementation // +///////////////////////////////////// + +VulkanBufferList::VulkanBufferList(const VulkanBufferList &bufferList) {} + +VulkanBufferList::VulkanBufferList( + size_t numBuffers, const VulkanDevice &device, uint64_t size, + VulkanExternalMemoryHandleType externalMemoryHandleType, + VulkanBufferUsage bufferUsage, VulkanSharingMode sharingMode, + const VulkanQueueFamilyList &queueFamilyList) +{ + for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) + { + VulkanBuffer *buffer = + new VulkanBuffer(device, size, externalMemoryHandleType, + bufferUsage, sharingMode, queueFamilyList); + add(*buffer); + } +} + +VulkanBufferList::~VulkanBufferList() +{ + 
for (size_t bIdx = 0; bIdx < m_wrapperList.size(); bIdx++) + { + VulkanBuffer &buffer = m_wrapperList[bIdx]; + delete &buffer; + } +} + +////////////////////////////////////// +// VulkanImage2DList implementation // +////////////////////////////////////// + +VulkanImage2DList::VulkanImage2DList(const VulkanImage2DList &image2DList) {} + +VulkanImage2DList::VulkanImage2DList( + size_t numImages, std::vector &deviceMemory, + uint64_t baseOffset, uint64_t interImageOffset, const VulkanDevice &device, + VulkanFormat format, uint32_t width, uint32_t height, uint32_t mipLevels, + VulkanExternalMemoryHandleType externalMemoryHandleType, + VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage, + VulkanSharingMode sharingMode) +{ + for (size_t i2DIdx = 0; i2DIdx < numImages; i2DIdx++) + { + VulkanImage2D *image2D = new VulkanImage2D( + device, format, width, height, mipLevels, externalMemoryHandleType, + imageCreateFlag, imageUsage, sharingMode); + add(*image2D); + deviceMemory[i2DIdx]->bindImage( + *image2D, baseOffset + (i2DIdx * interImageOffset)); + } +} + +VulkanImage2DList::VulkanImage2DList( + size_t numImages, const VulkanDevice &device, VulkanFormat format, + uint32_t width, uint32_t height, uint32_t mipLevels, + VulkanExternalMemoryHandleType externalMemoryHandleType, + VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage, + VulkanSharingMode sharingMode) +{ + for (size_t bIdx = 0; bIdx < numImages; bIdx++) + { + VulkanImage2D *image2D = new VulkanImage2D( + device, format, width, height, mipLevels, externalMemoryHandleType, + imageCreateFlag, imageUsage, sharingMode); + add(*image2D); + } +} + +VulkanImage2DList::~VulkanImage2DList() +{ + for (size_t i2DIdx = 0; i2DIdx < m_wrapperList.size(); i2DIdx++) + { + VulkanImage2D &image2D = m_wrapperList[i2DIdx]; + delete &image2D; + } +} + +//////////////////////////////////////// +// VulkanImageViewList implementation // +//////////////////////////////////////// + 
+VulkanImageViewList::VulkanImageViewList(const VulkanImageViewList &image2DList) +{} + +VulkanImageViewList::VulkanImageViewList(const VulkanDevice &device, + const VulkanImage2DList &image2DList, + bool createImageViewPerMipLevel) +{ + for (size_t i2DIdx = 0; i2DIdx < image2DList.size(); i2DIdx++) + { + if (createImageViewPerMipLevel) + { + for (uint32_t mipLevel = 0; + mipLevel < image2DList[i2DIdx].getNumMipLevels(); mipLevel++) + { + VulkanImageView *image2DView = + new VulkanImageView(device, image2DList[i2DIdx], + VULKAN_IMAGE_VIEW_TYPE_2D, mipLevel, 1); + add(*image2DView); + } + } + else + { + VulkanImageView *image2DView = new VulkanImageView( + device, image2DList[i2DIdx], VULKAN_IMAGE_VIEW_TYPE_2D); + add(*image2DView); + } + } +} + +VulkanImageViewList::~VulkanImageViewList() +{ + for (size_t ivIdx = 0; ivIdx < m_wrapperList.size(); ivIdx++) + { + VulkanImageView &imageView = m_wrapperList[ivIdx]; + delete &imageView; + } +} + +/////////////////////////////////////////// +// VulkanDeviceMemoryList implementation // +/////////////////////////////////////////// + +VulkanDeviceMemoryList::VulkanDeviceMemoryList( + const VulkanDeviceMemoryList &deviceMemoryList) +{} + +VulkanDeviceMemoryList::VulkanDeviceMemoryList( + size_t numImages, const VulkanImage2DList &image2DList, + const VulkanDevice &device, const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType) +{ + for (size_t i2DIdx = 0; i2DIdx < image2DList.size(); i2DIdx++) + { + VulkanDeviceMemory *deviceMemory = new VulkanDeviceMemory( + device, image2DList[i2DIdx], memoryType, externalMemoryHandleType); + add(*deviceMemory); + deviceMemory->bindImage(image2DList[i2DIdx]); + } +} + +VulkanDeviceMemoryList::~VulkanDeviceMemoryList() +{ + for (size_t dmIdx = 0; dmIdx < m_wrapperList.size(); dmIdx++) + { + VulkanDeviceMemory &deviceMemory = m_wrapperList[dmIdx]; + delete &deviceMemory; + } +} + +//////////////////////////////////////// +// VulkanSemaphoreList 
implementation // +//////////////////////////////////////// + +VulkanSemaphoreList::VulkanSemaphoreList( + const VulkanSemaphoreList &semaphoreList) +{} + +VulkanSemaphoreList::VulkanSemaphoreList() {} + +VulkanSemaphoreList::VulkanSemaphoreList( + size_t numSemaphores, const VulkanDevice &device, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType, + const std::wstring namePrefix) +{ + std::wstring name = L""; + for (size_t idx = 0; idx < numSemaphores; idx++) + { + if (namePrefix.size()) + { + const size_t maxNameSize = 256; + wchar_t tempName[maxNameSize]; + swprintf(tempName, maxNameSize, L"%s%d", namePrefix.c_str(), + (int)idx); + name = tempName; + } + VulkanSemaphore *semaphore = + new VulkanSemaphore(device, externalSemaphoreHandleType, name); + add(*semaphore); + } +} + +VulkanSemaphoreList::~VulkanSemaphoreList() +{ + for (size_t idx = 0; idx < m_wrapperList.size(); idx++) + { + VulkanSemaphore &Semaphore = m_wrapperList[idx]; + delete &Semaphore; + } +} diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp new file mode 100644 index 00000000..831403e1 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp @@ -0,0 +1,389 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef _vulkan_list_map_hpp_ +#define _vulkan_list_map_hpp_ + +#include +#include "vulkan_wrapper_types.hpp" +#include "vulkan_utility.hpp" +#include +template class VulkanList { +protected: + std::vector> m_wrapperList; + std::vector> m_constWrapperList; + std::vector m_nativeList; + + VulkanList(const VulkanList &list); + VulkanList(); + virtual ~VulkanList(); + virtual void add(VulkanWrapper &wrapper); + +public: + virtual void add(const VulkanWrapper &wrapper); + virtual size_t size() const; + virtual const VulkanWrapper &operator[](size_t idx) const; + virtual VulkanWrapper &operator[](size_t idx); + virtual operator const VulkanNative *() const; +}; + +template class VulkanMap { +protected: + std::map m_map; + + VulkanMap(const VulkanMap &map); + VulkanMap(); + virtual ~VulkanMap(); + +public: + void insert(const VulkanKey &key, VulkanValue &value); + const VulkanValue &operator[](const VulkanKey &key) const; + VulkanValue &operator[](const VulkanKey &key); +}; + +class VulkanPhysicalDeviceList + : public VulkanList { + friend class VulkanInstance; + +protected: + VulkanPhysicalDeviceList( + const VulkanPhysicalDeviceList &physicalDeviceList); + +public: + VulkanPhysicalDeviceList(); + virtual ~VulkanPhysicalDeviceList(); +}; + +class VulkanQueueFamilyList : public VulkanList { + friend class VulkanPhysicalDevice; + +protected: + VulkanQueueFamilyList(const VulkanQueueFamilyList &queueFamilyList); + +public: + VulkanQueueFamilyList(); + virtual ~VulkanQueueFamilyList(); +}; + +class VulkanMemoryHeapList : public VulkanList { + friend class VulkanPhysicalDevice; + +protected: + VulkanMemoryHeapList(const VulkanMemoryHeapList &memoryHeapList); + +public: + VulkanMemoryHeapList(); + virtual ~VulkanMemoryHeapList(); +}; + +class VulkanMemoryTypeList : public VulkanList { + friend class VulkanPhysicalDevice; + friend class VulkanBuffer; + friend class VulkanImage; + +protected: + VulkanMemoryTypeList(const VulkanMemoryTypeList &memoryTypeList); + +public: 
+ VulkanMemoryTypeList(); + virtual ~VulkanMemoryTypeList(); +}; + +class VulkanQueueFamilyToQueueCountMap : public VulkanMap { +protected: + VulkanQueueFamilyToQueueCountMap( + const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap); + +public: + VulkanQueueFamilyToQueueCountMap(uint32_t numQueuesPerFamily = 0); + virtual ~VulkanQueueFamilyToQueueCountMap(); +}; + +class VulkanQueueList : public VulkanList { + friend class VulkanDevice; + +protected: + VulkanQueueList(const VulkanQueueList &queueList); + +public: + VulkanQueueList(); + virtual ~VulkanQueueList(); +}; + +class VulkanQueueFamilyToQueueListMap + : public VulkanMap> { +protected: + VulkanQueueFamilyToQueueListMap( + const VulkanQueueFamilyToQueueListMap &queueFamilyToQueueMap); + +public: + VulkanQueueFamilyToQueueListMap(); + virtual ~VulkanQueueFamilyToQueueListMap(); + void insert(uint32_t key, VulkanQueueList &queueList); + VulkanQueueList &operator[](uint32_t key); +}; + +class VulkanDescriptorSetLayoutBindingList + : public VulkanList { +protected: + VulkanDescriptorSetLayoutBindingList( + const VulkanDescriptorSetLayoutBindingList + &descriptorSetLayoutBindingList); + +public: + VulkanDescriptorSetLayoutBindingList(); + VulkanDescriptorSetLayoutBindingList( + size_t numDescriptorSetLayoutBindings, + VulkanDescriptorType descriptorType, uint32_t descriptorCount = 1, + VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE); + VulkanDescriptorSetLayoutBindingList( + VulkanDescriptorType descriptorType0, uint32_t descriptorCount0, + VulkanDescriptorType descriptorType1, uint32_t descriptorCount1, + VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE); + virtual ~VulkanDescriptorSetLayoutBindingList(); +}; + +class VulkanDescriptorSetLayoutList + : public VulkanList { +protected: + VulkanDescriptorSetLayoutList( + const VulkanDescriptorSetLayoutList &descriptorSetLayoutList); + +public: + VulkanDescriptorSetLayoutList(); + virtual ~VulkanDescriptorSetLayoutList(); +}; + 
+class VulkanCommandBufferList + : public VulkanList { +protected: + VulkanCommandBufferList(const VulkanCommandBufferList &commandBufferList); + +public: + VulkanCommandBufferList(); + VulkanCommandBufferList(size_t numCommandBuffers, + const VulkanDevice &device, + const VulkanCommandPool &commandPool); + virtual ~VulkanCommandBufferList(); +}; + +class VulkanBufferList : public VulkanList { +protected: + VulkanBufferList(const VulkanBufferList &bufferList); + +public: + VulkanBufferList( + size_t numBuffers, const VulkanDevice &device, uint64_t size, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + VulkanBufferUsage bufferUsage = + VULKAN_BUFFER_USAGE_STORAGE_BUFFER_TRANSFER_SRC_DST, + VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE, + const VulkanQueueFamilyList &queueFamilyList = + getEmptyVulkanQueueFamilyList()); + virtual ~VulkanBufferList(); +}; + +class VulkanImage2DList : public VulkanList { +protected: + VulkanImage2DList(const VulkanImage2DList &image2DList); + +public: + VulkanImage2DList( + size_t numImages, std::vector &deviceMemory, + uint64_t baseOffset, uint64_t interImageOffset, + const VulkanDevice &device, VulkanFormat format, uint32_t width, + uint32_t height, uint32_t mipLevels, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE, + VulkanImageUsage imageUsage = + VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST, + VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE); + VulkanImage2DList( + size_t numImages, const VulkanDevice &device, VulkanFormat format, + uint32_t width, uint32_t height, uint32_t mipLevels = 1, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE, + VulkanImageUsage imageUsage = + 
VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST, + VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE); + virtual ~VulkanImage2DList(); +}; + +class VulkanImageViewList : public VulkanList { +protected: + VulkanImageViewList(const VulkanImageViewList &imageViewList); + +public: + VulkanImageViewList(const VulkanDevice &device, + const VulkanImage2DList &image2DList, + bool createImageViewPerMipLevel = true); + virtual ~VulkanImageViewList(); +}; + +class VulkanDeviceMemoryList + : public VulkanList { +protected: + VulkanDeviceMemoryList(const VulkanDeviceMemoryList &deviceMemoryList); + +public: + VulkanDeviceMemoryList( + size_t numImages, const VulkanImage2DList &image2DList, + const VulkanDevice &device, const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE); + virtual ~VulkanDeviceMemoryList(); +}; + +class VulkanSemaphoreList : public VulkanList { +protected: + VulkanSemaphoreList(const VulkanSemaphoreList &semaphoreList); + +public: + VulkanSemaphoreList(); + VulkanSemaphoreList( + size_t numSemaphores, const VulkanDevice &device, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType = + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE, + const std::wstring namePrefix = L""); + virtual ~VulkanSemaphoreList(); +}; + +/////////////////////////////// +// VulkanList implementation // +/////////////////////////////// + +template +VulkanList::VulkanList(const VulkanList &list) + : m_wrapperList(list.m_wrapperList), + m_constWrapperList(list.m_constWrapperList), + m_nativeList(list.m_nativeList) +{} + +template +VulkanList::VulkanList() +{} + +template +VulkanList::~VulkanList() +{} + +template +void VulkanList::add(VulkanWrapper &wrapper) +{ + + if (m_constWrapperList.size() != size_t(0)) + { + std::cout << "This list can only contain externally allocated objects" + << std::endl; + return; + } + m_wrapperList.push_back(std::reference_wrapper(wrapper)); + 
m_nativeList.push_back((VulkanNative)wrapper); +} + +template +void VulkanList::add(const VulkanWrapper &wrapper) +{ + if (m_wrapperList.size() != size_t(0)) + { + std::cout << "This list cannot contain externally allocated objects" + << std::endl; + return; + } + + m_constWrapperList.push_back( + std::reference_wrapper(wrapper)); + m_nativeList.push_back((VulkanNative)wrapper); +} + +template +size_t VulkanList::size() const +{ + return (m_wrapperList.size() > 0) ? m_wrapperList.size() + : m_constWrapperList.size(); +} + +template +const VulkanWrapper & + VulkanList::operator[](size_t idx) const +{ + if (idx < size()) + { + // CHECK_LT(idx, size()); + return (m_wrapperList.size() > 0) ? m_wrapperList[idx].get() + : m_constWrapperList[idx].get(); + } +} + +template +VulkanWrapper &VulkanList::operator[](size_t idx) +{ + if (idx < m_wrapperList.size()) + { + // CHECK_LT(idx, m_wrapperList.size()); + return m_wrapperList[idx].get(); + } +} + +template +VulkanList::operator const VulkanNative *() const +{ + return m_nativeList.data(); +} + +////////////////////////////// +// VulkanMap implementation // +////////////////////////////// + +template +VulkanMap::VulkanMap(const VulkanMap &map) + : m_map(map.m_map) +{} + +template +VulkanMap::VulkanMap() +{} + +template +VulkanMap::~VulkanMap() +{} + +template +void VulkanMap::insert(const VulkanKey &key, + VulkanValue &value) +{ + m_map.insert(std::pair>( + key, std::reference_wrapper(value))); +} + +template +const VulkanValue & + VulkanMap::operator[](const VulkanKey &key) const +{ + return m_map.at(key); +} + +template +VulkanValue &VulkanMap::operator[](const VulkanKey &key) +{ + return m_map.at(key); +} + +#endif // _vulkan_list_map_hpp_ diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp new file mode 100644 index 00000000..81e12621 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp @@ -0,0 
+1,693 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "vulkan_utility.hpp" +#include "vulkan_wrapper.hpp" +#include +#include +#include +#include +#include +#include +#if defined(_WIN32) || defined(_WIN64) +#include +#endif +#define ASSERT(x) assert((x)) +#define BUFFERSIZE 3000 + + +const VulkanInstance &getVulkanInstance() +{ + static VulkanInstance instance; + return instance; +} + +const VulkanPhysicalDevice &getVulkanPhysicalDevice() +{ + size_t pdIdx; + cl_int errNum = 0; + cl_platform_id platform = NULL; + cl_uchar uuid[CL_UUID_SIZE_KHR]; + cl_device_id *devices; + char *extensions = NULL; + size_t extensionSize = 0; + cl_uint num_devices = 0; + cl_uint device_no = 0; + const size_t bufsize = BUFFERSIZE; + char buf[BUFFERSIZE]; + const VulkanInstance &instance = getVulkanInstance(); + const VulkanPhysicalDeviceList &physicalDeviceList = + instance.getPhysicalDeviceList(); + + // get the platform ID + errNum = clGetPlatformIDs(1, &platform, NULL); + if (errNum != CL_SUCCESS) + { + printf("Error: Failed to get platform\n"); + throw std::runtime_error("Error: Failed to get number of platform\n"); + } + + errNum = + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices); + if (CL_SUCCESS != errNum) + { + throw std::runtime_error( + "Error: clGetDeviceIDs failed in returning of devices\n"); + } + devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id)); + if 
(NULL == devices) + { + throw std::runtime_error( + "Error: Unable to allocate memory for devices\n"); + } + errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, + NULL); + if (CL_SUCCESS != errNum) + { + throw std::runtime_error("Error: Failed to get deviceID.\n"); + } + bool is_selected = false; + for (device_no = 0; device_no < num_devices; device_no++) + { + errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0, + NULL, &extensionSize); + if (CL_SUCCESS != errNum) + { + throw std::runtime_error("Error in clGetDeviceInfo for getting " + "device_extension size....\n"); + } + extensions = (char *)malloc(extensionSize); + if (NULL == extensions) + { + throw std::runtime_error( + "Unable to allocate memory for extensions\n"); + } + errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, + extensionSize, extensions, NULL); + if (CL_SUCCESS != errNum) + { + throw std::runtime_error("Error: Error in clGetDeviceInfo for " + "getting device_extension\n"); + } + errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR, + CL_UUID_SIZE_KHR, uuid, &extensionSize); + if (CL_SUCCESS != errNum) + { + throw std::runtime_error( + "Error: clGetDeviceInfo failed with error\n"); + } + free(extensions); + for (pdIdx = 0; pdIdx < physicalDeviceList.size(); pdIdx++) + { + if (!memcmp(&uuid, physicalDeviceList[pdIdx].getUUID(), + VK_UUID_SIZE)) + { + std::cout << "Selected physical device = " + << physicalDeviceList[pdIdx] << std::endl; + is_selected = true; + break; + } + } + if (is_selected) + { + break; + } + } + + if ((pdIdx >= physicalDeviceList.size()) + || (physicalDeviceList[pdIdx] == (VkPhysicalDevice)VK_NULL_HANDLE)) + { + throw std::runtime_error("failed to find a suitable GPU!"); + } + std::cout << "Selected physical device is: " << physicalDeviceList[pdIdx] + << std::endl; + return physicalDeviceList[pdIdx]; +} + +const VulkanQueueFamily &getVulkanQueueFamily(uint32_t queueFlags) +{ + size_t qfIdx; + const 
VulkanPhysicalDevice &physicalDevice = getVulkanPhysicalDevice(); + const VulkanQueueFamilyList &queueFamilyList = + physicalDevice.getQueueFamilyList(); + + for (qfIdx = 0; qfIdx < queueFamilyList.size(); qfIdx++) + { + if ((queueFamilyList[qfIdx].getQueueFlags() & queueFlags) == queueFlags) + { + break; + } + } + + return queueFamilyList[qfIdx]; +} + +const VulkanMemoryType & +getVulkanMemoryType(const VulkanDevice &device, + VulkanMemoryTypeProperty memoryTypeProperty) +{ + size_t mtIdx; + const VulkanMemoryTypeList &memoryTypeList = + device.getPhysicalDevice().getMemoryTypeList(); + + for (mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) + { + if ((memoryTypeList[mtIdx].getMemoryTypeProperty() & memoryTypeProperty) + == memoryTypeProperty) + { + break; + } + } + + // CHECK_LT(mtIdx, memoryTypeList.size()); + return memoryTypeList[mtIdx]; +} + +bool checkVkSupport() +{ + bool result = true; + const VulkanInstance &instance = getVulkanInstance(); + const VulkanPhysicalDeviceList &physicalDeviceList = + instance.getPhysicalDeviceList(); + if (physicalDeviceList == NULL) + { + std::cout << "physicalDeviceList is null, No GPUs found with " + "Vulkan support !!!\n"; + result = false; + } + return result; +} + +const VulkanQueueFamilyList &getEmptyVulkanQueueFamilyList() +{ + static VulkanQueueFamilyList queueFamilyList; + return queueFamilyList; +} + +const VulkanDescriptorSetLayoutList &getEmptyVulkanDescriptorSetLayoutList() +{ + static VulkanDescriptorSetLayoutList descriptorSetLayoutList; + + return descriptorSetLayoutList; +} + +const VulkanQueueFamilyToQueueCountMap & +getDefaultVulkanQueueFamilyToQueueCountMap() +{ + static VulkanQueueFamilyToQueueCountMap queueFamilyToQueueCountMap(1); + + return queueFamilyToQueueCountMap; +} + +const std::vector +getSupportedVulkanExternalMemoryHandleTypeList() +{ + std::vector externalMemoryHandleTypeList; + +#if _WIN32 + if (IsWindows8OrGreater()) + { + externalMemoryHandleTypeList.push_back( + 
VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT); + } + externalMemoryHandleTypeList.push_back( + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT); +#else + externalMemoryHandleTypeList.push_back( + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD); +#endif + + return externalMemoryHandleTypeList; +} + +const std::vector +getSupportedVulkanExternalSemaphoreHandleTypeList() +{ + std::vector + externalSemaphoreHandleTypeList; + +#if _WIN32 + if (IsWindows8OrGreater()) + { + externalSemaphoreHandleTypeList.push_back( + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT); + } + externalSemaphoreHandleTypeList.push_back( + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT); +#else + externalSemaphoreHandleTypeList.push_back( + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD); +#endif + + return externalSemaphoreHandleTypeList; +} + +const std::vector getSupportedVulkanFormatList() +{ + std::vector formatList; + + formatList.push_back(VULKAN_FORMAT_R8_UINT); + formatList.push_back(VULKAN_FORMAT_R8_SINT); + formatList.push_back(VULKAN_FORMAT_R8G8_UINT); + formatList.push_back(VULKAN_FORMAT_R8G8_SINT); + formatList.push_back(VULKAN_FORMAT_R8G8B8A8_UINT); + formatList.push_back(VULKAN_FORMAT_R8G8B8A8_SINT); + formatList.push_back(VULKAN_FORMAT_R16_UINT); + formatList.push_back(VULKAN_FORMAT_R16_SINT); + formatList.push_back(VULKAN_FORMAT_R16G16_UINT); + formatList.push_back(VULKAN_FORMAT_R16G16_SINT); + formatList.push_back(VULKAN_FORMAT_R16G16B16A16_UINT); + formatList.push_back(VULKAN_FORMAT_R16G16B16A16_SINT); + formatList.push_back(VULKAN_FORMAT_R32_UINT); + formatList.push_back(VULKAN_FORMAT_R32_SINT); + formatList.push_back(VULKAN_FORMAT_R32_SFLOAT); + formatList.push_back(VULKAN_FORMAT_R32G32_UINT); + formatList.push_back(VULKAN_FORMAT_R32G32_SINT); + formatList.push_back(VULKAN_FORMAT_R32G32_SFLOAT); + formatList.push_back(VULKAN_FORMAT_R32G32B32A32_UINT); + formatList.push_back(VULKAN_FORMAT_R32G32B32A32_SINT); + 
formatList.push_back(VULKAN_FORMAT_R32G32B32A32_SFLOAT); + + for (size_t fIdx = 0; fIdx < formatList.size(); fIdx++) + { + switch (formatList[fIdx]) + { + case VULKAN_FORMAT_R8_UINT: + case VULKAN_FORMAT_R8_SINT: + case VULKAN_FORMAT_R8G8_UINT: + case VULKAN_FORMAT_R8G8_SINT: + case VULKAN_FORMAT_R8G8B8A8_UINT: + case VULKAN_FORMAT_R8G8B8A8_SINT: + case VULKAN_FORMAT_R16_UINT: + case VULKAN_FORMAT_R16_SINT: + case VULKAN_FORMAT_R16G16_UINT: + case VULKAN_FORMAT_R16G16_SINT: + case VULKAN_FORMAT_R16G16B16A16_UINT: + case VULKAN_FORMAT_R16G16B16A16_SINT: + case VULKAN_FORMAT_R32_UINT: + case VULKAN_FORMAT_R32_SINT: + case VULKAN_FORMAT_R32_SFLOAT: + case VULKAN_FORMAT_R32G32_UINT: + case VULKAN_FORMAT_R32G32_SINT: + case VULKAN_FORMAT_R32G32_SFLOAT: + case VULKAN_FORMAT_R32G32B32A32_UINT: + case VULKAN_FORMAT_R32G32B32A32_SINT: + case VULKAN_FORMAT_R32G32B32A32_SFLOAT: break; + + case VULKAN_FORMAT_UNDEFINED: + case VULKAN_FORMAT_R4G4_UNORM_PACK8: + case VULKAN_FORMAT_R4G4B4A4_UNORM_PACK16: + case VULKAN_FORMAT_B4G4R4A4_UNORM_PACK16: + case VULKAN_FORMAT_R5G6B5_UNORM_PACK16: + case VULKAN_FORMAT_B5G6R5_UNORM_PACK16: + case VULKAN_FORMAT_R5G5B5A1_UNORM_PACK16: + case VULKAN_FORMAT_B5G5R5A1_UNORM_PACK16: + case VULKAN_FORMAT_A1R5G5B5_UNORM_PACK16: + case VULKAN_FORMAT_R8_UNORM: + case VULKAN_FORMAT_R8_SNORM: + case VULKAN_FORMAT_R8_USCALED: + case VULKAN_FORMAT_R8_SSCALED: + case VULKAN_FORMAT_R8_SRGB: + case VULKAN_FORMAT_R8G8_SNORM: + case VULKAN_FORMAT_R8G8_UNORM: + case VULKAN_FORMAT_R8G8_USCALED: + case VULKAN_FORMAT_R8G8_SSCALED: + case VULKAN_FORMAT_R8G8_SRGB: + case VULKAN_FORMAT_R8G8B8_UNORM: + case VULKAN_FORMAT_R8G8B8_SNORM: + case VULKAN_FORMAT_R8G8B8_USCALED: + case VULKAN_FORMAT_R8G8B8_SSCALED: + case VULKAN_FORMAT_R8G8B8_UINT: + case VULKAN_FORMAT_R8G8B8_SINT: + case VULKAN_FORMAT_R8G8B8_SRGB: + case VULKAN_FORMAT_B8G8R8_UNORM: + case VULKAN_FORMAT_B8G8R8_SNORM: + case VULKAN_FORMAT_B8G8R8_USCALED: + case VULKAN_FORMAT_B8G8R8_SSCALED: + case 
VULKAN_FORMAT_B8G8R8_UINT: + case VULKAN_FORMAT_B8G8R8_SINT: + case VULKAN_FORMAT_B8G8R8_SRGB: + case VULKAN_FORMAT_R8G8B8A8_UNORM: + case VULKAN_FORMAT_R8G8B8A8_SNORM: + case VULKAN_FORMAT_R8G8B8A8_USCALED: + case VULKAN_FORMAT_R8G8B8A8_SSCALED: + case VULKAN_FORMAT_R8G8B8A8_SRGB: + case VULKAN_FORMAT_B8G8R8A8_UNORM: + case VULKAN_FORMAT_B8G8R8A8_SNORM: + case VULKAN_FORMAT_B8G8R8A8_USCALED: + case VULKAN_FORMAT_B8G8R8A8_SSCALED: + case VULKAN_FORMAT_B8G8R8A8_UINT: + case VULKAN_FORMAT_B8G8R8A8_SINT: + case VULKAN_FORMAT_B8G8R8A8_SRGB: + case VULKAN_FORMAT_A8B8G8R8_UNORM_PACK32: + case VULKAN_FORMAT_A8B8G8R8_SNORM_PACK32: + case VULKAN_FORMAT_A8B8G8R8_USCALED_PACK32: + case VULKAN_FORMAT_A8B8G8R8_SSCALED_PACK32: + case VULKAN_FORMAT_A8B8G8R8_UINT_PACK32: + case VULKAN_FORMAT_A8B8G8R8_SINT_PACK32: + case VULKAN_FORMAT_A8B8G8R8_SRGB_PACK32: + case VULKAN_FORMAT_A2R10G10B10_UNORM_PACK32: + case VULKAN_FORMAT_A2R10G10B10_SNORM_PACK32: + case VULKAN_FORMAT_A2R10G10B10_USCALED_PACK32: + case VULKAN_FORMAT_A2R10G10B10_SSCALED_PACK32: + case VULKAN_FORMAT_A2R10G10B10_UINT_PACK32: + case VULKAN_FORMAT_A2R10G10B10_SINT_PACK32: + case VULKAN_FORMAT_A2B10G10R10_UNORM_PACK32: + case VULKAN_FORMAT_A2B10G10R10_SNORM_PACK32: + case VULKAN_FORMAT_A2B10G10R10_USCALED_PACK32: + case VULKAN_FORMAT_A2B10G10R10_SSCALED_PACK32: + case VULKAN_FORMAT_A2B10G10R10_UINT_PACK32: + case VULKAN_FORMAT_A2B10G10R10_SINT_PACK32: + case VULKAN_FORMAT_R16_UNORM: + case VULKAN_FORMAT_R16_SNORM: + case VULKAN_FORMAT_R16_USCALED: + case VULKAN_FORMAT_R16_SSCALED: + case VULKAN_FORMAT_R16_SFLOAT: + case VULKAN_FORMAT_R16G16_UNORM: + case VULKAN_FORMAT_R16G16_SNORM: + case VULKAN_FORMAT_R16G16_USCALED: + case VULKAN_FORMAT_R16G16_SSCALED: + case VULKAN_FORMAT_R16G16_SFLOAT: + case VULKAN_FORMAT_R16G16B16_UNORM: + case VULKAN_FORMAT_R16G16B16_SNORM: + case VULKAN_FORMAT_R16G16B16_USCALED: + case VULKAN_FORMAT_R16G16B16_SSCALED: + case VULKAN_FORMAT_R16G16B16_UINT: + case VULKAN_FORMAT_R16G16B16_SINT: + 
case VULKAN_FORMAT_R16G16B16_SFLOAT: + case VULKAN_FORMAT_R16G16B16A16_UNORM: + case VULKAN_FORMAT_R16G16B16A16_SNORM: + case VULKAN_FORMAT_R16G16B16A16_USCALED: + case VULKAN_FORMAT_R16G16B16A16_SSCALED: + case VULKAN_FORMAT_R16G16B16A16_SFLOAT: + case VULKAN_FORMAT_R32G32B32_UINT: + case VULKAN_FORMAT_R32G32B32_SINT: + case VULKAN_FORMAT_R32G32B32_SFLOAT: + case VULKAN_FORMAT_R64_UINT: + case VULKAN_FORMAT_R64_SINT: + case VULKAN_FORMAT_R64_SFLOAT: + case VULKAN_FORMAT_R64G64_UINT: + case VULKAN_FORMAT_R64G64_SINT: + case VULKAN_FORMAT_R64G64_SFLOAT: + case VULKAN_FORMAT_R64G64B64_UINT: + case VULKAN_FORMAT_R64G64B64_SINT: + case VULKAN_FORMAT_R64G64B64_SFLOAT: + case VULKAN_FORMAT_R64G64B64A64_UINT: + case VULKAN_FORMAT_R64G64B64A64_SINT: + case VULKAN_FORMAT_R64G64B64A64_SFLOAT: + case VULKAN_FORMAT_B10G11R11_UFLOAT_PACK32: + case VULKAN_FORMAT_E5B9G9R9_UFLOAT_PACK32: + case VULKAN_FORMAT_D16_UNORM: + case VULKAN_FORMAT_X8_D24_UNORM_PACK32: + case VULKAN_FORMAT_D32_SFLOAT: + case VULKAN_FORMAT_S8_UINT: + case VULKAN_FORMAT_D16_UNORM_S8_UINT: + case VULKAN_FORMAT_D24_UNORM_S8_UINT: + case VULKAN_FORMAT_D32_SFLOAT_S8_UINT: + case VULKAN_FORMAT_BC1_RGB_UNORM_BLOCK: + case VULKAN_FORMAT_BC1_RGB_SRGB_BLOCK: + case VULKAN_FORMAT_BC1_RGBA_UNORM_BLOCK: + case VULKAN_FORMAT_BC1_RGBA_SRGB_BLOCK: + case VULKAN_FORMAT_BC2_UNORM_BLOCK: + case VULKAN_FORMAT_BC2_SRGB_BLOCK: + case VULKAN_FORMAT_BC3_UNORM_BLOCK: + case VULKAN_FORMAT_BC3_SRGB_BLOCK: + case VULKAN_FORMAT_BC4_UNORM_BLOCK: + case VULKAN_FORMAT_BC4_SNORM_BLOCK: + case VULKAN_FORMAT_BC5_UNORM_BLOCK: + case VULKAN_FORMAT_BC5_SNORM_BLOCK: + case VULKAN_FORMAT_BC6H_UFLOAT_BLOCK: + case VULKAN_FORMAT_BC6H_SFLOAT_BLOCK: + case VULKAN_FORMAT_BC7_UNORM_BLOCK: + case VULKAN_FORMAT_BC7_SRGB_BLOCK: + case VULKAN_FORMAT_ETC2_R8G8B8_UNORM_BLOCK: + case VULKAN_FORMAT_ETC2_R8G8B8_SRGB_BLOCK: + case VULKAN_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK: + case VULKAN_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK: + case 
VULKAN_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK: + case VULKAN_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK: + case VULKAN_FORMAT_EAC_R11_UNORM_BLOCK: + case VULKAN_FORMAT_EAC_R11_SNORM_BLOCK: + case VULKAN_FORMAT_EAC_R11G11_UNORM_BLOCK: + case VULKAN_FORMAT_EAC_R11G11_SNORM_BLOCK: + case VULKAN_FORMAT_ASTC_4x4_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_4x4_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_5x4_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_5x4_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_5x5_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_5x5_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_6x5_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_6x5_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_6x6_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_6x6_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_8x5_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_8x5_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_8x6_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_8x6_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_8x8_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_8x8_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_10x5_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_10x5_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_10x6_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_10x6_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_10x8_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_10x8_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_10x10_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_10x10_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_12x10_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_12x10_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_12x12_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_12x12_SRGB_BLOCK: + ASSERT(0); + std::cout << "Unsupport texture format"; + } + } + + return formatList; +} + +uint32_t getVulkanFormatElementSize(VulkanFormat format) +{ + switch (format) + { + case VULKAN_FORMAT_R8_UINT: return uint32_t(1); + case VULKAN_FORMAT_R8_SINT: return uint32_t(1); + case VULKAN_FORMAT_R8G8_UINT: return uint32_t(2); + case VULKAN_FORMAT_R8G8_SINT: return uint32_t(2); + case VULKAN_FORMAT_R8G8B8A8_UINT: return uint32_t(4); + case VULKAN_FORMAT_R8G8B8A8_SINT: return uint32_t(4); + case VULKAN_FORMAT_R16_UINT: return 
uint32_t(2); + case VULKAN_FORMAT_R16_SINT: return uint32_t(2); + case VULKAN_FORMAT_R16G16_UINT: return uint32_t(4); + case VULKAN_FORMAT_R16G16_SINT: return uint32_t(4); + case VULKAN_FORMAT_R16G16B16A16_UINT: return uint32_t(8); + case VULKAN_FORMAT_R16G16B16A16_SINT: return uint32_t(8); + case VULKAN_FORMAT_R32_UINT: return uint32_t(4); + case VULKAN_FORMAT_R32_SINT: return uint32_t(4); + case VULKAN_FORMAT_R32_SFLOAT: return uint32_t(4); + case VULKAN_FORMAT_R32G32_UINT: return uint32_t(8); + case VULKAN_FORMAT_R32G32_SINT: return uint32_t(8); + case VULKAN_FORMAT_R32G32_SFLOAT: return uint32_t(8); + case VULKAN_FORMAT_R32G32B32A32_UINT: return uint32_t(16); + case VULKAN_FORMAT_R32G32B32A32_SINT: return uint32_t(16); + case VULKAN_FORMAT_R32G32B32A32_SFLOAT: return uint32_t(16); + default: ASSERT(0); std::cout << "Unknown format"; + } + + return uint32_t(0); +} + +const char *getVulkanFormatGLSLFormat(VulkanFormat format) +{ + switch (format) + { + case VULKAN_FORMAT_R8_UINT: return "r8ui"; + case VULKAN_FORMAT_R8_SINT: return "r8i"; + case VULKAN_FORMAT_R8G8_UINT: return "rg8ui"; + case VULKAN_FORMAT_R8G8_SINT: return "rg8i"; + case VULKAN_FORMAT_R8G8B8A8_UINT: return "rgba8ui"; + case VULKAN_FORMAT_R8G8B8A8_SINT: return "rgba8i"; + case VULKAN_FORMAT_R16_UINT: return "r16ui"; + case VULKAN_FORMAT_R16_SINT: return "r16i"; + case VULKAN_FORMAT_R16G16_UINT: return "rg16ui"; + case VULKAN_FORMAT_R16G16_SINT: return "rg16i"; + case VULKAN_FORMAT_R16G16B16A16_UINT: return "rgba16ui"; + case VULKAN_FORMAT_R16G16B16A16_SINT: return "rgba16i"; + case VULKAN_FORMAT_R32_UINT: return "r32ui"; + case VULKAN_FORMAT_R32_SINT: return "r32i"; + case VULKAN_FORMAT_R32_SFLOAT: return "r32f"; + case VULKAN_FORMAT_R32G32_UINT: return "rg32ui"; + case VULKAN_FORMAT_R32G32_SINT: return "rg32i"; + case VULKAN_FORMAT_R32G32_SFLOAT: return "rg32f"; + case VULKAN_FORMAT_R32G32B32A32_UINT: return "rgba32ui"; + case VULKAN_FORMAT_R32G32B32A32_SINT: return "rgba32i"; + case 
VULKAN_FORMAT_R32G32B32A32_SFLOAT: return "rgba32f"; + default: ASSERT(0); std::cout << "Unknown format"; + } + + return (const char *)size_t(0); +} + +const char *getVulkanFormatGLSLTypePrefix(VulkanFormat format) +{ + switch (format) + { + case VULKAN_FORMAT_R8_UINT: + case VULKAN_FORMAT_R8G8_UINT: + case VULKAN_FORMAT_R8G8B8A8_UINT: + case VULKAN_FORMAT_R16_UINT: + case VULKAN_FORMAT_R16G16_UINT: + case VULKAN_FORMAT_R16G16B16A16_UINT: + case VULKAN_FORMAT_R32_UINT: + case VULKAN_FORMAT_R32G32_UINT: + case VULKAN_FORMAT_R32G32B32A32_UINT: return "u"; + + case VULKAN_FORMAT_R8_SINT: + case VULKAN_FORMAT_R8G8_SINT: + case VULKAN_FORMAT_R8G8B8A8_SINT: + case VULKAN_FORMAT_R16_SINT: + case VULKAN_FORMAT_R16G16_SINT: + case VULKAN_FORMAT_R16G16B16A16_SINT: + case VULKAN_FORMAT_R32_SINT: + case VULKAN_FORMAT_R32G32_SINT: + case VULKAN_FORMAT_R32G32B32A32_SINT: return "i"; + + case VULKAN_FORMAT_R32_SFLOAT: + case VULKAN_FORMAT_R32G32_SFLOAT: + case VULKAN_FORMAT_R32G32B32A32_SFLOAT: return ""; + + default: ASSERT(0); std::cout << "Unknown format"; + } + + return ""; +} + +std::string prepareVulkanShader( + std::string shaderCode, + const std::map &patternToSubstituteMap) +{ + for (std::map::const_iterator psIt = + patternToSubstituteMap.begin(); + psIt != patternToSubstituteMap.end(); ++psIt) + { + std::string::size_type pos = 0u; + while ((pos = shaderCode.find(psIt->first, pos)) != std::string::npos) + { + shaderCode.replace(pos, psIt->first.length(), psIt->second); + pos += psIt->second.length(); + } + } + + return shaderCode; +} + +std::ostream &operator<<(std::ostream &os, + VulkanMemoryTypeProperty memoryTypeProperty) +{ + switch (memoryTypeProperty) + { + case VULKAN_MEMORY_TYPE_PROPERTY_NONE: return os << "None"; + case VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL: + return os << "Device local"; + case VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT: + return os << "Host visible and coherent"; + case VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED: + return os 
<< "Host visible and cached"; + case VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED_COHERENT: + return os << "Host visible, cached and coherent"; + case VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_COHERENT: + return os << "Device local, Host visible and coherent"; + case VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED: + return os << "Device local, Host visible and cached"; + case VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED_COHERENT: + return os << "Device local, Host visible, cached and coherent"; + } + + return os; +} + +std::ostream & +operator<<(std::ostream &os, + VulkanExternalMemoryHandleType externalMemoryHandleType) +{ + switch (externalMemoryHandleType) + { + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE: return os << "None"; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: + return os << "Opaque file descriptor"; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT: + return os << "Opaque NT handle"; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT: + return os << "Opaque D3DKMT handle"; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT: + return os << "Opaque NT and D3DKMT handle"; + } + + return os; +} + +std::ostream & +operator<<(std::ostream &os, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType) +{ + switch (externalSemaphoreHandleType) + { + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE: return os << "None"; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD: + return os << "Opaque file descriptor"; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT: + return os << "Opaque NT handle"; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT: + return os << "Opaque D3DKMT handle"; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT: + return os << "Opaque NT and D3DKMT handle"; + } + + return os; +} + +std::ostream &operator<<(std::ostream &os, VulkanFormat format) +{ + switch (format) + { + case VULKAN_FORMAT_R8_UINT: return os << 
"R8_UINT"; + case VULKAN_FORMAT_R8_SINT: return os << "R8_SINT"; + case VULKAN_FORMAT_R8G8_UINT: return os << "R8G8_UINT"; + case VULKAN_FORMAT_R8G8_SINT: return os << "R8G8_SINT"; + case VULKAN_FORMAT_R8G8B8A8_UINT: return os << "R8G8B8A8_UINT"; + case VULKAN_FORMAT_R8G8B8A8_SINT: return os << "R8G8B8A8_SINT"; + case VULKAN_FORMAT_R16_UINT: return os << "R16_UINT"; + case VULKAN_FORMAT_R16_SINT: return os << "R16_SINT"; + case VULKAN_FORMAT_R16G16_UINT: return os << "R16G16_UINT"; + case VULKAN_FORMAT_R16G16_SINT: return os << "R16G16_SINT"; + case VULKAN_FORMAT_R16G16B16A16_UINT: return os << "R16G16B16A16_UINT"; + case VULKAN_FORMAT_R16G16B16A16_SINT: return os << "R16G16B16A16_SINT"; + case VULKAN_FORMAT_R32_UINT: return os << "R32_UINT"; + case VULKAN_FORMAT_R32_SINT: return os << "R32_SINT"; + case VULKAN_FORMAT_R32_SFLOAT: return os << "R32_SFLOAT"; + case VULKAN_FORMAT_R32G32_UINT: return os << "R32G32_UINT"; + case VULKAN_FORMAT_R32G32_SINT: return os << "R32G32_SINT"; + case VULKAN_FORMAT_R32G32_SFLOAT: return os << "R32G32_SFLOAT"; + case VULKAN_FORMAT_R32G32B32A32_UINT: return os << "R32G32B32A32_UINT"; + case VULKAN_FORMAT_R32G32B32A32_SINT: return os << "R32G32B32A32_SINT"; + case VULKAN_FORMAT_R32G32B32A32_SFLOAT: + return os << "R32G32B32A32_SFLOAT"; + break; + default: ASSERT(0); std::cout << "Unknown format"; + } + + return os; +} diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp new file mode 100644 index 00000000..7022fd5a --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp @@ -0,0 +1,69 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef _vulkan_utility_hpp_ +#define _vulkan_utility_hpp_ + +#include "vulkan_wrapper_types.hpp" +#include +#include +#include +#include +#include "../../../test_common/harness/testHarness.h" + +#define STRING_(str) #str +#define STRING(str) STRING_(str) + +#define ROUND_UP(n, multiple) \ + (((n) + (multiple)-1) - ((((n) + (multiple)-1)) % (multiple))) + +const VulkanInstance& getVulkanInstance(); +const VulkanPhysicalDevice& getVulkanPhysicalDevice(); +const VulkanQueueFamily& +getVulkanQueueFamily(uint32_t queueFlags = VULKAN_QUEUE_FLAG_MASK_ALL); +const VulkanMemoryType& +getVulkanMemoryType(const VulkanDevice& device, + VulkanMemoryTypeProperty memoryTypeProperty); +bool checkVkSupport(); +const VulkanQueueFamilyList& getEmptyVulkanQueueFamilyList(); +const VulkanDescriptorSetLayoutList& getEmptyVulkanDescriptorSetLayoutList(); +const VulkanQueueFamilyToQueueCountMap& +getDefaultVulkanQueueFamilyToQueueCountMap(); +const std::vector +getSupportedVulkanExternalMemoryHandleTypeList(); +const std::vector +getSupportedVulkanExternalSemaphoreHandleTypeList(); +const std::vector getSupportedVulkanFormatList(); + +uint32_t getVulkanFormatElementSize(VulkanFormat format); +const char* getVulkanFormatGLSLFormat(VulkanFormat format); +const char* getVulkanFormatGLSLTypePrefix(VulkanFormat format); + +std::string prepareVulkanShader( + std::string shaderCode, + const std::map& patternToSubstituteMap); + +std::ostream& operator<<(std::ostream& os, + VulkanMemoryTypeProperty memoryTypeProperty); +std::ostream& +operator<<(std::ostream& os, + 
VulkanExternalMemoryHandleType externalMemoryHandleType); +std::ostream& +operator<<(std::ostream& os, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType); +std::ostream& operator<<(std::ostream& os, VulkanFormat format); + +#endif // _vulkan_utility_hpp_ diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp new file mode 100644 index 00000000..c044e009 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp @@ -0,0 +1,2075 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifdef _WIN32 +#define NOMINMAX +#include +#include +#include +#endif +#include +#include "vulkan_wrapper.hpp" +#if defined(__linux__) && !defined(__ANDROID__) +#include +#include +#elif defined(__ANDROID__) +#include +#endif +#if defined _WIN32 +#define LoadFunction GetProcAddress +#elif defined __linux +#define LoadFunction dlsym +#endif + +extern "C" { +#define VK_FUNC_DECL(name) PFN_##name _##name = NULL; +VK_FUNC_LIST +#if defined(_WIN32) || defined(_WIN64) +VK_WINDOWS_FUNC_LIST +#endif +#undef VK_FUNC_DECL +} + +#define WAIVED 2 +#define HANDLE_ERROR -1 + +#define CHECK_VK(call) \ + if (call != VK_SUCCESS) return call; +/////////////////////////////////// +// VulkanInstance implementation // +/////////////////////////////////// + +VulkanInstance::VulkanInstance(const VulkanInstance &instance) + : m_vkInstance(instance.m_vkInstance), + m_physicalDeviceList(instance.m_physicalDeviceList) +{} + +VulkanInstance::VulkanInstance(): m_vkInstance(VK_NULL_HANDLE) +{ +#if defined(__linux__) && !defined(__ANDROID__) + char *glibcVersion = strdup(gnu_get_libc_version()); + int majNum = (int)atoi(strtok(glibcVersion, ".")); + int minNum = (int)atoi(strtok(NULL, ".")); + free(glibcVersion); + if ((majNum < 2) || (majNum == 2 && minNum < 17)) + { + // WAIVE_TEST() << "Insufficient GLIBC version. 
Test waived!"; + } +#endif + +#if defined(_WIN32) || defined(_WIN64) + const char *vulkanLoaderLibraryName = "vulkan-1.dll"; +#elif defined(__linux__) + const char *vulkanLoaderLibraryName = "libvulkan.so.1"; +#endif +#ifdef _WIN32 + HINSTANCE hDLL; + hDLL = LoadLibrary(vulkanLoaderLibraryName); + if (hDLL == NULL) + { + throw std::runtime_error("LoadLibrary failed!"); + } + vkGetInstanceProcAddr = + (PFN_vkGetInstanceProcAddr)LoadFunction(hDLL, "vkGetInstanceProcAddr"); +#else +#if !defined(__APPLE__) + void *handle; + handle = dlopen(vulkanLoaderLibraryName, RTLD_LAZY); + if (!handle) + { + fputs(dlerror(), stderr); + throw std::runtime_error("dlopen failed !!!"); + } + vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)LoadFunction( + handle, "vkGetInstanceProcAddr"); +#endif +#endif + if ((unsigned long long)vkGetInstanceProcAddr == (unsigned long long)NULL) + { + throw std::runtime_error("vkGetInstanceProcAddr() not found!"); + } +#define VK_GET_NULL_INSTANCE_PROC_ADDR(name) \ + _##name = (PFN_##name)vkGetInstanceProcAddr(NULL, #name); + + if ((unsigned long long)vkGetInstanceProcAddr == (unsigned long long)NULL) + { + throw std::runtime_error("Couldn't obtain address for function"); + } + VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceExtensionProperties); + uint32_t instanceExtensionPropertiesCount; + VkResult vkStatus = VK_SUCCESS; + vkStatus = vkEnumerateInstanceExtensionProperties( + NULL, &instanceExtensionPropertiesCount, NULL); + // Something went wrong in vulkan initialization (most likely incompatible + // device/driver combination) + if (vkStatus == VK_ERROR_INCOMPATIBLE_DRIVER) + { + throw std::runtime_error( + "Waiving vulkan test because " + "vkEnumerateInstanceExtensionProperties failed."); + // return WAIVED; + } + + VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceVersion); + VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceLayerProperties); + VK_GET_NULL_INSTANCE_PROC_ADDR(vkCreateInstance); +#undef VK_GET_NULL_INSTANCE_PROC_ADDR + + 
VkApplicationInfo vkApplicationInfo = {}; + vkApplicationInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + vkApplicationInfo.pNext = NULL; + vkApplicationInfo.pApplicationName = "Default app"; + vkApplicationInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0); + vkApplicationInfo.pEngineName = "No engine"; + vkApplicationInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0); + vkApplicationInfo.apiVersion = VK_API_VERSION_1_0; + + std::vector enabledExtensionNameList; + enabledExtensionNameList.push_back( + VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + enabledExtensionNameList.push_back( + VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME); + enabledExtensionNameList.push_back( + VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME); + + std::vector vkExtensionPropertiesList( + instanceExtensionPropertiesCount); + vkEnumerateInstanceExtensionProperties(NULL, + &instanceExtensionPropertiesCount, + vkExtensionPropertiesList.data()); + + for (size_t eenIdx = 0; eenIdx < enabledExtensionNameList.size(); eenIdx++) + { + bool isSupported = false; + for (size_t epIdx = 0; epIdx < vkExtensionPropertiesList.size(); + epIdx++) + { + if (!strcmp(enabledExtensionNameList[eenIdx], + vkExtensionPropertiesList[epIdx].extensionName)) + { + isSupported = true; + break; + } + } + if (!isSupported) + { + return; + } + } + + VkInstanceCreateInfo vkInstanceCreateInfo = {}; + vkInstanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + vkInstanceCreateInfo.pNext = NULL; + vkInstanceCreateInfo.flags = 0; + vkInstanceCreateInfo.pApplicationInfo = &vkApplicationInfo; + vkInstanceCreateInfo.enabledLayerCount = 0; + vkInstanceCreateInfo.ppEnabledLayerNames = NULL; + vkInstanceCreateInfo.enabledExtensionCount = + (uint32_t)enabledExtensionNameList.size(); + vkInstanceCreateInfo.ppEnabledExtensionNames = + enabledExtensionNameList.data(); + + vkCreateInstance(&vkInstanceCreateInfo, NULL, &m_vkInstance); + +#define VK_FUNC_DECL(name) \ + _##name = 
(PFN_##name)vkGetInstanceProcAddr(m_vkInstance, #name); \ + // ASSERT_NEQ((unsigned long long)name, 0ULL) << "Couldn't obtain address + // for function" << #name; + + VK_FUNC_LIST +#if defined(_WIN32) || defined(_WIN64) + VK_WINDOWS_FUNC_LIST +#endif +#undef VK_FUNC_DECL + + uint32_t physicalDeviceCount = 0; + vkEnumeratePhysicalDevices(m_vkInstance, &physicalDeviceCount, NULL); + // CHECK_NEQ(physicalDeviceCount, uint32_t(0)); + + if (physicalDeviceCount == uint32_t(0)) + { + throw std::runtime_error("failed to find GPUs with Vulkan support!"); + } + + std::vector vkPhysicalDeviceList(physicalDeviceCount, + VK_NULL_HANDLE); + vkEnumeratePhysicalDevices(m_vkInstance, &physicalDeviceCount, + vkPhysicalDeviceList.data()); + + for (size_t ppdIdx = 0; ppdIdx < vkPhysicalDeviceList.size(); ppdIdx++) + { + VulkanPhysicalDevice *physicalDevice = + new VulkanPhysicalDevice(vkPhysicalDeviceList[ppdIdx]); + m_physicalDeviceList.add(*physicalDevice); + } +} + +VulkanInstance::~VulkanInstance() +{ + for (size_t pdIdx = 0; pdIdx < m_physicalDeviceList.size(); pdIdx++) + { + const VulkanPhysicalDevice &physicalDevice = + m_physicalDeviceList[pdIdx]; + delete &physicalDevice; + } + if (m_vkInstance) + { + vkDestroyInstance(m_vkInstance, NULL); + } +} + +const VulkanPhysicalDeviceList &VulkanInstance::getPhysicalDeviceList() const +{ + return m_physicalDeviceList; +} + +VulkanInstance::operator VkInstance() const { return m_vkInstance; } + +///////////////////////////////////////// +// VulkanPhysicalDevice implementation // +///////////////////////////////////////// + +VulkanPhysicalDevice::VulkanPhysicalDevice( + const VulkanPhysicalDevice &physicalDevice) + : m_vkPhysicalDevice(physicalDevice.m_vkPhysicalDevice), + m_vkPhysicalDeviceProperties(physicalDevice.m_vkPhysicalDeviceProperties), + m_vkDeviceNodeMask(physicalDevice.m_vkDeviceNodeMask), + m_vkPhysicalDeviceFeatures(physicalDevice.m_vkPhysicalDeviceFeatures), + m_vkPhysicalDeviceMemoryProperties( + 
physicalDevice.m_vkPhysicalDeviceMemoryProperties), + m_queueFamilyList(physicalDevice.m_queueFamilyList) +{ + memcpy(m_vkDeviceUUID, physicalDevice.m_vkDeviceUUID, VK_UUID_SIZE); +} + +VulkanPhysicalDevice::VulkanPhysicalDevice(VkPhysicalDevice vkPhysicalDevice) + : m_vkPhysicalDevice(vkPhysicalDevice) +{ + if (m_vkPhysicalDevice == (VkPhysicalDevice)VK_NULL_HANDLE) + { + throw std::runtime_error("failed to find a suitable GPU!"); + } + + vkGetPhysicalDeviceProperties(m_vkPhysicalDevice, + &m_vkPhysicalDeviceProperties); + vkGetPhysicalDeviceFeatures(m_vkPhysicalDevice, + &m_vkPhysicalDeviceFeatures); + + VkPhysicalDeviceIDPropertiesKHR vkPhysicalDeviceIDPropertiesKHR = {}; + vkPhysicalDeviceIDPropertiesKHR.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR; + vkPhysicalDeviceIDPropertiesKHR.pNext = NULL; + + VkPhysicalDeviceProperties2KHR vkPhysicalDeviceProperties2KHR = {}; + vkPhysicalDeviceProperties2KHR.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + vkPhysicalDeviceProperties2KHR.pNext = &vkPhysicalDeviceIDPropertiesKHR; + + vkGetPhysicalDeviceProperties2KHR(m_vkPhysicalDevice, + &vkPhysicalDeviceProperties2KHR); + + memcpy(m_vkDeviceUUID, vkPhysicalDeviceIDPropertiesKHR.deviceUUID, + sizeof(m_vkDeviceUUID)); + memcpy(m_vkDeviceLUID, vkPhysicalDeviceIDPropertiesKHR.deviceLUID, + sizeof(m_vkDeviceLUID)); + m_vkDeviceNodeMask = vkPhysicalDeviceIDPropertiesKHR.deviceNodeMask; + + uint32_t queueFamilyCount = 0; + vkGetPhysicalDeviceQueueFamilyProperties(m_vkPhysicalDevice, + &queueFamilyCount, NULL); + + std::vector vkQueueFamilyPropertiesList( + queueFamilyCount); + vkGetPhysicalDeviceQueueFamilyProperties( + m_vkPhysicalDevice, &queueFamilyCount, + vkQueueFamilyPropertiesList.data()); + + for (size_t qfpIdx = 0; qfpIdx < vkQueueFamilyPropertiesList.size(); + qfpIdx++) + { + VulkanQueueFamily *queueFamily = new VulkanQueueFamily( + uint32_t(qfpIdx), vkQueueFamilyPropertiesList[qfpIdx]); + m_queueFamilyList.add(*queueFamily); + } + + 
vkGetPhysicalDeviceMemoryProperties(m_vkPhysicalDevice, + &m_vkPhysicalDeviceMemoryProperties); + + for (uint32_t mhIdx = 0; + mhIdx < m_vkPhysicalDeviceMemoryProperties.memoryHeapCount; mhIdx++) + { + VulkanMemoryHeap *memoryHeap = new VulkanMemoryHeap( + mhIdx, m_vkPhysicalDeviceMemoryProperties.memoryHeaps[mhIdx].size, + (VulkanMemoryHeapFlag)m_vkPhysicalDeviceMemoryProperties + .memoryHeaps[mhIdx] + .flags); + m_memoryHeapList.add(*memoryHeap); + } + + for (uint32_t mtIdx = 0; + mtIdx < m_vkPhysicalDeviceMemoryProperties.memoryTypeCount; mtIdx++) + { + const VulkanMemoryHeap &memoryHeap = m_memoryHeapList + [m_vkPhysicalDeviceMemoryProperties.memoryTypes[mtIdx].heapIndex]; + VulkanMemoryType *memoryType = new VulkanMemoryType( + mtIdx, + (VulkanMemoryTypeProperty)m_vkPhysicalDeviceMemoryProperties + .memoryTypes[mtIdx] + .propertyFlags, + memoryHeap); + m_memoryTypeList.add(*memoryType); + } +} + +VulkanPhysicalDevice::~VulkanPhysicalDevice() +{ + for (size_t mtIdx = 0; mtIdx < m_memoryTypeList.size(); mtIdx++) + { + const VulkanMemoryType &memoryType = m_memoryTypeList[mtIdx]; + delete &memoryType; + } + + for (size_t mhIdx = 0; mhIdx < m_memoryHeapList.size(); mhIdx++) + { + const VulkanMemoryHeap &memoryHeap = m_memoryHeapList[mhIdx]; + delete &memoryHeap; + } + + for (size_t qfIdx = 0; qfIdx < m_queueFamilyList.size(); qfIdx++) + { + const VulkanQueueFamily &queueFamily = m_queueFamilyList[qfIdx]; + delete &queueFamily; + } +} + + +const VulkanQueueFamilyList &VulkanPhysicalDevice::getQueueFamilyList() const +{ + return m_queueFamilyList; +} + +const VulkanMemoryHeapList &VulkanPhysicalDevice::getMemoryHeapList() const +{ + return m_memoryHeapList; +} + +const VulkanMemoryTypeList &VulkanPhysicalDevice::getMemoryTypeList() const +{ + return m_memoryTypeList; +} + +const uint8_t *VulkanPhysicalDevice::getUUID() const { return m_vkDeviceUUID; } + +const uint8_t *VulkanPhysicalDevice::getLUID() const { return m_vkDeviceLUID; } + +uint32_t 
VulkanPhysicalDevice::getNodeMask() const +{ + return m_vkDeviceNodeMask; +} + +VulkanPhysicalDevice::operator VkPhysicalDevice() const +{ + return m_vkPhysicalDevice; +} + +bool operator<(const VulkanQueueFamily &queueFamilyA, + const VulkanQueueFamily &queueFamilyB) +{ + return (uint32_t)queueFamilyA < (uint32_t)queueFamilyB; +} + +///////////////////////////////////// +// VulkanMemoryHeap implementation // +///////////////////////////////////// + +VulkanMemoryHeap::VulkanMemoryHeap(const VulkanMemoryHeap &memoryHeap) + : m_memoryHeapIndex(memoryHeap.m_memoryHeapIndex), + m_size(memoryHeap.m_size), m_memoryHeapFlag(memoryHeap.m_memoryHeapFlag) +{} + +VulkanMemoryHeap::VulkanMemoryHeap(uint32_t memoryHeapIndex, uint64_t size, + VulkanMemoryHeapFlag memoryHeapFlag) + : m_memoryHeapIndex(memoryHeapIndex), m_size(size), + m_memoryHeapFlag(memoryHeapFlag) +{} + +VulkanMemoryHeap::~VulkanMemoryHeap() {} + +uint64_t VulkanMemoryHeap::getSize() const { return m_size; } + + +VulkanMemoryHeapFlag VulkanMemoryHeap::getMemoryHeapFlag() const +{ + return m_memoryHeapFlag; +} + +VulkanMemoryHeap::operator uint32_t() const { return m_memoryHeapIndex; } + +///////////////////////////////////// +// VulkanMemoryType implementation // +///////////////////////////////////// + +VulkanMemoryType::VulkanMemoryType(const VulkanMemoryType &memoryType) + : m_memoryTypeIndex(memoryType.m_memoryTypeIndex), + m_memoryTypeProperty(memoryType.m_memoryTypeProperty), + m_memoryHeap(memoryType.m_memoryHeap) +{} + +VulkanMemoryType::VulkanMemoryType(uint32_t memoryTypeIndex, + VulkanMemoryTypeProperty memoryTypeProperty, + const VulkanMemoryHeap &memoryHeap) + : m_memoryTypeIndex(memoryTypeIndex), + m_memoryTypeProperty(memoryTypeProperty), m_memoryHeap(memoryHeap) +{} + +VulkanMemoryType::~VulkanMemoryType() {} + +VulkanMemoryTypeProperty VulkanMemoryType::getMemoryTypeProperty() const +{ + return m_memoryTypeProperty; +} + +const VulkanMemoryHeap &VulkanMemoryType::getMemoryHeap() const +{ + 
return m_memoryHeap; +} + +VulkanMemoryType::operator uint32_t() const { return m_memoryTypeIndex; } + +////////////////////////////////////// +// VulkanQueueFamily implementation // +////////////////////////////////////// + +VulkanQueueFamily::VulkanQueueFamily(const VulkanQueueFamily &queueFamily) + : m_queueFamilyIndex(queueFamily.m_queueFamilyIndex), + m_vkQueueFamilyProperties(queueFamily.m_vkQueueFamilyProperties) +{} + +VulkanQueueFamily::VulkanQueueFamily( + uint32_t queueFamilyIndex, VkQueueFamilyProperties vkQueueFamilyProperties) + : m_queueFamilyIndex(queueFamilyIndex), + m_vkQueueFamilyProperties(vkQueueFamilyProperties) +{} + +VulkanQueueFamily::~VulkanQueueFamily() {} + +uint32_t VulkanQueueFamily::getQueueFlags() const +{ + return m_vkQueueFamilyProperties.queueFlags + & (uint32_t)VULKAN_QUEUE_FLAG_MASK_ALL; +} + +uint32_t VulkanQueueFamily::getQueueCount() const +{ + return m_vkQueueFamilyProperties.queueCount; +} + +VulkanQueueFamily::operator uint32_t() const { return m_queueFamilyIndex; } + +///////////////////////////////// +// VulkanDevice implementation // +///////////////////////////////// + +VulkanDevice::VulkanDevice(const VulkanDevice &device) + : m_physicalDevice(device.m_physicalDevice), m_vkDevice(device.m_vkDevice) +{} + +VulkanDevice::VulkanDevice( + const VulkanPhysicalDevice &physicalDevice, + const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap) + : m_physicalDevice(physicalDevice), m_vkDevice(NULL) +{ + uint32_t maxQueueCount = 0; + for (uint32_t qfIdx = 0; + qfIdx < (uint32_t)physicalDevice.getQueueFamilyList().size(); qfIdx++) + { + maxQueueCount = + std::max(maxQueueCount, queueFamilyToQueueCountMap[qfIdx]); + } + + std::vector vkDeviceQueueCreateInfoList; + std::vector queuePriorities(maxQueueCount); + for (uint32_t qfIdx = 0; + qfIdx < (uint32_t)physicalDevice.getQueueFamilyList().size(); qfIdx++) + { + if (queueFamilyToQueueCountMap[qfIdx]) + { + VkDeviceQueueCreateInfo vkDeviceQueueCreateInfo = {}; + 
vkDeviceQueueCreateInfo.sType = + VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + vkDeviceQueueCreateInfo.pNext = NULL; + vkDeviceQueueCreateInfo.flags = 0; + vkDeviceQueueCreateInfo.queueFamilyIndex = qfIdx; + vkDeviceQueueCreateInfo.queueCount = + queueFamilyToQueueCountMap[qfIdx]; + vkDeviceQueueCreateInfo.pQueuePriorities = queuePriorities.data(); + + vkDeviceQueueCreateInfoList.push_back(vkDeviceQueueCreateInfo); + } + } + + std::vector enabledExtensionNameList; + enabledExtensionNameList.push_back(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME); + enabledExtensionNameList.push_back( + VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME); +#if defined(_WIN32) || defined(_WIN64) + enabledExtensionNameList.push_back( + VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME); + enabledExtensionNameList.push_back( + VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME); +#else + enabledExtensionNameList.push_back( + VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME); + enabledExtensionNameList.push_back( + VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME); +#endif + + + VkDeviceCreateInfo vkDeviceCreateInfo = {}; + vkDeviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + vkDeviceCreateInfo.pNext = NULL; + vkDeviceCreateInfo.flags = 0; + vkDeviceCreateInfo.queueCreateInfoCount = + (uint32_t)vkDeviceQueueCreateInfoList.size(); + vkDeviceCreateInfo.pQueueCreateInfos = vkDeviceQueueCreateInfoList.data(); + vkDeviceCreateInfo.enabledLayerCount = 0; + vkDeviceCreateInfo.ppEnabledLayerNames = NULL; + vkDeviceCreateInfo.enabledExtensionCount = + (uint32_t)enabledExtensionNameList.size(); + vkDeviceCreateInfo.ppEnabledExtensionNames = + enabledExtensionNameList.data(); + vkDeviceCreateInfo.pEnabledFeatures = NULL; + + vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice); + + for (uint32_t qfIdx = 0; + qfIdx < (uint32_t)m_physicalDevice.getQueueFamilyList().size(); + qfIdx++) + { + VulkanQueueList *queueList = new VulkanQueueList(); + m_queueFamilyIndexToQueueListMap.insert(qfIdx, *queueList); + 
for (uint32_t qIdx = 0; qIdx < queueFamilyToQueueCountMap[qfIdx]; + qIdx++) + { + VkQueue vkQueue; + vkGetDeviceQueue(m_vkDevice, qfIdx, qIdx, &vkQueue); + VulkanQueue *queue = new VulkanQueue(vkQueue); + m_queueFamilyIndexToQueueListMap[qfIdx].add(*queue); + } + } +} + +VulkanDevice::~VulkanDevice() +{ + for (uint32_t qfIdx = 0; + qfIdx < (uint32_t)m_physicalDevice.getQueueFamilyList().size(); + qfIdx++) + { + for (size_t qIdx = 0; + qIdx < m_queueFamilyIndexToQueueListMap[qfIdx].size(); qIdx++) + { + VulkanQueue &queue = m_queueFamilyIndexToQueueListMap[qfIdx][qIdx]; + delete &queue; + } + VulkanQueueList &queueList = m_queueFamilyIndexToQueueListMap[qfIdx]; + delete &queueList; + } + vkDestroyDevice(m_vkDevice, NULL); +} + +const VulkanPhysicalDevice &VulkanDevice::getPhysicalDevice() const +{ + return m_physicalDevice; +} + +VulkanQueue &VulkanDevice::getQueue(const VulkanQueueFamily &queueFamily, + uint32_t queueIndex) +{ + return m_queueFamilyIndexToQueueListMap[queueFamily][queueIndex]; +} + +VulkanDevice::operator VkDevice() const { return m_vkDevice; } + +//////////////////////////////// +// VulkanQueue implementation // +//////////////////////////////// + +VulkanQueue::VulkanQueue(const VulkanQueue &queue): m_vkQueue(queue.m_vkQueue) +{} + +VulkanQueue::VulkanQueue(VkQueue vkQueue): m_vkQueue(vkQueue) {} + +VulkanQueue::~VulkanQueue() {} + +void VulkanQueue::submit(const VulkanSemaphoreList &waitSemaphoreList, + const VulkanCommandBufferList &commandBufferList, + const VulkanSemaphoreList &signalSemaphoreList) +{ + std::vector vkPipelineStageFlagsList( + waitSemaphoreList.size(), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); + + VkSubmitInfo vkSubmitInfo = {}; + vkSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + vkSubmitInfo.pNext = NULL; + vkSubmitInfo.waitSemaphoreCount = (uint32_t)waitSemaphoreList.size(); + vkSubmitInfo.pWaitSemaphores = waitSemaphoreList; + vkSubmitInfo.pWaitDstStageMask = vkPipelineStageFlagsList.data(); + vkSubmitInfo.commandBufferCount 
= (uint32_t)commandBufferList.size(); + vkSubmitInfo.pCommandBuffers = commandBufferList; + vkSubmitInfo.signalSemaphoreCount = (uint32_t)signalSemaphoreList.size(); + vkSubmitInfo.pSignalSemaphores = signalSemaphoreList; + + vkQueueSubmit(m_vkQueue, 1, &vkSubmitInfo, NULL); +} + +void VulkanQueue::submit(const VulkanSemaphore &waitSemaphore, + const VulkanCommandBuffer &commandBuffer, + const VulkanSemaphore &signalSemaphore) +{ + VulkanSemaphoreList waitSemaphoreList; + VulkanCommandBufferList commandBufferList; + VulkanSemaphoreList signalSemaphoreList; + + waitSemaphoreList.add(waitSemaphore); + commandBufferList.add(commandBuffer); + signalSemaphoreList.add(signalSemaphore); + + submit(waitSemaphoreList, commandBufferList, signalSemaphoreList); +} + +void VulkanQueue::submit(const VulkanCommandBuffer &commandBuffer, + const VulkanSemaphore &signalSemaphore) +{ + VulkanSemaphoreList waitSemaphoreList; + VulkanCommandBufferList commandBufferList; + VulkanSemaphoreList signalSemaphoreList; + + commandBufferList.add(commandBuffer); + signalSemaphoreList.add(signalSemaphore); + + submit(waitSemaphoreList, commandBufferList, signalSemaphoreList); +} + +void VulkanQueue::submit(const VulkanCommandBuffer &commandBuffer) +{ + VulkanSemaphoreList waitSemaphoreList; + VulkanCommandBufferList commandBufferList; + VulkanSemaphoreList signalSemaphoreList; + + commandBufferList.add(commandBuffer); + + submit(waitSemaphoreList, commandBufferList, signalSemaphoreList); +} + +void VulkanQueue::waitIdle() { vkQueueWaitIdle(m_vkQueue); } + +VulkanQueue::operator VkQueue() const { return m_vkQueue; } + +///////////////////////////////////////////////////// +// VulkanDescriptorSetLayoutBinding implementation // +///////////////////////////////////////////////////// + +VulkanDescriptorSetLayoutBinding::VulkanDescriptorSetLayoutBinding( + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding) + : m_vkDescriptorSetLayoutBinding( + 
descriptorSetLayoutBinding.m_vkDescriptorSetLayoutBinding) +{} + +VulkanDescriptorSetLayoutBinding::VulkanDescriptorSetLayoutBinding( + uint32_t binding, VulkanDescriptorType descriptorType, + uint32_t descriptorCount, VulkanShaderStage shaderStage) +{ + m_vkDescriptorSetLayoutBinding.binding = binding; + m_vkDescriptorSetLayoutBinding.descriptorType = + (VkDescriptorType)descriptorType; + m_vkDescriptorSetLayoutBinding.descriptorCount = descriptorCount; + m_vkDescriptorSetLayoutBinding.stageFlags = + (VkShaderStageFlags)(VkShaderStageFlagBits)shaderStage; + m_vkDescriptorSetLayoutBinding.pImmutableSamplers = NULL; +} + +VulkanDescriptorSetLayoutBinding::~VulkanDescriptorSetLayoutBinding() {} + +VulkanDescriptorSetLayoutBinding::operator VkDescriptorSetLayoutBinding() const +{ + return m_vkDescriptorSetLayoutBinding; +} + +////////////////////////////////////////////// +// VulkanDescriptorSetLayout implementation // +////////////////////////////////////////////// + +VulkanDescriptorSetLayout::VulkanDescriptorSetLayout( + const VulkanDescriptorSetLayout &descriptorSetLayout) + : m_device(descriptorSetLayout.m_device), + m_vkDescriptorSetLayout(descriptorSetLayout.m_vkDescriptorSetLayout) +{} + +void VulkanDescriptorSetLayout::VulkanDescriptorSetLayoutCommon( + const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList) +{ + VkDescriptorSetLayoutCreateInfo vkDescriptorSetLayoutCreateInfo = {}; + vkDescriptorSetLayoutCreateInfo.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + vkDescriptorSetLayoutCreateInfo.pNext = NULL; + vkDescriptorSetLayoutCreateInfo.flags = 0; + vkDescriptorSetLayoutCreateInfo.bindingCount = + (uint32_t)descriptorSetLayoutBindingList.size(); + vkDescriptorSetLayoutCreateInfo.pBindings = descriptorSetLayoutBindingList; + + vkCreateDescriptorSetLayout(m_device, &vkDescriptorSetLayoutCreateInfo, + NULL, &m_vkDescriptorSetLayout); +} + +VulkanDescriptorSetLayout::VulkanDescriptorSetLayout( + const VulkanDevice &device, 
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding) + : m_device(device), m_vkDescriptorSetLayout(VK_NULL_HANDLE) +{ + VulkanDescriptorSetLayoutBindingList descriptorSetLayoutBindingList; + descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding); + + VulkanDescriptorSetLayoutCommon(descriptorSetLayoutBindingList); +} + +VulkanDescriptorSetLayout::VulkanDescriptorSetLayout( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding0, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding1) + : m_device(device), m_vkDescriptorSetLayout(VK_NULL_HANDLE) +{ + VulkanDescriptorSetLayoutBindingList descriptorSetLayoutBindingList; + descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding0); + descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding1); + + VulkanDescriptorSetLayoutCommon(descriptorSetLayoutBindingList); +} + +VulkanDescriptorSetLayout::VulkanDescriptorSetLayout( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList) + : m_device(device), m_vkDescriptorSetLayout(VK_NULL_HANDLE) +{ + VulkanDescriptorSetLayoutCommon(descriptorSetLayoutBindingList); +} + +VulkanDescriptorSetLayout::~VulkanDescriptorSetLayout() +{ + if (m_vkDescriptorSetLayout != VK_NULL_HANDLE) + { + vkDestroyDescriptorSetLayout(m_device, m_vkDescriptorSetLayout, NULL); + } +} + +VulkanDescriptorSetLayout::operator VkDescriptorSetLayout() const +{ + return m_vkDescriptorSetLayout; +} + +///////////////////////////////////////// +// VulkanPipelineLayout implementation // +///////////////////////////////////////// + +VulkanPipelineLayout::VulkanPipelineLayout( + const VulkanPipelineLayout &pipelineLayout) + : m_device(pipelineLayout.m_device), + m_vkPipelineLayout(pipelineLayout.m_vkPipelineLayout) +{} + +void VulkanPipelineLayout::VulkanPipelineLayoutCommon( + const VulkanDescriptorSetLayoutList &descriptorSetLayoutList) +{ + VkPipelineLayoutCreateInfo 
vkPipelineLayoutCreateInfo = {}; + vkPipelineLayoutCreateInfo.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + vkPipelineLayoutCreateInfo.pNext = NULL; + vkPipelineLayoutCreateInfo.flags = 0; + vkPipelineLayoutCreateInfo.setLayoutCount = + (uint32_t)descriptorSetLayoutList.size(); + vkPipelineLayoutCreateInfo.pSetLayouts = descriptorSetLayoutList; + vkPipelineLayoutCreateInfo.pushConstantRangeCount = 0; + vkPipelineLayoutCreateInfo.pPushConstantRanges = NULL; + + vkCreatePipelineLayout(m_device, &vkPipelineLayoutCreateInfo, NULL, + &m_vkPipelineLayout); +} + +VulkanPipelineLayout::VulkanPipelineLayout( + const VulkanDevice &device, + const VulkanDescriptorSetLayout &descriptorSetLayout) + : m_device(device), m_vkPipelineLayout(VK_NULL_HANDLE) +{ + VulkanDescriptorSetLayoutList descriptorSetLayoutList; + descriptorSetLayoutList.add(descriptorSetLayout); + + VulkanPipelineLayoutCommon(descriptorSetLayoutList); +} + +VulkanPipelineLayout::VulkanPipelineLayout( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutList &descriptorSetLayoutList) + : m_device(device), m_vkPipelineLayout(VK_NULL_HANDLE) +{ + VulkanPipelineLayoutCommon(descriptorSetLayoutList); +} + +VulkanPipelineLayout::~VulkanPipelineLayout() +{ + vkDestroyPipelineLayout(m_device, m_vkPipelineLayout, NULL); +} + +VulkanPipelineLayout::operator VkPipelineLayout() const +{ + return m_vkPipelineLayout; +} + +/////////////////////////////////////// +// VulkanShaderModule implementation // +/////////////////////////////////////// + +VulkanShaderModule::VulkanShaderModule(const VulkanShaderModule &shaderModule) + : m_device(shaderModule.m_device), + m_vkShaderModule(shaderModule.m_vkShaderModule) +{} + +VulkanShaderModule::VulkanShaderModule(const VulkanDevice &device, + const std::string &code) + : m_device(device) +{ + std::string paddedCode = code; + while (paddedCode.size() % 4) + { + paddedCode += " "; + } + + VkShaderModuleCreateInfo vkShaderModuleCreateInfo = {}; + 
vkShaderModuleCreateInfo.sType = + VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + vkShaderModuleCreateInfo.pNext = NULL; + vkShaderModuleCreateInfo.flags = 0; + vkShaderModuleCreateInfo.codeSize = paddedCode.size(); + vkShaderModuleCreateInfo.pCode = + (const uint32_t *)(void *)paddedCode.c_str(); + + vkCreateShaderModule(m_device, &vkShaderModuleCreateInfo, NULL, + &m_vkShaderModule); +} + +VulkanShaderModule::~VulkanShaderModule() +{ + vkDestroyShaderModule(m_device, m_vkShaderModule, NULL); +} + +VulkanShaderModule::operator VkShaderModule() const { return m_vkShaderModule; } + +/////////////////////////////////// +// VulkanPipeline implementation // +/////////////////////////////////// + +VulkanPipeline::VulkanPipeline(const VulkanPipeline &pipeline) + : m_device(pipeline.m_device), m_vkPipeline(pipeline.m_vkPipeline) +{} + +VulkanPipeline::VulkanPipeline(const VulkanDevice &device) + : m_device(device), m_vkPipeline(VK_NULL_HANDLE) +{} + +VulkanPipeline::~VulkanPipeline() +{ + vkDestroyPipeline(m_device, m_vkPipeline, NULL); +} + +VulkanPipeline::operator VkPipeline() const { return m_vkPipeline; } + +////////////////////////////////////////// +// VulkanComputePipeline implementation // +////////////////////////////////////////// + +VulkanComputePipeline::VulkanComputePipeline( + const VulkanComputePipeline &computePipeline) + : VulkanPipeline(computePipeline) +{} + +VulkanComputePipeline::VulkanComputePipeline( + const VulkanDevice &device, const VulkanPipelineLayout &pipelineLayout, + const VulkanShaderModule &shaderModule, const std::string &entryFuncName) + : VulkanPipeline(device) +{ + VkPipelineShaderStageCreateInfo vkPipelineShaderStageCreateInfo = {}; + vkPipelineShaderStageCreateInfo.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + vkPipelineShaderStageCreateInfo.pNext = NULL; + vkPipelineShaderStageCreateInfo.flags = 0; + vkPipelineShaderStageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT; + vkPipelineShaderStageCreateInfo.module = 
shaderModule; + vkPipelineShaderStageCreateInfo.pName = entryFuncName.c_str(); + vkPipelineShaderStageCreateInfo.pSpecializationInfo = NULL; + + VkComputePipelineCreateInfo vkComputePipelineCreateInfo = {}; + vkComputePipelineCreateInfo.sType = + VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + vkComputePipelineCreateInfo.pNext = NULL; + vkComputePipelineCreateInfo.flags = 0; + vkComputePipelineCreateInfo.stage = vkPipelineShaderStageCreateInfo; + vkComputePipelineCreateInfo.layout = pipelineLayout; + vkComputePipelineCreateInfo.basePipelineHandle = VK_NULL_HANDLE; + vkComputePipelineCreateInfo.basePipelineIndex = 0; + + vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, + &vkComputePipelineCreateInfo, NULL, &m_vkPipeline); +} + +VulkanComputePipeline::~VulkanComputePipeline() {} + +VulkanPipelineBindPoint VulkanComputePipeline::getPipelineBindPoint() const +{ + return VULKAN_PIPELINE_BIND_POINT_COMPUTE; +} + +///////////////////////////////////////// +// VulkanDescriptorPool implementation // +///////////////////////////////////////// + +VulkanDescriptorPool::VulkanDescriptorPool( + const VulkanDescriptorPool &descriptorPool) + : m_device(descriptorPool.m_device), + m_vkDescriptorPool(descriptorPool.m_vkDescriptorPool) +{} + +void VulkanDescriptorPool::VulkanDescriptorPoolCommon( + const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList) +{ + if (descriptorSetLayoutBindingList.size()) + { + std::map + vkDescriptorTypeToDescriptorCountMap; + + for (size_t dslbIdx = 0; + dslbIdx < descriptorSetLayoutBindingList.size(); dslbIdx++) + { + VkDescriptorSetLayoutBinding vkDescriptorSetLayoutBinding = + descriptorSetLayoutBindingList[dslbIdx]; + if (vkDescriptorTypeToDescriptorCountMap.find( + vkDescriptorSetLayoutBinding.descriptorType) + == vkDescriptorTypeToDescriptorCountMap.end()) + { + vkDescriptorTypeToDescriptorCountMap + [vkDescriptorSetLayoutBinding.descriptorType] = 1; + } + else + { + vkDescriptorTypeToDescriptorCountMap + 
[vkDescriptorSetLayoutBinding.descriptorType]++; + } + } + + std::vector vkDescriptorPoolSizeList; + std::map::iterator dtdcIt; + for (dtdcIt = vkDescriptorTypeToDescriptorCountMap.begin(); + dtdcIt != vkDescriptorTypeToDescriptorCountMap.end(); ++dtdcIt) + { + VkDescriptorPoolSize vkDescriptorPoolSize = {}; + vkDescriptorPoolSize.type = dtdcIt->first; + vkDescriptorPoolSize.descriptorCount = dtdcIt->second; + + vkDescriptorPoolSizeList.push_back(vkDescriptorPoolSize); + } + + VkDescriptorPoolCreateInfo vkDescriptorPoolCreateInfo = {}; + vkDescriptorPoolCreateInfo.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + vkDescriptorPoolCreateInfo.pNext = NULL; + vkDescriptorPoolCreateInfo.flags = + VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; + vkDescriptorPoolCreateInfo.maxSets = 1; + vkDescriptorPoolCreateInfo.poolSizeCount = + (uint32_t)vkDescriptorPoolSizeList.size(); + vkDescriptorPoolCreateInfo.pPoolSizes = vkDescriptorPoolSizeList.data(); + + vkCreateDescriptorPool(m_device, &vkDescriptorPoolCreateInfo, NULL, + &m_vkDescriptorPool); + } +} + +VulkanDescriptorPool::VulkanDescriptorPool( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding) + : m_device(device), m_vkDescriptorPool(VK_NULL_HANDLE) +{ + VulkanDescriptorSetLayoutBindingList descriptorSetLayoutBindingList; + descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding); + + VulkanDescriptorPoolCommon(descriptorSetLayoutBindingList); +} + +VulkanDescriptorPool::VulkanDescriptorPool( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding0, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding1) + : m_device(device), m_vkDescriptorPool(VK_NULL_HANDLE) +{ + VulkanDescriptorSetLayoutBindingList descriptorSetLayoutBindingList; + descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding0); + descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding1); + + 
VulkanDescriptorPoolCommon(descriptorSetLayoutBindingList); +} + +VulkanDescriptorPool::VulkanDescriptorPool( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList) + : m_device(device), m_vkDescriptorPool(VK_NULL_HANDLE) +{ + VulkanDescriptorPoolCommon(descriptorSetLayoutBindingList); +} + +VulkanDescriptorPool::~VulkanDescriptorPool() +{ + if (m_vkDescriptorPool != VK_NULL_HANDLE) + { + vkDestroyDescriptorPool(m_device, m_vkDescriptorPool, NULL); + } +} + +VulkanDescriptorPool::operator VkDescriptorPool() const +{ + return m_vkDescriptorPool; +} + +//////////////////////////////////////// +// VulkanDescriptorSet implementation // +//////////////////////////////////////// + +VulkanDescriptorSet::VulkanDescriptorSet( + const VulkanDescriptorSet &descriptorSet) + : m_device(descriptorSet.m_device), + m_descriptorPool(descriptorSet.m_descriptorPool), + m_vkDescriptorSet(descriptorSet.m_vkDescriptorSet) +{} + +VulkanDescriptorSet::VulkanDescriptorSet( + const VulkanDevice &device, const VulkanDescriptorPool &descriptorPool, + const VulkanDescriptorSetLayout &descriptorSetLayout) + : m_device(device), m_descriptorPool(descriptorPool), + m_vkDescriptorSet(VK_NULL_HANDLE) +{ + VkDescriptorSetLayout vkDescriptorSetLayout = descriptorSetLayout; + + if ((VkDescriptorPool)m_descriptorPool) + { + VkDescriptorSetAllocateInfo vkDescriptorSetAllocateInfo = {}; + vkDescriptorSetAllocateInfo.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + vkDescriptorSetAllocateInfo.pNext = NULL; + vkDescriptorSetAllocateInfo.descriptorPool = descriptorPool; + vkDescriptorSetAllocateInfo.descriptorSetCount = 1; + vkDescriptorSetAllocateInfo.pSetLayouts = &vkDescriptorSetLayout; + + vkAllocateDescriptorSets(m_device, &vkDescriptorSetAllocateInfo, + &m_vkDescriptorSet); + } +} + +VulkanDescriptorSet::~VulkanDescriptorSet() +{ + if ((VkDescriptorPool)m_descriptorPool) + { + vkFreeDescriptorSets(m_device, m_descriptorPool, 1, 
&m_vkDescriptorSet); + } +} + +void VulkanDescriptorSet::update(uint32_t binding, const VulkanBuffer &buffer) +{ + VkDescriptorBufferInfo vkDescriptorBufferInfo = {}; + vkDescriptorBufferInfo.buffer = buffer; + vkDescriptorBufferInfo.offset = 0; + vkDescriptorBufferInfo.range = VK_WHOLE_SIZE; + + VkWriteDescriptorSet vkWriteDescriptorSet = {}; + vkWriteDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vkWriteDescriptorSet.pNext = NULL; + vkWriteDescriptorSet.dstSet = m_vkDescriptorSet; + vkWriteDescriptorSet.dstBinding = binding; + vkWriteDescriptorSet.dstArrayElement = 0; + vkWriteDescriptorSet.descriptorCount = 1; + vkWriteDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + vkWriteDescriptorSet.pImageInfo = NULL; + vkWriteDescriptorSet.pBufferInfo = &vkDescriptorBufferInfo; + vkWriteDescriptorSet.pTexelBufferView = NULL; + + vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL); +} + +void VulkanDescriptorSet::update(uint32_t binding, + const VulkanImageView &imageView) +{ + VkDescriptorImageInfo vkDescriptorImageInfo = {}; + vkDescriptorImageInfo.sampler = VK_NULL_HANDLE; + vkDescriptorImageInfo.imageView = imageView; + vkDescriptorImageInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + + VkWriteDescriptorSet vkWriteDescriptorSet = {}; + vkWriteDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vkWriteDescriptorSet.pNext = NULL; + vkWriteDescriptorSet.dstSet = m_vkDescriptorSet; + vkWriteDescriptorSet.dstBinding = binding; + vkWriteDescriptorSet.dstArrayElement = 0; + vkWriteDescriptorSet.descriptorCount = 1; + vkWriteDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + vkWriteDescriptorSet.pImageInfo = &vkDescriptorImageInfo; + vkWriteDescriptorSet.pBufferInfo = NULL; + vkWriteDescriptorSet.pTexelBufferView = NULL; + + vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL); +} + +VulkanDescriptorSet::operator VkDescriptorSet() const +{ + return m_vkDescriptorSet; +} + 
+/////////////////////////////////// +// VulkanOffset3D implementation // +/////////////////////////////////// + +VulkanOffset3D::VulkanOffset3D(const VulkanOffset3D &offset3D) + : m_vkOffset3D(offset3D.m_vkOffset3D) +{} + +VulkanOffset3D::VulkanOffset3D(uint32_t x, uint32_t y, uint32_t z) +{ + m_vkOffset3D.x = x; + m_vkOffset3D.y = y; + m_vkOffset3D.z = z; +} + +VulkanOffset3D::~VulkanOffset3D() {} + +uint32_t VulkanOffset3D::getX() const { return m_vkOffset3D.x; } + +uint32_t VulkanOffset3D::getY() const { return m_vkOffset3D.y; } + +uint32_t VulkanOffset3D::getZ() const { return m_vkOffset3D.z; } + +VulkanOffset3D::operator VkOffset3D() const { return m_vkOffset3D; } + +/////////////////////////////////// +// VulkanExtent3D implementation // +/////////////////////////////////// + +VulkanExtent3D::VulkanExtent3D(const VulkanExtent3D &extent3D) + : m_vkExtent3D(extent3D.m_vkExtent3D) +{} + +VulkanExtent3D::VulkanExtent3D(uint32_t width, uint32_t height, uint32_t depth) +{ + m_vkExtent3D.width = width; + m_vkExtent3D.height = height; + m_vkExtent3D.depth = depth; +} + +VulkanExtent3D::~VulkanExtent3D() {} + +uint32_t VulkanExtent3D::getWidth() const { return m_vkExtent3D.width; } + +uint32_t VulkanExtent3D::getHeight() const { return m_vkExtent3D.height; } + +uint32_t VulkanExtent3D::getDepth() const { return m_vkExtent3D.depth; } + +VulkanExtent3D::operator VkExtent3D() const { return m_vkExtent3D; } + +////////////////////////////////////// +// VulkanCommandPool implementation // +////////////////////////////////////// + +VulkanCommandPool::VulkanCommandPool(const VulkanCommandPool &commandPool) + : m_device(commandPool.m_device), + m_vkCommandPool(commandPool.m_vkCommandPool) +{} + +VulkanCommandPool::VulkanCommandPool(const VulkanDevice &device, + const VulkanQueueFamily &queueFamily) + : m_device(device) +{ + VkCommandPoolCreateInfo vkCommandPoolCreateInfo = {}; + vkCommandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + 
vkCommandPoolCreateInfo.pNext = NULL; + vkCommandPoolCreateInfo.flags = + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + vkCommandPoolCreateInfo.queueFamilyIndex = queueFamily; + + vkCreateCommandPool(m_device, &vkCommandPoolCreateInfo, NULL, + &m_vkCommandPool); +} + +VulkanCommandPool::~VulkanCommandPool() +{ + vkDestroyCommandPool(m_device, m_vkCommandPool, NULL); +} + +VulkanCommandPool::operator VkCommandPool() const { return m_vkCommandPool; } + +//////////////////////////////////////// +// VulkanCommandBuffer implementation // +//////////////////////////////////////// + +VulkanCommandBuffer::VulkanCommandBuffer( + const VulkanCommandBuffer &commandBuffer) + : m_device(commandBuffer.m_device), + m_commandPool(commandBuffer.m_commandPool), + m_vkCommandBuffer(commandBuffer.m_vkCommandBuffer) +{} + +VulkanCommandBuffer::VulkanCommandBuffer(const VulkanDevice &device, + const VulkanCommandPool &commandPool) + : m_device(device), m_commandPool(commandPool) +{ + VkCommandBufferAllocateInfo vkCommandBufferAllocateInfo = {}; + vkCommandBufferAllocateInfo.sType = + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + vkCommandBufferAllocateInfo.pNext = NULL; + vkCommandBufferAllocateInfo.commandPool = commandPool; + vkCommandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + vkCommandBufferAllocateInfo.commandBufferCount = 1; + + vkAllocateCommandBuffers(m_device, &vkCommandBufferAllocateInfo, + &m_vkCommandBuffer); +} + +VulkanCommandBuffer::~VulkanCommandBuffer() +{ + vkFreeCommandBuffers(m_device, m_commandPool, 1, &m_vkCommandBuffer); +} + +void VulkanCommandBuffer::begin() +{ + VkCommandBufferBeginInfo vkCommandBufferBeginInfo = {}; + vkCommandBufferBeginInfo.sType = + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + vkCommandBufferBeginInfo.pNext = NULL; + vkCommandBufferBeginInfo.flags = + VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; + vkCommandBufferBeginInfo.pInheritanceInfo = NULL; + + vkBeginCommandBuffer(m_vkCommandBuffer, 
&vkCommandBufferBeginInfo); +} + +void VulkanCommandBuffer::bindPipeline(const VulkanPipeline &pipeline) +{ + VkPipelineBindPoint vkPipelineBindPoint = + (VkPipelineBindPoint)pipeline.getPipelineBindPoint(); + + vkCmdBindPipeline(m_vkCommandBuffer, vkPipelineBindPoint, pipeline); +} + +void VulkanCommandBuffer::bindDescriptorSets( + const VulkanPipeline &pipeline, const VulkanPipelineLayout &pipelineLayout, + const VulkanDescriptorSet &descriptorSet) +{ + VkPipelineBindPoint vkPipelineBindPoint = + (VkPipelineBindPoint)pipeline.getPipelineBindPoint(); + VkDescriptorSet vkDescriptorSet = descriptorSet; + + vkCmdBindDescriptorSets(m_vkCommandBuffer, vkPipelineBindPoint, + pipelineLayout, 0, 1, &vkDescriptorSet, 0, NULL); +} + +void VulkanCommandBuffer::pipelineBarrier(const VulkanImage2DList &image2DList, + VulkanImageLayout oldImageLayout, + VulkanImageLayout newImageLayout) +{ + std::vector vkImageMemoryBarrierList; + for (size_t i2DIdx = 0; i2DIdx < image2DList.size(); i2DIdx++) + { + VkImageSubresourceRange vkImageSubresourceRange = {}; + vkImageSubresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkImageSubresourceRange.baseMipLevel = 0; + vkImageSubresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; + vkImageSubresourceRange.baseArrayLayer = 0; + vkImageSubresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + + VkImageMemoryBarrier vkImageMemoryBarrier = {}; + vkImageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + vkImageMemoryBarrier.pNext = NULL; + vkImageMemoryBarrier.srcAccessMask = 0; + vkImageMemoryBarrier.dstAccessMask = 0; + vkImageMemoryBarrier.oldLayout = (VkImageLayout)oldImageLayout; + vkImageMemoryBarrier.newLayout = (VkImageLayout)newImageLayout; + vkImageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vkImageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vkImageMemoryBarrier.image = image2DList[i2DIdx]; + vkImageMemoryBarrier.subresourceRange = vkImageSubresourceRange; + + 
vkImageMemoryBarrierList.push_back(vkImageMemoryBarrier); + } + + vkCmdPipelineBarrier(m_vkCommandBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, NULL, 0, + NULL, (uint32_t)vkImageMemoryBarrierList.size(), + vkImageMemoryBarrierList.data()); +} + +void VulkanCommandBuffer::dispatch(uint32_t groupCountX, uint32_t groupCountY, + uint32_t groupCountZ) +{ + vkCmdDispatch(m_vkCommandBuffer, groupCountX, groupCountY, groupCountZ); +} + +void VulkanCommandBuffer::fillBuffer(const VulkanBuffer &buffer, uint32_t data, + uint64_t offset, uint64_t size) +{ + vkCmdFillBuffer(m_vkCommandBuffer, buffer, offset, size, data); +} + +void VulkanCommandBuffer::updateBuffer(const VulkanBuffer &buffer, void *pdata, + uint64_t offset, uint64_t size) +{ + vkCmdUpdateBuffer(m_vkCommandBuffer, buffer, offset, size, pdata); +} + +void VulkanCommandBuffer::copyBufferToImage(const VulkanBuffer &buffer, + const VulkanImage &image, + VulkanImageLayout imageLayout) +{ + VkDeviceSize bufferOffset = 0; + + std::vector vkBufferImageCopyList; + for (uint32_t mipLevel = 0; mipLevel < image.getNumMipLevels(); mipLevel++) + { + VulkanExtent3D extent3D = image.getExtent3D(mipLevel); + size_t elementSize = getVulkanFormatElementSize(image.getFormat()); + + VkImageSubresourceLayers vkImageSubresourceLayers = {}; + vkImageSubresourceLayers.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkImageSubresourceLayers.mipLevel = mipLevel; + vkImageSubresourceLayers.baseArrayLayer = 0; + vkImageSubresourceLayers.layerCount = image.getNumLayers(); + + VkBufferImageCopy vkBufferImageCopy = {}; + vkBufferImageCopy.bufferOffset = bufferOffset; + vkBufferImageCopy.bufferRowLength = 0; + vkBufferImageCopy.bufferImageHeight = 0; + vkBufferImageCopy.imageSubresource = vkImageSubresourceLayers; + vkBufferImageCopy.imageOffset = VulkanOffset3D(0, 0, 0); + vkBufferImageCopy.imageExtent = extent3D; + + vkBufferImageCopyList.push_back(vkBufferImageCopy); + + bufferOffset += extent3D.getWidth() 
* extent3D.getHeight() + * extent3D.getDepth() * elementSize; + bufferOffset = + ROUND_UP(bufferOffset, + std::max(elementSize, + (size_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT)); + } + + vkCmdCopyBufferToImage( + m_vkCommandBuffer, buffer, image, (VkImageLayout)imageLayout, + (uint32_t)vkBufferImageCopyList.size(), vkBufferImageCopyList.data()); +} + +void VulkanCommandBuffer::copyBufferToImage( + const VulkanBuffer &buffer, const VulkanImage &image, uint64_t bufferOffset, + uint32_t mipLevel, uint32_t baseArrayLayer, uint32_t layerCount, + VulkanOffset3D offset3D, VulkanExtent3D extent3D) +{ + VkImageSubresourceLayers vkImageSubresourceLayers = {}; + vkImageSubresourceLayers.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkImageSubresourceLayers.mipLevel = mipLevel; + vkImageSubresourceLayers.baseArrayLayer = baseArrayLayer; + vkImageSubresourceLayers.layerCount = layerCount; + + VkExtent3D vkExtent3D = extent3D; + if ((extent3D.getWidth() == 0) && (extent3D.getHeight() == 0) + && (extent3D.getDepth() == 0)) + { + vkExtent3D = image.getExtent3D(mipLevel); + } + + VkBufferImageCopy vkBufferImageCopy = {}; + vkBufferImageCopy.bufferOffset = bufferOffset; + vkBufferImageCopy.bufferRowLength = 0; + vkBufferImageCopy.bufferImageHeight = 0; + vkBufferImageCopy.imageSubresource = vkImageSubresourceLayers; + vkBufferImageCopy.imageOffset = offset3D; + vkBufferImageCopy.imageExtent = vkExtent3D; + + vkCmdCopyBufferToImage(m_vkCommandBuffer, buffer, image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, + &vkBufferImageCopy); +} + +void VulkanCommandBuffer::copyImageToBuffer( + const VulkanImage &image, const VulkanBuffer &buffer, uint64_t bufferOffset, + uint32_t mipLevel, uint32_t baseArrayLayer, uint32_t layerCount, + VulkanOffset3D offset3D, VulkanExtent3D extent3D) +{ + VkImageSubresourceLayers vkImageSubresourceLayers = {}; + vkImageSubresourceLayers.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkImageSubresourceLayers.mipLevel = mipLevel; + 
vkImageSubresourceLayers.baseArrayLayer = baseArrayLayer; + vkImageSubresourceLayers.layerCount = layerCount; + + VkExtent3D vkExtent3D = extent3D; + if ((extent3D.getWidth() == 0) && (extent3D.getHeight() == 0) + && (extent3D.getDepth() == 0)) + { + vkExtent3D = image.getExtent3D(mipLevel); + } + + VkBufferImageCopy vkBufferImageCopy = {}; + vkBufferImageCopy.bufferOffset = bufferOffset; + vkBufferImageCopy.bufferRowLength = 0; + vkBufferImageCopy.bufferImageHeight = 0; + vkBufferImageCopy.imageSubresource = vkImageSubresourceLayers; + vkBufferImageCopy.imageOffset = offset3D; + vkBufferImageCopy.imageExtent = vkExtent3D; + + vkCmdCopyImageToBuffer(m_vkCommandBuffer, image, VK_IMAGE_LAYOUT_GENERAL, + buffer, 1, &vkBufferImageCopy); +} + +void VulkanCommandBuffer::end() { vkEndCommandBuffer(m_vkCommandBuffer); } + +VulkanCommandBuffer::operator VkCommandBuffer() const +{ + return m_vkCommandBuffer; +} + +///////////////////////////////// +// VulkanBuffer implementation // +///////////////////////////////// + +VulkanBuffer::VulkanBuffer(const VulkanBuffer &buffer) + : m_device(buffer.m_device), m_vkBuffer(buffer.m_vkBuffer), + m_size(buffer.m_size), m_alignment(buffer.m_alignment), + m_memoryTypeList(buffer.m_memoryTypeList) +{} + +VulkanBuffer::VulkanBuffer( + const VulkanDevice &device, uint64_t size, + VulkanExternalMemoryHandleType externalMemoryHandleType, + VulkanBufferUsage bufferUsage, VulkanSharingMode sharingMode, + const VulkanQueueFamilyList &queueFamilyList) + : m_device(device), m_vkBuffer(VK_NULL_HANDLE) +{ + std::vector queueFamilyIndexList; + if (queueFamilyList.size() == 0) + { + for (size_t qfIdx = 0; + qfIdx < device.getPhysicalDevice().getQueueFamilyList().size(); + qfIdx++) + { + queueFamilyIndexList.push_back( + device.getPhysicalDevice().getQueueFamilyList()[qfIdx]); + } + } + else + { + for (size_t qfIdx = 0; qfIdx < queueFamilyList.size(); qfIdx++) + { + queueFamilyIndexList.push_back(queueFamilyList[qfIdx]); + } + } + + VkBufferCreateInfo 
vkBufferCreateInfo = {}; + vkBufferCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + vkBufferCreateInfo.pNext = NULL; + vkBufferCreateInfo.flags = 0; + vkBufferCreateInfo.size = (VkDeviceSize)size; + vkBufferCreateInfo.usage = (VkBufferUsageFlags)bufferUsage; + vkBufferCreateInfo.sharingMode = (VkSharingMode)sharingMode; + vkBufferCreateInfo.queueFamilyIndexCount = + (uint32_t)queueFamilyIndexList.size(); + vkBufferCreateInfo.pQueueFamilyIndices = queueFamilyIndexList.data(); + + VkExternalMemoryBufferCreateInfo vkExternalMemoryBufferCreateInfo = {}; + if (externalMemoryHandleType != VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE) + { + vkExternalMemoryBufferCreateInfo.sType = + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR; + vkExternalMemoryBufferCreateInfo.pNext = NULL; + vkExternalMemoryBufferCreateInfo.handleTypes = + (VkExternalMemoryHandleTypeFlags)externalMemoryHandleType; + + vkBufferCreateInfo.pNext = &vkExternalMemoryBufferCreateInfo; + } + + vkCreateBuffer(m_device, &vkBufferCreateInfo, NULL, &m_vkBuffer); + + VkMemoryRequirements vkMemoryRequirements = {}; + vkGetBufferMemoryRequirements(m_device, m_vkBuffer, &vkMemoryRequirements); + m_size = vkMemoryRequirements.size; + m_alignment = vkMemoryRequirements.alignment; + const VulkanMemoryTypeList &memoryTypeList = + m_device.getPhysicalDevice().getMemoryTypeList(); + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) + { + uint32_t memoryTypeIndex = memoryTypeList[mtIdx]; + if ((1 << memoryTypeIndex) & vkMemoryRequirements.memoryTypeBits) + { + m_memoryTypeList.add(memoryTypeList[mtIdx]); + } + } +} + +VulkanBuffer::~VulkanBuffer() { vkDestroyBuffer(m_device, m_vkBuffer, NULL); } + +uint64_t VulkanBuffer::getSize() const { return m_size; } + +uint64_t VulkanBuffer::getAlignment() const { return m_alignment; } + +const VulkanMemoryTypeList &VulkanBuffer::getMemoryTypeList() const +{ + return m_memoryTypeList; +} + +VulkanBuffer::operator VkBuffer() const { return m_vkBuffer; } + 
+//////////////////////////////// +// VulkanImage implementation // +//////////////////////////////// + +VulkanImage::VulkanImage(const VulkanImage &image) + : m_device(image.m_device), m_imageType(image.m_imageType), + m_extent3D(image.m_extent3D), m_format(image.m_format), + m_numMipLevels(image.m_numMipLevels), m_numLayers(image.m_numLayers), + m_vkImage(image.m_vkImage), m_size(image.m_size), + m_alignment(image.m_alignment), m_memoryTypeList(image.m_memoryTypeList) +{} + +VulkanImage::VulkanImage( + const VulkanDevice &device, VulkanImageType imageType, VulkanFormat format, + const VulkanExtent3D &extent3D, uint32_t numMipLevels, uint32_t arrayLayers, + VulkanExternalMemoryHandleType externalMemoryHandleType, + VulkanImageCreateFlag imageCreateFlag, VulkanImageTiling imageTiling, + VulkanImageUsage imageUsage, VulkanSharingMode sharingMode) + : m_device(device), m_imageType(imageType), m_extent3D(extent3D), + m_format(format), m_numMipLevels(numMipLevels), m_numLayers(arrayLayers), + m_vkImage(VK_NULL_HANDLE) +{ + VkImageCreateInfo vkImageCreateInfo = {}; + vkImageCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + vkImageCreateInfo.pNext = NULL; + vkImageCreateInfo.flags = (VkImageCreateFlags)imageCreateFlag; + vkImageCreateInfo.imageType = (VkImageType)imageType; + vkImageCreateInfo.format = (VkFormat)format; + vkImageCreateInfo.extent = extent3D; + vkImageCreateInfo.mipLevels = numMipLevels; + vkImageCreateInfo.arrayLayers = arrayLayers; + vkImageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + vkImageCreateInfo.tiling = (VkImageTiling)imageTiling; + vkImageCreateInfo.usage = (VkImageUsageFlags)imageUsage; + vkImageCreateInfo.sharingMode = (VkSharingMode)sharingMode; + vkImageCreateInfo.queueFamilyIndexCount = + (uint32_t)m_device.getPhysicalDevice().getQueueFamilyList().size(); + vkImageCreateInfo.pQueueFamilyIndices = + m_device.getPhysicalDevice().getQueueFamilyList(); + vkImageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + 
VkExternalMemoryImageCreateInfo vkExternalMemoryImageCreateInfo = {}; + if (externalMemoryHandleType != VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE) + { + vkExternalMemoryImageCreateInfo.sType = + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; + vkExternalMemoryImageCreateInfo.pNext = NULL; + vkExternalMemoryImageCreateInfo.handleTypes = + (VkExternalMemoryHandleTypeFlags)externalMemoryHandleType; + + vkImageCreateInfo.pNext = &vkExternalMemoryImageCreateInfo; + } + + vkCreateImage(m_device, &vkImageCreateInfo, NULL, &m_vkImage); + VulkanImageCreateInfo = vkImageCreateInfo; + VkMemoryRequirements vkMemoryRequirements = {}; + vkGetImageMemoryRequirements(m_device, m_vkImage, &vkMemoryRequirements); + m_size = vkMemoryRequirements.size; + m_alignment = vkMemoryRequirements.alignment; + const VulkanMemoryTypeList &memoryTypeList = + m_device.getPhysicalDevice().getMemoryTypeList(); + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) + { + uint32_t memoryTypeIndex = memoryTypeList[mtIdx]; + if ((1 << memoryTypeIndex) & vkMemoryRequirements.memoryTypeBits) + { + m_memoryTypeList.add(memoryTypeList[mtIdx]); + } + } +} + +VulkanImage::~VulkanImage() { vkDestroyImage(m_device, m_vkImage, NULL); } + +VulkanExtent3D VulkanImage::getExtent3D(uint32_t mipLevel) const +{ + return VulkanExtent3D(0, 0, 0); +} + +VulkanFormat VulkanImage::getFormat() const { return m_format; } + +VkImageCreateInfo VulkanImage::getVkImageCreateInfo() const +{ + return VulkanImageCreateInfo; +} + +uint32_t VulkanImage::getNumMipLevels() const { return m_numMipLevels; } + +uint32_t VulkanImage::getNumLayers() const { return m_numLayers; } + +uint64_t VulkanImage::getSize() const { return m_size; } + +uint64_t VulkanImage::getAlignment() const { return m_alignment; } + +const VulkanMemoryTypeList &VulkanImage::getMemoryTypeList() const +{ + return m_memoryTypeList; +} + +VulkanImage::operator VkImage() const { return m_vkImage; } + +////////////////////////////////// +// VulkanImage2D 
implementation // +////////////////////////////////// + +VulkanImage2D::VulkanImage2D(const VulkanImage2D &image2D): VulkanImage(image2D) +{} + +VulkanImage2D::VulkanImage2D( + const VulkanDevice &device, VulkanFormat format, uint32_t width, + uint32_t height, uint32_t numMipLevels, + VulkanExternalMemoryHandleType externalMemoryHandleType, + VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage, + VulkanSharingMode sharingMode) + : VulkanImage(device, VULKAN_IMAGE_TYPE_2D, format, + VulkanExtent3D(width, height, 1), numMipLevels, 1, + externalMemoryHandleType, imageCreateFlag, + VULKAN_IMAGE_TILING_OPTIMAL, imageUsage, sharingMode) +{} + +VulkanImage2D::~VulkanImage2D() {} + +VulkanExtent3D VulkanImage2D::getExtent3D(uint32_t mipLevel) const +{ + uint32_t width = std::max(m_extent3D.getWidth() >> mipLevel, uint32_t(1)); + uint32_t height = std::max(m_extent3D.getHeight() >> mipLevel, uint32_t(1)); + uint32_t depth = 1; + + return VulkanExtent3D(width, height, depth); +} + +//////////////////////////////////// +// VulkanImageView implementation // +//////////////////////////////////// + +VulkanImageView::VulkanImageView(const VulkanImageView &imageView) + : m_device(imageView.m_device), m_vkImageView(imageView.m_vkImageView) +{} + +VulkanImageView::VulkanImageView(const VulkanDevice &device, + const VulkanImage &image, + VulkanImageViewType imageViewType, + uint32_t baseMipLevel, uint32_t levelCount, + uint32_t baseArrayLayer, uint32_t layerCount) + : m_device(device), m_vkImageView(VK_NULL_HANDLE) +{ + VkComponentMapping vkComponentMapping = {}; + vkComponentMapping.r = VK_COMPONENT_SWIZZLE_IDENTITY; + vkComponentMapping.g = VK_COMPONENT_SWIZZLE_IDENTITY; + vkComponentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY; + vkComponentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY; + + VkImageSubresourceRange vkImageSubresourceRange = {}; + vkImageSubresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkImageSubresourceRange.baseMipLevel = baseMipLevel; + 
vkImageSubresourceRange.levelCount = levelCount; + vkImageSubresourceRange.baseArrayLayer = baseArrayLayer; + vkImageSubresourceRange.layerCount = layerCount; + + VkImageViewCreateInfo vkImageViewCreateInfo = {}; + vkImageViewCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + vkImageViewCreateInfo.pNext = NULL; + vkImageViewCreateInfo.flags = 0; + vkImageViewCreateInfo.image = image; + vkImageViewCreateInfo.viewType = (VkImageViewType)imageViewType; + vkImageViewCreateInfo.format = (VkFormat)image.getFormat(); + vkImageViewCreateInfo.components = vkComponentMapping; + vkImageViewCreateInfo.subresourceRange = vkImageSubresourceRange; + + vkCreateImageView(m_device, &vkImageViewCreateInfo, NULL, &m_vkImageView); +} + +VulkanImageView::~VulkanImageView() +{ + vkDestroyImageView(m_device, m_vkImageView, NULL); +} + +VulkanImageView::operator VkImageView() const { return m_vkImageView; } + +/////////////////////////////////////// +// VulkanDeviceMemory implementation // +/////////////////////////////////////// + +#if defined(_WIN32) || defined(_WIN64) + +class WindowsSecurityAttributes { +protected: + SECURITY_ATTRIBUTES m_winSecurityAttributes; + PSECURITY_DESCRIPTOR m_winPSecurityDescriptor; + +public: + WindowsSecurityAttributes(); + SECURITY_ATTRIBUTES *operator&(); + ~WindowsSecurityAttributes(); +}; + + +WindowsSecurityAttributes::WindowsSecurityAttributes() +{ + m_winPSecurityDescriptor = (PSECURITY_DESCRIPTOR)calloc( + 1, SECURITY_DESCRIPTOR_MIN_LENGTH + 2 * sizeof(void **)); + // CHECK_NEQ(m_winPSecurityDescriptor, (PSECURITY_DESCRIPTOR)NULL); + PSID *ppSID = (PSID *)((PBYTE)m_winPSecurityDescriptor + + SECURITY_DESCRIPTOR_MIN_LENGTH); + PACL *ppACL = (PACL *)((PBYTE)ppSID + sizeof(PSID *)); + InitializeSecurityDescriptor(m_winPSecurityDescriptor, + SECURITY_DESCRIPTOR_REVISION); + SID_IDENTIFIER_AUTHORITY sidIdentifierAuthority = + SECURITY_WORLD_SID_AUTHORITY; + AllocateAndInitializeSid(&sidIdentifierAuthority, 1, SECURITY_WORLD_RID, 0, + 0, 0, 0, 
0, 0, 0, ppSID); + EXPLICIT_ACCESS explicitAccess; + ZeroMemory(&explicitAccess, sizeof(EXPLICIT_ACCESS)); + explicitAccess.grfAccessPermissions = + STANDARD_RIGHTS_ALL | SPECIFIC_RIGHTS_ALL; + explicitAccess.grfAccessMode = SET_ACCESS; + explicitAccess.grfInheritance = INHERIT_ONLY; + explicitAccess.Trustee.TrusteeForm = TRUSTEE_IS_SID; + explicitAccess.Trustee.TrusteeType = TRUSTEE_IS_WELL_KNOWN_GROUP; + explicitAccess.Trustee.ptstrName = (LPTSTR)*ppSID; + SetEntriesInAcl(1, &explicitAccess, NULL, ppACL); + SetSecurityDescriptorDacl(m_winPSecurityDescriptor, TRUE, *ppACL, FALSE); + m_winSecurityAttributes.nLength = sizeof(m_winSecurityAttributes); + m_winSecurityAttributes.lpSecurityDescriptor = m_winPSecurityDescriptor; + m_winSecurityAttributes.bInheritHandle = TRUE; +} + +SECURITY_ATTRIBUTES *WindowsSecurityAttributes::operator&() +{ + return &m_winSecurityAttributes; +} + +WindowsSecurityAttributes::~WindowsSecurityAttributes() +{ + PSID *ppSID = (PSID *)((PBYTE)m_winPSecurityDescriptor + + SECURITY_DESCRIPTOR_MIN_LENGTH); + PACL *ppACL = (PACL *)((PBYTE)ppSID + sizeof(PSID *)); + if (*ppSID) + { + FreeSid(*ppSID); + } + if (*ppACL) + { + LocalFree(*ppACL); + } + free(m_winPSecurityDescriptor); +} + +#endif + +VulkanDeviceMemory::VulkanDeviceMemory(const VulkanDeviceMemory &deviceMemory) + : m_device(deviceMemory.m_device), + m_vkDeviceMemory(deviceMemory.m_vkDeviceMemory), + m_size(deviceMemory.m_size), m_isDedicated(deviceMemory.m_isDedicated) +{} + +VulkanDeviceMemory::VulkanDeviceMemory( + const VulkanDevice &device, uint64_t size, + const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType, const void *name) + : m_device(device), m_size(size), m_isDedicated(false) +{ +#if defined(_WIN32) || defined(_WIN64) + WindowsSecurityAttributes winSecurityAttributes; + + VkExportMemoryWin32HandleInfoKHR vkExportMemoryWin32HandleInfoKHR = {}; + vkExportMemoryWin32HandleInfoKHR.sType = + 
VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR; + vkExportMemoryWin32HandleInfoKHR.pNext = NULL; + vkExportMemoryWin32HandleInfoKHR.pAttributes = &winSecurityAttributes; + vkExportMemoryWin32HandleInfoKHR.dwAccess = + DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE; + vkExportMemoryWin32HandleInfoKHR.name = (LPCWSTR)name; + +#endif + + VkExportMemoryAllocateInfoKHR vkExportMemoryAllocateInfoKHR = {}; + vkExportMemoryAllocateInfoKHR.sType = + VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR; +#if defined(_WIN32) || defined(_WIN64) + vkExportMemoryAllocateInfoKHR.pNext = externalMemoryHandleType + & VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT + ? &vkExportMemoryWin32HandleInfoKHR + : NULL; +#else + vkExportMemoryAllocateInfoKHR.pNext = NULL; +#endif + vkExportMemoryAllocateInfoKHR.handleTypes = + (VkExternalMemoryHandleTypeFlagsKHR)externalMemoryHandleType; + + VkMemoryAllocateInfo vkMemoryAllocateInfo = {}; + vkMemoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + vkMemoryAllocateInfo.pNext = + externalMemoryHandleType ? 
&vkExportMemoryAllocateInfoKHR : NULL; + vkMemoryAllocateInfo.allocationSize = m_size; + vkMemoryAllocateInfo.memoryTypeIndex = (uint32_t)memoryType; + + vkAllocateMemory(m_device, &vkMemoryAllocateInfo, NULL, &m_vkDeviceMemory); +} + +VulkanDeviceMemory::VulkanDeviceMemory( + const VulkanDevice &device, const VulkanImage &image, + const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType, const void *name) + : m_device(device), m_size(image.getSize()), m_isDedicated(true) +{ +#if defined(_WIN32) || defined(_WIN64) + WindowsSecurityAttributes winSecurityAttributes; + + VkExportMemoryWin32HandleInfoKHR vkExportMemoryWin32HandleInfoKHR = {}; + vkExportMemoryWin32HandleInfoKHR.sType = + VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR; + vkExportMemoryWin32HandleInfoKHR.pNext = NULL; + vkExportMemoryWin32HandleInfoKHR.pAttributes = &winSecurityAttributes; + vkExportMemoryWin32HandleInfoKHR.dwAccess = + DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE; + vkExportMemoryWin32HandleInfoKHR.name = (LPCWSTR)name; + +#endif + + VkExportMemoryAllocateInfoKHR vkExportMemoryAllocateInfoKHR = {}; + vkExportMemoryAllocateInfoKHR.sType = + VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR; +#if defined(_WIN32) || defined(_WIN64) + vkExportMemoryAllocateInfoKHR.pNext = externalMemoryHandleType + & VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT + ? &vkExportMemoryWin32HandleInfoKHR + : NULL; +#else + vkExportMemoryAllocateInfoKHR.pNext = NULL; +#endif + vkExportMemoryAllocateInfoKHR.handleTypes = + (VkExternalMemoryHandleTypeFlagsKHR)externalMemoryHandleType; + + VkMemoryDedicatedAllocateInfo vkMemoryDedicatedAllocateInfo = {}; + vkMemoryDedicatedAllocateInfo.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; + vkMemoryDedicatedAllocateInfo.pNext = + externalMemoryHandleType ? 
&vkExportMemoryAllocateInfoKHR : NULL; + vkMemoryDedicatedAllocateInfo.image = image; + vkMemoryDedicatedAllocateInfo.buffer = VK_NULL_HANDLE; + + VkMemoryAllocateInfo vkMemoryAllocateInfo = {}; + vkMemoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + vkMemoryAllocateInfo.pNext = &vkMemoryDedicatedAllocateInfo; + vkMemoryAllocateInfo.allocationSize = m_size; + vkMemoryAllocateInfo.memoryTypeIndex = (uint32_t)memoryType; + + vkAllocateMemory(m_device, &vkMemoryAllocateInfo, NULL, &m_vkDeviceMemory); +} + +VulkanDeviceMemory::~VulkanDeviceMemory() +{ + vkFreeMemory(m_device, m_vkDeviceMemory, NULL); +} + +uint64_t VulkanDeviceMemory::getSize() const { return m_size; } + +#ifdef _WIN32 +HANDLE VulkanDeviceMemory::getHandle( + VulkanExternalMemoryHandleType externalMemoryHandleType) const +{ + HANDLE handle; + + VkMemoryGetWin32HandleInfoKHR vkMemoryGetWin32HandleInfoKHR = {}; + vkMemoryGetWin32HandleInfoKHR.sType = + VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; + vkMemoryGetWin32HandleInfoKHR.pNext = NULL; + vkMemoryGetWin32HandleInfoKHR.memory = m_vkDeviceMemory; + vkMemoryGetWin32HandleInfoKHR.handleType = + (VkExternalMemoryHandleTypeFlagBitsKHR)externalMemoryHandleType; + + vkGetMemoryWin32HandleKHR(m_device, &vkMemoryGetWin32HandleInfoKHR, + &handle); + + return handle; +} +#else +int VulkanDeviceMemory::getHandle( + VulkanExternalMemoryHandleType externalMemoryHandleType) const +{ + if (externalMemoryHandleType + == VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD) + { + int fd; + + VkMemoryGetFdInfoKHR vkMemoryGetFdInfoKHR = {}; + vkMemoryGetFdInfoKHR.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + vkMemoryGetFdInfoKHR.pNext = NULL; + vkMemoryGetFdInfoKHR.memory = m_vkDeviceMemory; + vkMemoryGetFdInfoKHR.handleType = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; + + vkGetMemoryFdKHR(m_device, &vkMemoryGetFdInfoKHR, &fd); + + return fd; + } + return HANDLE_ERROR; +} +#endif + +bool VulkanDeviceMemory::isDedicated() const { return 
m_isDedicated; } + +void *VulkanDeviceMemory::map(size_t offset, size_t size) +{ + void *pData; + + vkMapMemory(m_device, m_vkDeviceMemory, (VkDeviceSize)offset, + (VkDeviceSize)size, 0, &pData); + + return pData; +} + +void VulkanDeviceMemory::unmap() { vkUnmapMemory(m_device, m_vkDeviceMemory); } + +void VulkanDeviceMemory::bindBuffer(const VulkanBuffer &buffer, uint64_t offset) +{ + vkBindBufferMemory(m_device, buffer, m_vkDeviceMemory, offset); +} + +void VulkanDeviceMemory::bindImage(const VulkanImage &image, uint64_t offset) +{ + vkBindImageMemory(m_device, image, m_vkDeviceMemory, offset); +} + +VulkanDeviceMemory::operator VkDeviceMemory() const { return m_vkDeviceMemory; } + +//////////////////////////////////// +// VulkanSemaphore implementation // +//////////////////////////////////// + +VulkanSemaphore::VulkanSemaphore(const VulkanSemaphore &semaphore) + : m_device(semaphore.m_device), m_vkSemaphore(semaphore.m_vkSemaphore) +{} + +VulkanSemaphore::VulkanSemaphore( + const VulkanDevice &device, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType, + const std::wstring name) + : m_device(device), m_name(name) +{ +#if defined(_WIN32) || defined(_WIN64) + WindowsSecurityAttributes winSecurityAttributes; + + VkExportSemaphoreWin32HandleInfoKHR + vkExportSemaphoreWin32HandleInfoKHR = {}; + vkExportSemaphoreWin32HandleInfoKHR.sType = + VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR; + vkExportSemaphoreWin32HandleInfoKHR.pNext = NULL; + vkExportSemaphoreWin32HandleInfoKHR.pAttributes = &winSecurityAttributes; + vkExportSemaphoreWin32HandleInfoKHR.dwAccess = + DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE; + vkExportSemaphoreWin32HandleInfoKHR.name = + m_name.size() ? 
(LPCWSTR)m_name.c_str() : NULL; +#endif + + VkExportSemaphoreCreateInfoKHR vkExportSemaphoreCreateInfoKHR = {}; + vkExportSemaphoreCreateInfoKHR.sType = + VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO_KHR; +#if defined(_WIN32) || defined(_WIN64) + vkExportSemaphoreCreateInfoKHR.pNext = + (externalSemaphoreHandleType + & VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT) + ? &vkExportSemaphoreWin32HandleInfoKHR + : NULL; +#else + vkExportSemaphoreCreateInfoKHR.pNext = NULL; +#endif + vkExportSemaphoreCreateInfoKHR.handleTypes = + (VkExternalSemaphoreHandleTypeFlagsKHR)externalSemaphoreHandleType; + + VkSemaphoreCreateInfo vkSemaphoreCreateInfo = {}; + vkSemaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + vkSemaphoreCreateInfo.pNext = + (externalSemaphoreHandleType + != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE) + ? &vkExportSemaphoreCreateInfoKHR + : NULL; + vkSemaphoreCreateInfo.flags = 0; + + vkCreateSemaphore(m_device, &vkSemaphoreCreateInfo, NULL, &m_vkSemaphore); +} + +VulkanSemaphore::~VulkanSemaphore() +{ + vkDestroySemaphore(m_device, m_vkSemaphore, NULL); +} + +#if defined(_WIN32) || defined(_WIN64) +HANDLE VulkanSemaphore::getHandle( + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType) const +{ + HANDLE handle; + + VkSemaphoreGetWin32HandleInfoKHR vkSemaphoreGetWin32HandleInfoKHR = {}; + vkSemaphoreGetWin32HandleInfoKHR.sType = + VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR; + vkSemaphoreGetWin32HandleInfoKHR.pNext = NULL; + vkSemaphoreGetWin32HandleInfoKHR.semaphore = m_vkSemaphore; + vkSemaphoreGetWin32HandleInfoKHR.handleType = + (VkExternalSemaphoreHandleTypeFlagBitsKHR)externalSemaphoreHandleType; + + vkGetSemaphoreWin32HandleKHR(m_device, &vkSemaphoreGetWin32HandleInfoKHR, + &handle); + + return handle; +} +#else +int VulkanSemaphore::getHandle( + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType) const +{ + if (externalSemaphoreHandleType + == VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD) + { 
+ int fd; + + VkSemaphoreGetFdInfoKHR vkSemaphoreGetFdInfoKHR = {}; + vkSemaphoreGetFdInfoKHR.sType = + VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR; + vkSemaphoreGetFdInfoKHR.pNext = NULL; + vkSemaphoreGetFdInfoKHR.semaphore = m_vkSemaphore; + vkSemaphoreGetFdInfoKHR.handleType = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; + + vkGetSemaphoreFdKHR(m_device, &vkSemaphoreGetFdInfoKHR, &fd); + + return fd; + } + return HANDLE_ERROR; +} +#endif + +const std::wstring &VulkanSemaphore::getName() const { return m_name; } + +VulkanSemaphore::operator VkSemaphore() const { return m_vkSemaphore; } diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp new file mode 100644 index 00000000..1f68a92b --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp @@ -0,0 +1,579 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef _vulkan_wrapper_hpp_ +#define _vulkan_wrapper_hpp_ + +#include +#include "vulkan_wrapper_types.hpp" +#include "vulkan_list_map.hpp" +#include "vulkan_api_list.hpp" + +class VulkanInstance { + friend const VulkanInstance &getVulkanInstance(); + +protected: + VkInstance m_vkInstance; + VulkanPhysicalDeviceList m_physicalDeviceList; + + VulkanInstance(); + VulkanInstance(const VulkanInstance &); + virtual ~VulkanInstance(); + +public: + const VulkanPhysicalDeviceList &getPhysicalDeviceList() const; + operator VkInstance() const; +}; + +class VulkanPhysicalDevice { + friend class VulkanInstance; + +protected: + VkPhysicalDevice m_vkPhysicalDevice; + VkPhysicalDeviceProperties m_vkPhysicalDeviceProperties; + uint8_t m_vkDeviceUUID[VK_UUID_SIZE]; + uint8_t m_vkDeviceLUID[VK_LUID_SIZE]; + uint32_t m_vkDeviceNodeMask; + VkPhysicalDeviceFeatures m_vkPhysicalDeviceFeatures; + VkPhysicalDeviceMemoryProperties m_vkPhysicalDeviceMemoryProperties; + VulkanQueueFamilyList m_queueFamilyList; + VulkanMemoryHeapList m_memoryHeapList; + VulkanMemoryTypeList m_memoryTypeList; + + VulkanPhysicalDevice(const VulkanPhysicalDevice &physicalDevice); + VulkanPhysicalDevice(VkPhysicalDevice vkPhysicalDevice); + virtual ~VulkanPhysicalDevice(); + +public: + const VulkanQueueFamilyList &getQueueFamilyList() const; + const VulkanMemoryHeapList &getMemoryHeapList() const; + const VulkanMemoryTypeList &getMemoryTypeList() const; + const uint8_t *getUUID() const; + const uint8_t *getLUID() const; + uint32_t getNodeMask() const; + operator VkPhysicalDevice() const; +}; + +class VulkanMemoryHeap { + friend class VulkanPhysicalDevice; + +protected: + uint32_t m_memoryHeapIndex; + uint64_t m_size; + VulkanMemoryHeapFlag m_memoryHeapFlag; + + VulkanMemoryHeap(const VulkanMemoryHeap &memoryHeap); + VulkanMemoryHeap(uint32_t m_memoryHeapIndex, uint64_t m_size, + VulkanMemoryHeapFlag m_memoryHeapFlag); + virtual ~VulkanMemoryHeap(); + +public: + uint64_t getSize() const; + 
VulkanMemoryHeapFlag getMemoryHeapFlag() const; + operator uint32_t() const; +}; + +class VulkanMemoryType { + friend class VulkanPhysicalDevice; + +protected: + uint32_t m_memoryTypeIndex; + const VulkanMemoryTypeProperty m_memoryTypeProperty; + const VulkanMemoryHeap &m_memoryHeap; + + VulkanMemoryType(const VulkanMemoryType &memoryType); + VulkanMemoryType(uint32_t memoryTypeIndex, + VulkanMemoryTypeProperty memoryTypeProperty, + const VulkanMemoryHeap &memoryHeap); + virtual ~VulkanMemoryType(); + +public: + VulkanMemoryTypeProperty getMemoryTypeProperty() const; + const VulkanMemoryHeap &getMemoryHeap() const; + operator uint32_t() const; +}; + +class VulkanQueueFamily { + friend class VulkanPhysicalDevice; + +protected: + uint32_t m_queueFamilyIndex; + VkQueueFamilyProperties m_vkQueueFamilyProperties; + + VulkanQueueFamily(const VulkanQueueFamily &queueFamily); + VulkanQueueFamily(uint32_t queueFamilyIndex, + VkQueueFamilyProperties vkQueueFamilyProperties); + virtual ~VulkanQueueFamily(); + +public: + uint32_t getQueueFlags() const; + uint32_t getQueueCount() const; + operator uint32_t() const; +}; + +class VulkanDevice { +protected: + const VulkanPhysicalDevice &m_physicalDevice; + VkDevice m_vkDevice; + VulkanQueueFamilyToQueueListMap m_queueFamilyIndexToQueueListMap; + + VulkanDevice(const VulkanDevice &device); + +public: + VulkanDevice( + const VulkanPhysicalDevice &physicalDevice = getVulkanPhysicalDevice(), + const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap = + getDefaultVulkanQueueFamilyToQueueCountMap()); + virtual ~VulkanDevice(); + const VulkanPhysicalDevice &getPhysicalDevice() const; + VulkanQueue & + getQueue(const VulkanQueueFamily &queueFamily = getVulkanQueueFamily(), + uint32_t queueIndex = 0); + operator VkDevice() const; +}; + +class VulkanQueue { + friend class VulkanDevice; + +protected: + VkQueue m_vkQueue; + + VulkanQueue(VkQueue vkQueue); + VulkanQueue(const VulkanQueue &queue); + virtual ~VulkanQueue(); + +public: 
+ const VulkanQueueFamily &getQueueFamily(); + void submit(const VulkanSemaphoreList &waitSemaphoreList, + const VulkanCommandBufferList &commandBufferList, + const VulkanSemaphoreList &signalSemaphoreList); + void submit(const VulkanSemaphore &waitSemaphore, + const VulkanCommandBuffer &commandBuffer, + const VulkanSemaphore &signalSemaphore); + void submit(const VulkanCommandBuffer &commandBuffer, + const VulkanSemaphore &signalSemaphore); + void submit(const VulkanCommandBuffer &commandBuffer); + void waitIdle(); + operator VkQueue() const; +}; + +class VulkanDescriptorSetLayoutBinding { +protected: + VkDescriptorSetLayoutBinding m_vkDescriptorSetLayoutBinding; + + VulkanDescriptorSetLayoutBinding( + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding); + +public: + VulkanDescriptorSetLayoutBinding( + uint32_t binding, VulkanDescriptorType descriptorType, + uint32_t descriptorCount = 1, + VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE); + virtual ~VulkanDescriptorSetLayoutBinding(); + operator VkDescriptorSetLayoutBinding() const; +}; + +class VulkanDescriptorSetLayout { +protected: + const VulkanDevice &m_device; + VkDescriptorSetLayout m_vkDescriptorSetLayout; + + VulkanDescriptorSetLayout( + const VulkanDescriptorSetLayout &descriptorSetLayout); + void + VulkanDescriptorSetLayoutCommon(const VulkanDescriptorSetLayoutBindingList + &descriptorSetLayoutBindingList); + +public: + VulkanDescriptorSetLayout( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding); + VulkanDescriptorSetLayout( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding0, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding1); + VulkanDescriptorSetLayout(const VulkanDevice &device, + const VulkanDescriptorSetLayoutBindingList + &descriptorSetLayoutBindingList); + virtual ~VulkanDescriptorSetLayout(); + operator VkDescriptorSetLayout() const; +}; + +class 
VulkanPipelineLayout { +protected: + const VulkanDevice &m_device; + VkPipelineLayout m_vkPipelineLayout; + + VulkanPipelineLayout(const VulkanPipelineLayout &pipelineLayout); + void VulkanPipelineLayoutCommon( + const VulkanDescriptorSetLayoutList &descriptorSetLayoutList); + +public: + VulkanPipelineLayout(const VulkanDevice &device, + const VulkanDescriptorSetLayout &descriptorSetLayout); + VulkanPipelineLayout( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutList &descriptorSetLayoutList = + getEmptyVulkanDescriptorSetLayoutList()); + virtual ~VulkanPipelineLayout(); + operator VkPipelineLayout() const; +}; + +class VulkanShaderModule { +protected: + const VulkanDevice &m_device; + VkShaderModule m_vkShaderModule; + + VulkanShaderModule(const VulkanShaderModule &shaderModule); + +public: + VulkanShaderModule(const VulkanDevice &device, const std::string &code); + virtual ~VulkanShaderModule(); + operator VkShaderModule() const; +}; + +class VulkanPipeline { +protected: + const VulkanDevice &m_device; + VkPipeline m_vkPipeline; + + VulkanPipeline(const VulkanPipeline &pipeline); + +public: + VulkanPipeline(const VulkanDevice &device); + virtual ~VulkanPipeline(); + virtual VulkanPipelineBindPoint getPipelineBindPoint() const = 0; + operator VkPipeline() const; +}; + +class VulkanComputePipeline : public VulkanPipeline { +protected: + VulkanComputePipeline(const VulkanComputePipeline &computePipeline); + +public: + VulkanComputePipeline(const VulkanDevice &device, + const VulkanPipelineLayout &pipelineLayout, + const VulkanShaderModule &shaderModule, + const std::string &entryFuncName = "main"); + virtual ~VulkanComputePipeline(); + VulkanPipelineBindPoint getPipelineBindPoint() const; +}; + +class VulkanDescriptorPool { +protected: + const VulkanDevice &m_device; + VkDescriptorPool m_vkDescriptorPool; + + VulkanDescriptorPool(const VulkanDescriptorPool &descriptorPool); + void VulkanDescriptorPoolCommon(const VulkanDescriptorSetLayoutBindingList + 
&descriptorSetLayoutBindingList); + +public: + VulkanDescriptorPool( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding); + VulkanDescriptorPool( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding0, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding1); + VulkanDescriptorPool(const VulkanDevice &device, + const VulkanDescriptorSetLayoutBindingList + &descriptorSetLayoutBindingList); + virtual ~VulkanDescriptorPool(); + operator VkDescriptorPool() const; +}; + +class VulkanDescriptorSet { +protected: + const VulkanDevice &m_device; + const VulkanDescriptorPool &m_descriptorPool; + VkDescriptorSet m_vkDescriptorSet; + + VulkanDescriptorSet(const VulkanDescriptorSet &descriptorSet); + +public: + VulkanDescriptorSet(const VulkanDevice &device, + const VulkanDescriptorPool &descriptorPool, + const VulkanDescriptorSetLayout &descriptorSetLayout); + virtual ~VulkanDescriptorSet(); + void update(uint32_t binding, const VulkanBuffer &buffer); + void update(uint32_t binding, const VulkanImageView &imageView); + operator VkDescriptorSet() const; +}; + +class VulkanOffset3D { +protected: + VkOffset3D m_vkOffset3D; + +public: + VulkanOffset3D(const VulkanOffset3D &extent3D); + VulkanOffset3D(uint32_t x = 0, uint32_t y = 0, uint32_t z = 0); + virtual ~VulkanOffset3D(); + uint32_t getX() const; + uint32_t getY() const; + uint32_t getZ() const; + operator VkOffset3D() const; +}; + +class VulkanExtent3D { +protected: + VkExtent3D m_vkExtent3D; + +public: + VulkanExtent3D(const VulkanExtent3D &extent3D); + VulkanExtent3D(uint32_t width, uint32_t height = 1, uint32_t depth = 1); + virtual ~VulkanExtent3D(); + uint32_t getWidth() const; + uint32_t getHeight() const; + uint32_t getDepth() const; + operator VkExtent3D() const; +}; + +class VulkanCommandPool { +protected: + const VulkanDevice &m_device; + VkCommandPool m_vkCommandPool; + + VulkanCommandPool(const 
VulkanCommandPool &commandPool); + +public: + VulkanCommandPool( + const VulkanDevice &device, + const VulkanQueueFamily &queueFamily = getVulkanQueueFamily()); + virtual ~VulkanCommandPool(); + operator VkCommandPool() const; +}; + +class VulkanCommandBuffer { +protected: + const VulkanDevice &m_device; + const VulkanCommandPool &m_commandPool; + VkCommandBuffer m_vkCommandBuffer; + + VulkanCommandBuffer(const VulkanCommandBuffer &commandBuffer); + +public: + VulkanCommandBuffer(const VulkanDevice &device, + const VulkanCommandPool &commandPool); + virtual ~VulkanCommandBuffer(); + void begin(); + void bindPipeline(const VulkanPipeline &pipeline); + void bindDescriptorSets(const VulkanPipeline &pipeline, + const VulkanPipelineLayout &pipelineLayout, + const VulkanDescriptorSet &descriptorSet); + void pipelineBarrier(const VulkanImage2DList &image2DList, + VulkanImageLayout oldImageLayout, + VulkanImageLayout newImageLayout); + void dispatch(uint32_t groupCountX, uint32_t groupCountY, + uint32_t groupCountZ); + void fillBuffer(const VulkanBuffer &buffer, uint32_t data, + uint64_t offset = 0, uint64_t size = VK_WHOLE_SIZE); + void updateBuffer(const VulkanBuffer &buffer, void *pdata, + uint64_t offset = 0, uint64_t size = VK_WHOLE_SIZE); + void copyBufferToImage(const VulkanBuffer &buffer, const VulkanImage &image, + VulkanImageLayout imageLayout = + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + void copyBufferToImage(const VulkanBuffer &buffer, const VulkanImage &image, + uint64_t bufferOffset = 0, uint32_t mipLevel = 0, + uint32_t baseArrayLayer = 0, uint32_t layerCount = 1, + VulkanOffset3D offset3D = VulkanOffset3D(0, 0, 0), + VulkanExtent3D extent3D = VulkanExtent3D(0, 0, 0)); + void copyImageToBuffer(const VulkanImage &image, const VulkanBuffer &buffer, + uint64_t bufferOffset = 0, uint32_t mipLevel = 0, + uint32_t baseArrayLayer = 0, uint32_t layerCount = 1, + VulkanOffset3D offset3D = VulkanOffset3D(0, 0, 0), + VulkanExtent3D extent3D = VulkanExtent3D(0, 0, 
0)); + void end(); + operator VkCommandBuffer() const; +}; + +class VulkanBuffer { +protected: + const VulkanDevice &m_device; + VkBuffer m_vkBuffer; + uint64_t m_size; + uint64_t m_alignment; + VulkanMemoryTypeList m_memoryTypeList; + + VulkanBuffer(const VulkanBuffer &buffer); + +public: + VulkanBuffer(const VulkanDevice &device, uint64_t size, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + VulkanBufferUsage bufferUsage = + VULKAN_BUFFER_USAGE_STORAGE_BUFFER_TRANSFER_SRC_DST, + VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE, + const VulkanQueueFamilyList &queueFamilyList = + getEmptyVulkanQueueFamilyList()); + virtual ~VulkanBuffer(); + uint64_t getSize() const; + uint64_t getAlignment() const; + const VulkanMemoryTypeList &getMemoryTypeList() const; + operator VkBuffer() const; +}; + +class VulkanImage { +protected: + const VulkanDevice &m_device; + const VulkanImageType m_imageType; + const VulkanExtent3D m_extent3D; + const VulkanFormat m_format; + const uint32_t m_numMipLevels; + const uint32_t m_numLayers; + VkImage m_vkImage; + uint64_t m_size; + uint64_t m_alignment; + VulkanMemoryTypeList m_memoryTypeList; + VkImageCreateInfo VulkanImageCreateInfo; + VulkanImage(const VulkanImage &image); + +public: + VulkanImage( + const VulkanDevice &device, VulkanImageType imageType, + VulkanFormat format, const VulkanExtent3D &extent3D, + uint32_t numMipLevels = 1, uint32_t arrayLayers = 1, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + VulkanImageCreateFlag imageCreateFlags = VULKAN_IMAGE_CREATE_FLAG_NONE, + VulkanImageTiling imageTiling = VULKAN_IMAGE_TILING_OPTIMAL, + VulkanImageUsage imageUsage = + VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST, + VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE); + virtual ~VulkanImage(); + virtual VulkanExtent3D getExtent3D(uint32_t mipLevel = 0) const; + VulkanFormat getFormat() 
const; + uint32_t getNumMipLevels() const; + uint32_t getNumLayers() const; + uint64_t getSize() const; + uint64_t getAlignment() const; + const VulkanMemoryTypeList &getMemoryTypeList() const; + VkImageCreateInfo getVkImageCreateInfo() const; + operator VkImage() const; +}; + +class VulkanImage2D : public VulkanImage { +protected: + VkImageView m_vkImageView; + + VulkanImage2D(const VulkanImage2D &image2D); + +public: + VulkanImage2D( + const VulkanDevice &device, VulkanFormat format, uint32_t width, + uint32_t height, uint32_t numMipLevels = 1, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE, + VulkanImageUsage imageUsage = + VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST, + VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE); + virtual ~VulkanImage2D(); + virtual VulkanExtent3D getExtent3D(uint32_t mipLevel = 0) const; +}; + +class VulkanImageView { +protected: + const VulkanDevice &m_device; + VkImageView m_vkImageView; + + VulkanImageView(const VulkanImageView &imageView); + +public: + VulkanImageView(const VulkanDevice &device, const VulkanImage &image, + VulkanImageViewType imageViewType, + uint32_t baseMipLevel = 0, + uint32_t mipLevelCount = VULKAN_REMAINING_MIP_LEVELS, + uint32_t baseArrayLayer = 0, + uint32_t layerCount = VULKAN_REMAINING_ARRAY_LAYERS); + virtual ~VulkanImageView(); + operator VkImageView() const; +}; + +class VulkanDeviceMemory { +protected: + const VulkanDevice &m_device; + VkDeviceMemory m_vkDeviceMemory; + uint64_t m_size; + bool m_isDedicated; + + VulkanDeviceMemory(const VulkanDeviceMemory &deviceMemory); + +public: + VulkanDeviceMemory(const VulkanDevice &device, uint64_t size, + const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + const void *name = NULL); + VulkanDeviceMemory(const VulkanDevice 
&device, const VulkanImage &image, + const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + const void *name = NULL); + virtual ~VulkanDeviceMemory(); + uint64_t getSize() const; +#ifdef _WIN32 + HANDLE + getHandle(VulkanExternalMemoryHandleType externalMemoryHandleType) const; +#else + int + getHandle(VulkanExternalMemoryHandleType externalMemoryHandleType) const; +#endif + bool isDedicated() const; + void *map(size_t offset = 0, size_t size = VK_WHOLE_SIZE); + void unmap(); + void bindBuffer(const VulkanBuffer &buffer, uint64_t offset = 0); + void bindImage(const VulkanImage &image, uint64_t offset = 0); + operator VkDeviceMemory() const; +}; + +class VulkanSemaphore { + friend class VulkanQueue; + +protected: + const VulkanDevice &m_device; + VkSemaphore m_vkSemaphore; + const std::wstring m_name; + + VulkanSemaphore(const VulkanSemaphore &semaphore); + +public: + VulkanSemaphore( + const VulkanDevice &device, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType = + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE, + const std::wstring name = L""); + virtual ~VulkanSemaphore(); +#ifdef _WIN32 + HANDLE getHandle( + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType) const; +#else + int getHandle( + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType) const; +#endif + const std::wstring &getName() const; + operator VkSemaphore() const; +}; + + +#define VK_FUNC_DECL(name) extern "C" PFN_##name _##name; +VK_FUNC_LIST +#if defined(_WIN32) || defined(_WIN64) +VK_WINDOWS_FUNC_LIST +#endif +#undef VK_FUNC_DECL + +#endif // _vulkan_wrapper_hpp_ diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper_types.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper_types.hpp new file mode 100644 index 00000000..359bcae4 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper_types.hpp @@ -0,0 +1,463 @@ +// +// 
Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef _vulkan_wrapper_types_hpp_ +#define _vulkan_wrapper_types_hpp_ + +#include + +#define VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT 4 +#define VULKAN_REMAINING_MIP_LEVELS VK_REMAINING_MIP_LEVELS +#define VULKAN_REMAINING_ARRAY_LAYERS VK_REMAINING_ARRAY_LAYERS + +class VulkanInstance; +class VulkanPhysicalDevice; +class VulkanMemoryHeap; +class VulkanMemoryType; +class VulkanQueueFamily; +class VulkanDevice; +class VulkanQueue; +class VulkanDescriptorSetLayoutBinding; +class VulkanDescriptorSetLayout; +class VulkanPipelineLayout; +class VulkanShaderModule; +class VulkanPipeline; +class VulkanComputePipeline; +class VulkanDescriptorPool; +class VulkanDescriptorSet; +class VulkanCommandPool; +class VulkanCommandBuffer; +class VulkanBuffer; +class VulkanOffset3D; +class VulkanExtent3D; +class VulkanImage; +class VulkanImage2D; +class VulkanImageView; +class VulkanDeviceMemory; +class VulkanSemaphore; + +class VulkanPhysicalDeviceList; +class VulkanMemoryHeapList; +class VulkanMemoryTypeList; +class VulkanQueueFamilyList; +class VulkanQueueFamilyToQueueCountMap; +class VulkanQueueFamilyToQueueListMap; +class VulkanQueueList; +class VulkanCommandBufferList; +class VulkanDescriptorSetLayoutList; +class VulkanBufferList; +class VulkanImage2DList; +class VulkanImageViewList; +class VulkanDeviceMemoryList; +class VulkanSemaphoreList; + +enum VulkanQueueFlag +{ + 
VULKAN_QUEUE_FLAG_GRAPHICS = VK_QUEUE_GRAPHICS_BIT, + VULKAN_QUEUE_FLAG_COMPUTE = VK_QUEUE_COMPUTE_BIT, + VULKAN_QUEUE_FLAG_TRANSFER = VK_QUEUE_TRANSFER_BIT, + VULKAN_QUEUE_FLAG_MASK_ALL = VULKAN_QUEUE_FLAG_GRAPHICS + | VULKAN_QUEUE_FLAG_COMPUTE | VULKAN_QUEUE_FLAG_TRANSFER +}; + +enum VulkanDescriptorType +{ + VULKAN_DESCRIPTOR_TYPE_SAMPLER = VK_DESCRIPTOR_TYPE_SAMPLER, + VULKAN_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER = + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + VULKAN_DESCRIPTOR_TYPE_SAMPLED_IMAGE = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + VULKAN_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER = + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + VULKAN_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER = + VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + VULKAN_DESCRIPTOR_TYPE_UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + VULKAN_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, + VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, + VULKAN_DESCRIPTOR_TYPE_INPUT_ATTACHMENT = + VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, +}; + +enum VulkanShaderStage +{ + VULKAN_SHADER_STAGE_VERTEX = VK_SHADER_STAGE_VERTEX_BIT, + VULKAN_SHADER_STAGE_FRAGMENT = VK_SHADER_STAGE_FRAGMENT_BIT, + VULKAN_SHADER_STAGE_COMPUTE = VK_SHADER_STAGE_COMPUTE_BIT, + VULKAN_SHADER_STAGE_ALL_GRAPHICS = VK_SHADER_STAGE_ALL_GRAPHICS, + VULKAN_SHADER_STAGE_ALL = VK_SHADER_STAGE_ALL +}; + +enum VulkanPipelineBindPoint +{ + VULKAN_PIPELINE_BIND_POINT_GRAPHICS = VK_PIPELINE_BIND_POINT_GRAPHICS, + VULKAN_PIPELINE_BIND_POINT_COMPUTE = VK_PIPELINE_BIND_POINT_COMPUTE +}; + +enum VulkanMemoryTypeProperty +{ + VULKAN_MEMORY_TYPE_PROPERTY_NONE = 0, + VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + 
| VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED_COHERENT = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_COHERENT = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED_COHERENT = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT +}; + +enum VulkanMemoryHeapFlag +{ + VULKAN_MEMORY_HEAP_FLAG_NONE = 0, + VULKAN_MEMORY_HEAP_FLAG_DEVICE_LOCAL = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT +}; + +enum VulkanExternalMemoryHandleType +{ + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE = 0, + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR, + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR, + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR + | VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR +}; + +enum VulkanExternalSemaphoreHandleType +{ + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE = 0, + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT = + 
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR, + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR, + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR + | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR +}; + +enum VulkanBufferUsage +{ + VULKAN_BUFFER_USAGE_TRANSFER_SRC = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VULKAN_BUFFER_USAGE_TRANSFER_DST = VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VULKAN_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER = + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, + VULKAN_BUFFER_USAGE_STORAGE_TEXEL_BUFFER = + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, + VULKAN_BUFFER_USAGE_UNIFORM_BUFFER = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VULKAN_BUFFER_USAGE_STORAGE_BUFFER = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VULKAN_BUFFER_USAGE_INDEX_BUFFER = VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + VULKAN_BUFFER_USAGE_VERTEX_BUFFER = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + VULKAN_BUFFER_USAGE_INDIRECT_BUFFER = VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, + VULKAN_BUFFER_USAGE_STORAGE_BUFFER_TRANSFER_SRC_DST = + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT + | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VULKAN_BUFFER_USAGE_UNIFORM_BUFFER_TRANSFER_SRC_DST = + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT + | VK_BUFFER_USAGE_TRANSFER_DST_BIT, +}; + +enum VulkanSharingMode +{ + VULKAN_SHARING_MODE_EXCLUSIVE = VK_SHARING_MODE_EXCLUSIVE, + VULKAN_SHARING_MODE_CONCURRENT = VK_SHARING_MODE_CONCURRENT +}; + +enum VulkanImageType +{ + VULKAN_IMAGE_TYPE_1D = VK_IMAGE_TYPE_1D, + VULKAN_IMAGE_TYPE_2D = VK_IMAGE_TYPE_2D, + VULKAN_IMAGE_TYPE_3D = VK_IMAGE_TYPE_3D +}; + +enum VulkanFormat +{ + VULKAN_FORMAT_UNDEFINED = VK_FORMAT_UNDEFINED, + VULKAN_FORMAT_R4G4_UNORM_PACK8 = VK_FORMAT_R4G4_UNORM_PACK8, + VULKAN_FORMAT_R4G4B4A4_UNORM_PACK16 = VK_FORMAT_R4G4B4A4_UNORM_PACK16, + VULKAN_FORMAT_B4G4R4A4_UNORM_PACK16 = 
VK_FORMAT_B4G4R4A4_UNORM_PACK16, + VULKAN_FORMAT_R5G6B5_UNORM_PACK16 = VK_FORMAT_R5G6B5_UNORM_PACK16, + VULKAN_FORMAT_B5G6R5_UNORM_PACK16 = VK_FORMAT_B5G6R5_UNORM_PACK16, + VULKAN_FORMAT_R5G5B5A1_UNORM_PACK16 = VK_FORMAT_R5G5B5A1_UNORM_PACK16, + VULKAN_FORMAT_B5G5R5A1_UNORM_PACK16 = VK_FORMAT_B5G5R5A1_UNORM_PACK16, + VULKAN_FORMAT_A1R5G5B5_UNORM_PACK16 = VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VULKAN_FORMAT_R8_UNORM = VK_FORMAT_R8_UNORM, + VULKAN_FORMAT_R8_SNORM = VK_FORMAT_R8_SNORM, + VULKAN_FORMAT_R8_USCALED = VK_FORMAT_R8_USCALED, + VULKAN_FORMAT_R8_SSCALED = VK_FORMAT_R8_SSCALED, + VULKAN_FORMAT_R8_UINT = VK_FORMAT_R8_UINT, + VULKAN_FORMAT_R8_SINT = VK_FORMAT_R8_SINT, + VULKAN_FORMAT_R8_SRGB = VK_FORMAT_R8_SRGB, + VULKAN_FORMAT_R8G8_SNORM = VK_FORMAT_R8G8_SNORM, + VULKAN_FORMAT_R8G8_UNORM = VK_FORMAT_R8G8_UNORM, + VULKAN_FORMAT_R8G8_USCALED = VK_FORMAT_R8G8_USCALED, + VULKAN_FORMAT_R8G8_SSCALED = VK_FORMAT_R8G8_SSCALED, + VULKAN_FORMAT_R8G8_UINT = VK_FORMAT_R8G8_UINT, + VULKAN_FORMAT_R8G8_SINT = VK_FORMAT_R8G8_SINT, + VULKAN_FORMAT_R8G8_SRGB = VK_FORMAT_R8G8_SRGB, + VULKAN_FORMAT_R8G8B8_UNORM = VK_FORMAT_R8G8B8_UNORM, + VULKAN_FORMAT_R8G8B8_SNORM = VK_FORMAT_R8G8B8_SNORM, + VULKAN_FORMAT_R8G8B8_USCALED = VK_FORMAT_R8G8B8_USCALED, + VULKAN_FORMAT_R8G8B8_SSCALED = VK_FORMAT_R8G8B8_SSCALED, + VULKAN_FORMAT_R8G8B8_UINT = VK_FORMAT_R8G8B8_UINT, + VULKAN_FORMAT_R8G8B8_SINT = VK_FORMAT_R8G8B8_SINT, + VULKAN_FORMAT_R8G8B8_SRGB = VK_FORMAT_R8G8B8_SRGB, + VULKAN_FORMAT_B8G8R8_UNORM = VK_FORMAT_B8G8R8_UNORM, + VULKAN_FORMAT_B8G8R8_SNORM = VK_FORMAT_B8G8R8_SNORM, + VULKAN_FORMAT_B8G8R8_USCALED = VK_FORMAT_B8G8R8_USCALED, + VULKAN_FORMAT_B8G8R8_SSCALED = VK_FORMAT_B8G8R8_SSCALED, + VULKAN_FORMAT_B8G8R8_UINT = VK_FORMAT_B8G8R8_UINT, + VULKAN_FORMAT_B8G8R8_SINT = VK_FORMAT_B8G8R8_SINT, + VULKAN_FORMAT_B8G8R8_SRGB = VK_FORMAT_B8G8R8_SRGB, + VULKAN_FORMAT_R8G8B8A8_UNORM = VK_FORMAT_R8G8B8A8_UNORM, + VULKAN_FORMAT_R8G8B8A8_SNORM = VK_FORMAT_R8G8B8A8_SNORM, + 
VULKAN_FORMAT_R8G8B8A8_USCALED = VK_FORMAT_R8G8B8A8_USCALED, + VULKAN_FORMAT_R8G8B8A8_SSCALED = VK_FORMAT_R8G8B8A8_SSCALED, + VULKAN_FORMAT_R8G8B8A8_UINT = VK_FORMAT_R8G8B8A8_UINT, + VULKAN_FORMAT_R8G8B8A8_SINT = VK_FORMAT_R8G8B8A8_SINT, + VULKAN_FORMAT_R8G8B8A8_SRGB = VK_FORMAT_R8G8B8A8_SRGB, + VULKAN_FORMAT_B8G8R8A8_UNORM = VK_FORMAT_B8G8R8A8_UNORM, + VULKAN_FORMAT_B8G8R8A8_SNORM = VK_FORMAT_B8G8R8A8_SNORM, + VULKAN_FORMAT_B8G8R8A8_USCALED = VK_FORMAT_B8G8R8A8_USCALED, + VULKAN_FORMAT_B8G8R8A8_SSCALED = VK_FORMAT_B8G8R8A8_SSCALED, + VULKAN_FORMAT_B8G8R8A8_UINT = VK_FORMAT_B8G8R8A8_UINT, + VULKAN_FORMAT_B8G8R8A8_SINT = VK_FORMAT_B8G8R8A8_SINT, + VULKAN_FORMAT_B8G8R8A8_SRGB = VK_FORMAT_B8G8R8A8_SRGB, + VULKAN_FORMAT_A8B8G8R8_UNORM_PACK32 = VK_FORMAT_A8B8G8R8_UNORM_PACK32, + VULKAN_FORMAT_A8B8G8R8_SNORM_PACK32 = VK_FORMAT_A8B8G8R8_SNORM_PACK32, + VULKAN_FORMAT_A8B8G8R8_USCALED_PACK32 = VK_FORMAT_A8B8G8R8_USCALED_PACK32, + VULKAN_FORMAT_A8B8G8R8_SSCALED_PACK32 = VK_FORMAT_A8B8G8R8_SSCALED_PACK32, + VULKAN_FORMAT_A8B8G8R8_UINT_PACK32 = VK_FORMAT_A8B8G8R8_UINT_PACK32, + VULKAN_FORMAT_A8B8G8R8_SINT_PACK32 = VK_FORMAT_A8B8G8R8_SINT_PACK32, + VULKAN_FORMAT_A8B8G8R8_SRGB_PACK32 = VK_FORMAT_A8B8G8R8_SRGB_PACK32, + VULKAN_FORMAT_A2R10G10B10_UNORM_PACK32 = VK_FORMAT_A2R10G10B10_UNORM_PACK32, + VULKAN_FORMAT_A2R10G10B10_SNORM_PACK32 = VK_FORMAT_A2R10G10B10_SNORM_PACK32, + VULKAN_FORMAT_A2R10G10B10_USCALED_PACK32 = + VK_FORMAT_A2R10G10B10_USCALED_PACK32, + VULKAN_FORMAT_A2R10G10B10_SSCALED_PACK32 = + VK_FORMAT_A2R10G10B10_SSCALED_PACK32, + VULKAN_FORMAT_A2R10G10B10_UINT_PACK32 = VK_FORMAT_A2R10G10B10_UINT_PACK32, + VULKAN_FORMAT_A2R10G10B10_SINT_PACK32 = VK_FORMAT_A2R10G10B10_SINT_PACK32, + VULKAN_FORMAT_A2B10G10R10_UNORM_PACK32 = VK_FORMAT_A2B10G10R10_UNORM_PACK32, + VULKAN_FORMAT_A2B10G10R10_SNORM_PACK32 = VK_FORMAT_A2B10G10R10_SNORM_PACK32, + VULKAN_FORMAT_A2B10G10R10_USCALED_PACK32 = + VK_FORMAT_A2B10G10R10_USCALED_PACK32, + VULKAN_FORMAT_A2B10G10R10_SSCALED_PACK32 = + 
VK_FORMAT_A2B10G10R10_SSCALED_PACK32, + VULKAN_FORMAT_A2B10G10R10_UINT_PACK32 = VK_FORMAT_A2B10G10R10_UINT_PACK32, + VULKAN_FORMAT_A2B10G10R10_SINT_PACK32 = VK_FORMAT_A2B10G10R10_SINT_PACK32, + VULKAN_FORMAT_R16_UNORM = VK_FORMAT_R16_UNORM, + VULKAN_FORMAT_R16_SNORM = VK_FORMAT_R16_SNORM, + VULKAN_FORMAT_R16_USCALED = VK_FORMAT_R16_USCALED, + VULKAN_FORMAT_R16_SSCALED = VK_FORMAT_R16_SSCALED, + VULKAN_FORMAT_R16_UINT = VK_FORMAT_R16_UINT, + VULKAN_FORMAT_R16_SINT = VK_FORMAT_R16_SINT, + VULKAN_FORMAT_R16_SFLOAT = VK_FORMAT_R16_SFLOAT, + VULKAN_FORMAT_R16G16_UNORM = VK_FORMAT_R16G16_UNORM, + VULKAN_FORMAT_R16G16_SNORM = VK_FORMAT_R16G16_SNORM, + VULKAN_FORMAT_R16G16_USCALED = VK_FORMAT_R16G16_USCALED, + VULKAN_FORMAT_R16G16_SSCALED = VK_FORMAT_R16G16_SSCALED, + VULKAN_FORMAT_R16G16_UINT = VK_FORMAT_R16G16_UINT, + VULKAN_FORMAT_R16G16_SINT = VK_FORMAT_R16G16_SINT, + VULKAN_FORMAT_R16G16_SFLOAT = VK_FORMAT_R16G16_SFLOAT, + VULKAN_FORMAT_R16G16B16_UNORM = VK_FORMAT_R16G16B16_UNORM, + VULKAN_FORMAT_R16G16B16_SNORM = VK_FORMAT_R16G16B16_SNORM, + VULKAN_FORMAT_R16G16B16_USCALED = VK_FORMAT_R16G16B16_USCALED, + VULKAN_FORMAT_R16G16B16_SSCALED = VK_FORMAT_R16G16B16_SSCALED, + VULKAN_FORMAT_R16G16B16_UINT = VK_FORMAT_R16G16B16_UINT, + VULKAN_FORMAT_R16G16B16_SINT = VK_FORMAT_R16G16B16_SINT, + VULKAN_FORMAT_R16G16B16_SFLOAT = VK_FORMAT_R16G16B16_SFLOAT, + VULKAN_FORMAT_R16G16B16A16_UNORM = VK_FORMAT_R16G16B16A16_UNORM, + VULKAN_FORMAT_R16G16B16A16_SNORM = VK_FORMAT_R16G16B16A16_SNORM, + VULKAN_FORMAT_R16G16B16A16_USCALED = VK_FORMAT_R16G16B16A16_USCALED, + VULKAN_FORMAT_R16G16B16A16_SSCALED = VK_FORMAT_R16G16B16A16_SSCALED, + VULKAN_FORMAT_R16G16B16A16_UINT = VK_FORMAT_R16G16B16A16_UINT, + VULKAN_FORMAT_R16G16B16A16_SINT = VK_FORMAT_R16G16B16A16_SINT, + VULKAN_FORMAT_R16G16B16A16_SFLOAT = VK_FORMAT_R16G16B16A16_SFLOAT, + VULKAN_FORMAT_R32_UINT = VK_FORMAT_R32_UINT, + VULKAN_FORMAT_R32_SINT = VK_FORMAT_R32_SINT, + VULKAN_FORMAT_R32_SFLOAT = VK_FORMAT_R32_SFLOAT, + 
VULKAN_FORMAT_R32G32_UINT = VK_FORMAT_R32G32_UINT, + VULKAN_FORMAT_R32G32_SINT = VK_FORMAT_R32G32_SINT, + VULKAN_FORMAT_R32G32_SFLOAT = VK_FORMAT_R32G32_SFLOAT, + VULKAN_FORMAT_R32G32B32_UINT = VK_FORMAT_R32G32B32_UINT, + VULKAN_FORMAT_R32G32B32_SINT = VK_FORMAT_R32G32B32_SINT, + VULKAN_FORMAT_R32G32B32_SFLOAT = VK_FORMAT_R32G32B32_SFLOAT, + VULKAN_FORMAT_R32G32B32A32_UINT = VK_FORMAT_R32G32B32A32_UINT, + VULKAN_FORMAT_R32G32B32A32_SINT = VK_FORMAT_R32G32B32A32_SINT, + VULKAN_FORMAT_R32G32B32A32_SFLOAT = VK_FORMAT_R32G32B32A32_SFLOAT, + VULKAN_FORMAT_R64_UINT = VK_FORMAT_R64_UINT, + VULKAN_FORMAT_R64_SINT = VK_FORMAT_R64_SINT, + VULKAN_FORMAT_R64_SFLOAT = VK_FORMAT_R64_SFLOAT, + VULKAN_FORMAT_R64G64_UINT = VK_FORMAT_R64G64_UINT, + VULKAN_FORMAT_R64G64_SINT = VK_FORMAT_R64G64_SINT, + VULKAN_FORMAT_R64G64_SFLOAT = VK_FORMAT_R64G64_SFLOAT, + VULKAN_FORMAT_R64G64B64_UINT = VK_FORMAT_R64G64B64_UINT, + VULKAN_FORMAT_R64G64B64_SINT = VK_FORMAT_R64G64B64_SINT, + VULKAN_FORMAT_R64G64B64_SFLOAT = VK_FORMAT_R64G64B64_SFLOAT, + VULKAN_FORMAT_R64G64B64A64_UINT = VK_FORMAT_R64G64B64A64_UINT, + VULKAN_FORMAT_R64G64B64A64_SINT = VK_FORMAT_R64G64B64A64_SINT, + VULKAN_FORMAT_R64G64B64A64_SFLOAT = VK_FORMAT_R64G64B64A64_SFLOAT, + VULKAN_FORMAT_B10G11R11_UFLOAT_PACK32 = VK_FORMAT_B10G11R11_UFLOAT_PACK32, + VULKAN_FORMAT_E5B9G9R9_UFLOAT_PACK32 = VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, + VULKAN_FORMAT_D16_UNORM = VK_FORMAT_D16_UNORM, + VULKAN_FORMAT_X8_D24_UNORM_PACK32 = VK_FORMAT_X8_D24_UNORM_PACK32, + VULKAN_FORMAT_D32_SFLOAT = VK_FORMAT_D32_SFLOAT, + VULKAN_FORMAT_S8_UINT = VK_FORMAT_S8_UINT, + VULKAN_FORMAT_D16_UNORM_S8_UINT = VK_FORMAT_D16_UNORM_S8_UINT, + VULKAN_FORMAT_D24_UNORM_S8_UINT = VK_FORMAT_D24_UNORM_S8_UINT, + VULKAN_FORMAT_D32_SFLOAT_S8_UINT = VK_FORMAT_D32_SFLOAT_S8_UINT, + VULKAN_FORMAT_BC1_RGB_UNORM_BLOCK = VK_FORMAT_BC1_RGB_UNORM_BLOCK, + VULKAN_FORMAT_BC1_RGB_SRGB_BLOCK = VK_FORMAT_BC1_RGB_SRGB_BLOCK, + VULKAN_FORMAT_BC1_RGBA_UNORM_BLOCK = VK_FORMAT_BC1_RGBA_UNORM_BLOCK, 
+ VULKAN_FORMAT_BC1_RGBA_SRGB_BLOCK = VK_FORMAT_BC1_RGBA_SRGB_BLOCK, + VULKAN_FORMAT_BC2_UNORM_BLOCK = VK_FORMAT_BC2_UNORM_BLOCK, + VULKAN_FORMAT_BC2_SRGB_BLOCK = VK_FORMAT_BC2_SRGB_BLOCK, + VULKAN_FORMAT_BC3_UNORM_BLOCK = VK_FORMAT_BC3_UNORM_BLOCK, + VULKAN_FORMAT_BC3_SRGB_BLOCK = VK_FORMAT_BC3_SRGB_BLOCK, + VULKAN_FORMAT_BC4_UNORM_BLOCK = VK_FORMAT_BC4_UNORM_BLOCK, + VULKAN_FORMAT_BC4_SNORM_BLOCK = VK_FORMAT_BC4_SNORM_BLOCK, + VULKAN_FORMAT_BC5_UNORM_BLOCK = VK_FORMAT_BC5_UNORM_BLOCK, + VULKAN_FORMAT_BC5_SNORM_BLOCK = VK_FORMAT_BC5_SNORM_BLOCK, + VULKAN_FORMAT_BC6H_UFLOAT_BLOCK = VK_FORMAT_BC6H_UFLOAT_BLOCK, + VULKAN_FORMAT_BC6H_SFLOAT_BLOCK = VK_FORMAT_BC6H_SFLOAT_BLOCK, + VULKAN_FORMAT_BC7_UNORM_BLOCK = VK_FORMAT_BC7_UNORM_BLOCK, + VULKAN_FORMAT_BC7_SRGB_BLOCK = VK_FORMAT_BC7_SRGB_BLOCK, + VULKAN_FORMAT_ETC2_R8G8B8_UNORM_BLOCK = VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, + VULKAN_FORMAT_ETC2_R8G8B8_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, + VULKAN_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK = + VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, + VULKAN_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, + VULKAN_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK = + VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, + VULKAN_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, + VULKAN_FORMAT_EAC_R11_UNORM_BLOCK = VK_FORMAT_EAC_R11_UNORM_BLOCK, + VULKAN_FORMAT_EAC_R11_SNORM_BLOCK = VK_FORMAT_EAC_R11_SNORM_BLOCK, + VULKAN_FORMAT_EAC_R11G11_UNORM_BLOCK = VK_FORMAT_EAC_R11G11_UNORM_BLOCK, + VULKAN_FORMAT_EAC_R11G11_SNORM_BLOCK = VK_FORMAT_EAC_R11G11_SNORM_BLOCK, + VULKAN_FORMAT_ASTC_4x4_UNORM_BLOCK = VK_FORMAT_ASTC_4x4_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_4x4_SRGB_BLOCK = VK_FORMAT_ASTC_4x4_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_5x4_UNORM_BLOCK = VK_FORMAT_ASTC_5x4_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_5x4_SRGB_BLOCK = VK_FORMAT_ASTC_5x4_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_5x5_UNORM_BLOCK = VK_FORMAT_ASTC_5x5_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_5x5_SRGB_BLOCK = VK_FORMAT_ASTC_5x5_SRGB_BLOCK, + 
VULKAN_FORMAT_ASTC_6x5_UNORM_BLOCK = VK_FORMAT_ASTC_6x5_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_6x5_SRGB_BLOCK = VK_FORMAT_ASTC_6x5_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_6x6_UNORM_BLOCK = VK_FORMAT_ASTC_6x6_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_6x6_SRGB_BLOCK = VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_8x5_UNORM_BLOCK = VK_FORMAT_ASTC_8x5_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_8x5_SRGB_BLOCK = VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_8x6_UNORM_BLOCK = VK_FORMAT_ASTC_8x6_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_8x6_SRGB_BLOCK = VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_8x8_UNORM_BLOCK = VK_FORMAT_ASTC_8x8_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_8x8_SRGB_BLOCK = VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_10x5_UNORM_BLOCK = VK_FORMAT_ASTC_10x5_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_10x5_SRGB_BLOCK = VK_FORMAT_ASTC_10x5_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_10x6_UNORM_BLOCK = VK_FORMAT_ASTC_10x6_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_10x6_SRGB_BLOCK = VK_FORMAT_ASTC_10x6_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_10x8_UNORM_BLOCK = VK_FORMAT_ASTC_10x8_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_10x8_SRGB_BLOCK = VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_10x10_UNORM_BLOCK = VK_FORMAT_ASTC_10x10_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_10x10_SRGB_BLOCK = VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_12x10_UNORM_BLOCK = VK_FORMAT_ASTC_12x10_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_12x10_SRGB_BLOCK = VK_FORMAT_ASTC_12x10_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_12x12_UNORM_BLOCK = VK_FORMAT_ASTC_12x12_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_12x12_SRGB_BLOCK = VK_FORMAT_ASTC_12x12_SRGB_BLOCK, +}; + +enum VulkanImageLayout +{ + VULKAN_IMAGE_LAYOUT_UNDEFINED = VK_IMAGE_LAYOUT_UNDEFINED, + VULKAN_IMAGE_LAYOUT_GENERAL = VK_IMAGE_LAYOUT_GENERAL, + VULKAN_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL = + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL = + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +}; + +enum VulkanImageUsage +{ + VULKAN_IMAGE_USAGE_TRANSFER_SRC = VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + 
VULKAN_IMAGE_USAGE_TRANSFER_DST = VK_IMAGE_USAGE_TRANSFER_DST_BIT, + VULKAN_IMAGE_USAGE_SAMPLED = VK_IMAGE_USAGE_SAMPLED_BIT, + VULKAN_IMAGE_USAGE_STORAGE = VK_IMAGE_USAGE_STORAGE_BIT, + VULKAN_IMAGE_USAGE_COLOR_ATTACHMENT = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VULKAN_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT = + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + VULKAN_IMAGE_USAGE_TRANSIENT_ATTACHMENT = + VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT, + VULKAN_IMAGE_USAGE_INPUT_ATTACHMENT = VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, + VULKAN_IMAGE_USAGE_TRANSFER_SRC_DST = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, + VULKAN_IMAGE_USAGE_STORAGE_TRANSFER_SRC_DST = VULKAN_IMAGE_USAGE_STORAGE + | VULKAN_IMAGE_USAGE_TRANSFER_SRC | VULKAN_IMAGE_USAGE_TRANSFER_DST, + VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST = + VK_IMAGE_USAGE_SAMPLED_BIT | VULKAN_IMAGE_USAGE_STORAGE + | VULKAN_IMAGE_USAGE_TRANSFER_SRC | VULKAN_IMAGE_USAGE_TRANSFER_DST +}; + +enum VulkanImageTiling +{ + VULKAN_IMAGE_TILING_OPTIMAL = VK_IMAGE_TILING_OPTIMAL, + VULKAN_IMAGE_TILING_LINEAR = VK_IMAGE_TILING_LINEAR +}; + +enum VulkanImageCreateFlag +{ + VULKAN_IMAGE_CREATE_FLAG_NONE = 0, + VULKAN_IMAGE_CREATE_FLAG_MUTABLE_FORMAT = + VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, + VULKAN_IMAGE_CREATE_FLAG_CUBE_COMPATIBLE = + VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT, + VULKAN_IMAGE_CREATE_FLAG_CUBE_COMPATIBLE_MUTABLE_FORMAT = + VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT | VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT +}; + +enum VulkanImageViewType +{ + VULKAN_IMAGE_VIEW_TYPE_1D = VK_IMAGE_VIEW_TYPE_1D, + VULKAN_IMAGE_VIEW_TYPE_2D = VK_IMAGE_VIEW_TYPE_2D, + VULKAN_IMAGE_VIEW_TYPE_3D = VK_IMAGE_VIEW_TYPE_3D, + VULKAN_IMAGE_VIEW_TYPE_CUBE = VK_IMAGE_VIEW_TYPE_CUBE, + VULKAN_IMAGE_VIEW_TYPE_1D_ARRAY = VK_IMAGE_VIEW_TYPE_1D_ARRAY, + VULKAN_IMAGE_VIEW_TYPE_2D_ARRAY = VK_IMAGE_VIEW_TYPE_2D_ARRAY, + VULKAN_IMAGE_VIEW_TYPE_CUBE_ARRAY = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY, +}; + +#endif // _vulkan_wrapper_types_hpp_ -- cgit v1.2.3 From 
1c19a4cbdbcaa9d8a683fed26d883735742b41c9 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 28 Jun 2022 17:05:11 +0100 Subject: Add tests for cl_khr_subgroup_rotate (#1439) Signed-off-by: Stuart Brady --- test_conformance/subgroups/CMakeLists.txt | 1 + test_conformance/subgroups/main.cpp | 3 +- test_conformance/subgroups/procs.h | 4 + .../subgroups/subgroup_common_templates.h | 35 ++++++- test_conformance/subgroups/subhelpers.h | 6 +- .../subgroups/test_subgroup_rotate.cpp | 109 +++++++++++++++++++++ 6 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 test_conformance/subgroups/test_subgroup_rotate.cpp diff --git a/test_conformance/subgroups/CMakeLists.txt b/test_conformance/subgroups/CMakeLists.txt index d48af9cc..1ff249cf 100644 --- a/test_conformance/subgroups/CMakeLists.txt +++ b/test_conformance/subgroups/CMakeLists.txt @@ -15,6 +15,7 @@ set(${MODULE_NAME}_SOURCES test_subgroup_clustered_reduce.cpp test_subgroup_shuffle.cpp test_subgroup_shuffle_relative.cpp + test_subgroup_rotate.cpp ) include(../CMakeCommon.txt) diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp index ebe94558..a3ae910d 100644 --- a/test_conformance/subgroups/main.cpp +++ b/test_conformance/subgroups/main.cpp @@ -41,7 +41,8 @@ test_definition test_list[] = { ADD_TEST(subgroup_functions_ballot), ADD_TEST(subgroup_functions_clustered_reduce), ADD_TEST(subgroup_functions_shuffle), - ADD_TEST(subgroup_functions_shuffle_relative) + ADD_TEST(subgroup_functions_shuffle_relative), + ADD_TEST(subgroup_functions_rotate), }; const int test_num = ARRAY_SIZE(test_list); diff --git a/test_conformance/subgroups/procs.h b/test_conformance/subgroups/procs.h index d09e8242..d4f51bec 100644 --- a/test_conformance/subgroups/procs.h +++ b/test_conformance/subgroups/procs.h @@ -81,4 +81,8 @@ extern int test_subgroup_functions_shuffle_relative(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int 
test_subgroup_functions_rotate(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); #endif /*_procs_h*/ diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 0ffa46c8..5051f2e9 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -501,7 +501,31 @@ template struct SHF l = (((cl_uint)(genrand_int32(gMTdata) & 0x7fffffff) + 1) % (ns * 2 + 1)) - 1; - m[midx] = l; + switch (operation) + { + case ShuffleOp::shuffle: + case ShuffleOp::shuffle_xor: + case ShuffleOp::shuffle_up: + case ShuffleOp::shuffle_down: + // storing information about shuffle index/delta + m[midx] = (cl_int)l; + break; + case ShuffleOp::rotate: + case ShuffleOp::clustered_rotate: + // Storing information about rotate delta. + // The delta must be the same for each thread in + // the subgroup. + if (i == 0) + { + m[midx] = (cl_int)l; + } + else + { + m[midx] = m[midx - 4]; + } + break; + default: break; + } cl_ulong number = genrand_int64(gMTdata); set_value(t[ii + i], number); } @@ -565,6 +589,15 @@ template struct SHF if (l >= ns) skip = true; tr_idx = i + l; break; + // rotate - treat l as delta + case ShuffleOp::rotate: + tr_idx = (i + l) % test_params.subgroup_size; + break; + case ShuffleOp::clustered_rotate: { + tr_idx = ((i & ~(test_params.cluster_size - 1)) + + ((i + l) % test_params.cluster_size)); + break; + } default: break; } diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 12704db8..a305639a 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -251,7 +251,9 @@ enum class ShuffleOp shuffle, shuffle_up, shuffle_down, - shuffle_xor + shuffle_xor, + rotate, + clustered_rotate, }; enum class ArithmeticOp @@ -317,6 +319,8 @@ static const char *const operation_names(ShuffleOp operation) case 
ShuffleOp::shuffle_up: return "shuffle_up"; case ShuffleOp::shuffle_down: return "shuffle_down"; case ShuffleOp::shuffle_xor: return "shuffle_xor"; + case ShuffleOp::rotate: return "rotate"; + case ShuffleOp::clustered_rotate: return "clustered_rotate"; default: log_error("Unknown operation request"); break; } return ""; diff --git a/test_conformance/subgroups/test_subgroup_rotate.cpp b/test_conformance/subgroups/test_subgroup_rotate.cpp new file mode 100644 index 00000000..db0f48eb --- /dev/null +++ b/test_conformance/subgroups/test_subgroup_rotate.cpp @@ -0,0 +1,109 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "procs.h" +#include "subhelpers.h" +#include "subgroup_common_kernels.h" +#include "subgroup_common_templates.h" +#include "harness/conversions.h" +#include "harness/typeWrappers.h" + +namespace { + +template int run_rotate_for_type(RunTestForType rft) +{ + int error = rft.run_impl>("sub_group_rotate"); + return error; +} + +std::string sub_group_clustered_rotate_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out, + uint cluster_size) { + Type r; + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + int delta = xy[gid].z; + switch (cluster_size) { + case 1: r = %s(x, delta, 1); break; + case 2: r = %s(x, delta, 2); break; + case 4: r = %s(x, delta, 4); break; + case 8: r = %s(x, delta, 8); break; + case 16: r = %s(x, delta, 16); break; + case 32: r = %s(x, delta, 32); break; + case 64: r = %s(x, delta, 64); break; + case 128: r = %s(x, delta, 128); break; + } + out[gid] = r; + } +)"; + +template int run_clustered_rotate_for_type(RunTestForType rft) +{ + int error = rft.run_impl>( + "sub_group_clustered_rotate"); + return error; +} + +} + +int test_subgroup_functions_rotate(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + if (!is_extension_available(device, "cl_khr_subgroup_rotate")) + { + log_info("cl_khr_subgroup_rotate is not supported on this device, " + "skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + constexpr size_t global_work_size = 2000; + constexpr size_t local_work_size = 200; + WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_generic_source); + RunTestForType rft(device, context, queue, num_elements, test_params); + + int error = run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); 
+ error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + + WorkGroupParams test_params_clustered(global_work_size, local_work_size, -1, + 3); + test_params_clustered.save_kernel_source(sub_group_clustered_rotate_source); + RunTestForType rft_clustered(device, context, queue, num_elements, + test_params_clustered); + + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + + return error; +} -- cgit v1.2.3 From e3e178676168c171b6d005403c0f1f408b6b4f29 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 1 Jul 2022 15:38:42 +0100 Subject: Fix newline in sample_image_pixel_float_offset log (#1446) --- test_common/harness/imageHelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index a254c48f..3dbdffa0 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -1994,7 +1994,7 @@ FloatPixel sample_image_pixel_float_offset( break; case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_BUFFER: - log_info("Starting coordinate: %f\b", x); + log_info("Starting coordinate: %f\n", x); break; case CL_MEM_OBJECT_IMAGE2D: log_info("Starting coordinate: %f, %f\n", x, y); -- cgit v1.2.3 From 8d9d1f3e9da069cf5d224025160628ab3911ba00 Mon Sep 17 00:00:00 2001 From: Nikhil Joshi Date: Tue, 
5 Jul 2022 22:28:18 +0530 Subject: Fix math tests to allow ftz in relaxed mode. (#1371) * Fix math tests to allow ftz in relaxed mode. In recent spec clarification, it is agreed that ftz is a valid optimization in case of cl-fast-math-relaxed and doesn't require cl-denorms-are-zero to be passed explicitly to enforce ftz behavior for implementations that already support this. GitHub Spec Issue OpenCL-Docs#579 GitHub Spec Issue OpenCL-Docs#597 GitHub CTS Issue OpenCL-CTS#1267 --- test_conformance/math_brute_force/binary_double.cpp | 4 +++- test_conformance/math_brute_force/binary_float.cpp | 4 ++-- test_conformance/math_brute_force/binary_i_double.cpp | 6 +++++- test_conformance/math_brute_force/binary_i_float.cpp | 7 +++++-- test_conformance/math_brute_force/binary_operator_double.cpp | 3 ++- test_conformance/math_brute_force/binary_operator_float.cpp | 6 +++--- test_conformance/math_brute_force/binary_two_results_i_double.cpp | 2 +- test_conformance/math_brute_force/binary_two_results_i_float.cpp | 2 +- test_conformance/math_brute_force/i_unary_double.cpp | 2 +- test_conformance/math_brute_force/i_unary_float.cpp | 2 +- test_conformance/math_brute_force/macro_binary_double.cpp | 8 ++++++-- test_conformance/math_brute_force/macro_binary_float.cpp | 8 ++++++-- test_conformance/math_brute_force/macro_unary_double.cpp | 8 ++++++-- test_conformance/math_brute_force/macro_unary_float.cpp | 8 ++++++-- test_conformance/math_brute_force/ternary_double.cpp | 2 +- test_conformance/math_brute_force/ternary_float.cpp | 2 +- test_conformance/math_brute_force/unary_double.cpp | 3 ++- test_conformance/math_brute_force/unary_float.cpp | 2 +- test_conformance/math_brute_force/unary_two_results_double.cpp | 2 +- test_conformance/math_brute_force/unary_two_results_float.cpp | 4 ++-- test_conformance/math_brute_force/unary_two_results_i_double.cpp | 2 +- test_conformance/math_brute_force/unary_two_results_i_float.cpp | 2 +- test_conformance/math_brute_force/unary_u_double.cpp | 2 +- 
test_conformance/math_brute_force/unary_u_float.cpp | 2 +- 24 files changed, 60 insertions(+), 33 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index a2b7d28b..ec8eb300 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -297,6 +297,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float ulps = job->ulps; dptr func = job->f->dfunc; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; MTdata d = tinfo->d; cl_int error; const char *name = job->f->name; @@ -481,7 +482,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float err = Bruteforce_Ulp_Error_Double(test, correct); int fail = !(fabsf(err) <= ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsDoubleResultSubnormal(correct, ulps)) @@ -680,6 +681,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.f = f; test_info.ulps = f->double_ulps; test_info.ftz = f->ftz || gForceFTZ; + test_info.relaxedMode = relaxedMode; test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); test_info.skipNanInf = 0; diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index 97712ee8..a706f772 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -461,7 +461,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { // Calculate the correctly rounded reference result memset(&oldMode, 0, sizeof(oldMode)); - if (ftz) ForceFTZ(&oldMode); + if (ftz || relaxedMode) ForceFTZ(&oldMode); // Set the rounding mode to match the device if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat); @@ -546,7 +546,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float err = Ulp_Error(test, correct); int fail = 
!(fabsf(err) <= ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsFloatResultSubnormal(correct, ulps)) diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index f15c21ed..23a729e0 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -164,6 +164,8 @@ struct TestInfo cl_uint scale; // stride between individual test values float ulps; // max_allowed ulps int ftz; // non-zero if running in flush to zero mode + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. // no special values }; @@ -300,6 +302,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float ulps = job->ulps; dptr func = job->f->dfunc; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; MTdata d = tinfo->d; cl_int error; const char *name = job->f->name; @@ -482,7 +485,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float err = Bruteforce_Ulp_Error_Double(test, correct); int fail = !(fabsf(err) <= ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsDoubleResultSubnormal(correct, ulps)) @@ -601,6 +604,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) test_info.f = f; test_info.ulps = f->double_ulps; test_info.ftz = f->ftz || gForceFTZ; + test_info.relaxedMode = relaxedMode; // cl_kernels aren't thread safe, so we make one for each vector size for // every thread diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index 9e27b007..0cf7494f 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -162,7 +162,8 @@ struct TestInfo cl_uint scale; // stride between individual test values float ulps; // max_allowed ulps int ftz; // non-zero if 
running in flush to zero mode - + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. // no special values }; @@ -291,6 +292,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; float ulps = job->ulps; MTdata d = tinfo->d; cl_int error; @@ -473,7 +475,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float err = Ulp_Error(test, correct); int fail = !(fabsf(err) <= ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsFloatResultSubnormal(correct, ulps)) @@ -595,6 +597,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; // cl_kernels aren't thread safe, so we make one for each vector size for // every thread diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index c407fdaa..f90a4d64 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -294,6 +294,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float ulps = job->ulps; dptr func = job->f->dfunc; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; MTdata d = tinfo->d; cl_int error; const char *name = job->f->name; @@ -476,7 +477,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float err = Bruteforce_Ulp_Error_Double(test, correct); int fail = !(fabsf(err) <= ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsDoubleResultSubnormal(correct, ulps)) diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp 
b/test_conformance/math_brute_force/binary_operator_float.cpp index 7fbb07c2..535d7209 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -456,7 +456,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // Calculate the correctly rounded reference result FPU_mode_type oldMode; memset(&oldMode, 0, sizeof(oldMode)); - if (ftz) ForceFTZ(&oldMode); + if (ftz || relaxedMode) ForceFTZ(&oldMode); // Set the rounding mode to match the device oldRoundMode = kRoundToNearestEven; @@ -484,7 +484,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat); - if (ftz) RestoreFPState(&oldMode); + if (ftz || relaxedMode) RestoreFPState(&oldMode); // Read the data back -- no need to wait for the first N-1 buffers but wait // for the last buffer. This is an in order queue. @@ -541,7 +541,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) ((!(fabsf(err) <= ulps)) && (!(fabsf(errB) <= ulps))); if (fabsf(errB) < fabsf(err)) err = errB; - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsFloatResultSubnormal(correct, ulps)) diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp index 43dc1d30..be7064e4 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp @@ -379,7 +379,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) if (iptrUndefined) iErr = 0; int fail = !(fabsf(err) <= f->double_ulps && iErr == 0); - if (ftz && fail) + if ((ftz || relaxedMode) && fail) { // retry per section 6.5.3.2 if (IsDoubleResultSubnormal(correct, f->double_ulps)) diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp 
b/test_conformance/math_brute_force/binary_two_results_i_float.cpp index 83ceeaab..901c8598 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp @@ -379,7 +379,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) if (iptrUndefined) iErr = 0; int fail = !(fabsf(err) <= float_ulps && iErr == 0); - if (ftz && fail) + if ((ftz || relaxedMode) && fail) { // retry per section 6.5.3.2 if (IsFloatResultSubnormal(correct, float_ulps)) diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index d09e14c1..f07dd78d 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ b/test_conformance/math_brute_force/i_unary_double.cpp @@ -248,7 +248,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) // If we aren't getting the correctly rounded result if (t[j] != q[j]) { - if (ftz && IsDoubleSubnormal(s[j])) + if ((ftz || relaxedMode) && IsDoubleSubnormal(s[j])) { unsigned int correct0 = f->dfunc.i_f(0.0); unsigned int correct1 = f->dfunc.i_f(-0.0); diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index 89b566d9..c38bdcf9 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -245,7 +245,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) // If we aren't getting the correctly rounded result if (t[j] != q[j]) { - if (ftz && IsFloatSubnormal(s[j])) + if ((ftz || relaxedMode) && IsFloatSubnormal(s[j])) { unsigned int correct0 = f->func.i_f(0.0); unsigned int correct1 = f->func.i_f(-0.0); diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index d3e8071f..bb036a24 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp 
+++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -157,6 +157,8 @@ struct TestInfo cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. }; // A table of more difficult cases to get right @@ -282,6 +284,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) ThreadInfo *tinfo = &(job->tinfo[thread_id]); dptr dfunc = job->f->dfunc; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; MTdata d = tinfo->d; cl_int error; const char *name = job->f->name; @@ -455,7 +458,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (gMinVectorSizeIndex == 0 && t[j] != q[j]) { // If we aren't getting the correctly rounded result - if (ftz) + if (ftz || relaxedMode) { if (IsDoubleSubnormal(s[j])) { @@ -503,7 +506,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // If we aren't getting the correctly rounded result if (-t[j] != q[j]) { - if (ftz) + if (ftz || relaxedMode) { if (IsDoubleSubnormal(s[j])) { @@ -607,6 +610,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.f = f; test_info.ftz = f->ftz || gForceFTZ; + test_info.relaxedMode = relaxedMode; // cl_kernels aren't thread safe, so we make one for each vector size for // every thread diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 6c7c8c05..f8cfc9b7 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -155,6 +155,8 @@ struct TestInfo cl_uint step; // step between each chunk and the next. 
cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. }; // A table of more difficult cases to get right @@ -272,6 +274,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; MTdata d = tinfo->d; cl_int error; const char *name = job->f->name; @@ -445,7 +448,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (gMinVectorSizeIndex == 0 && t[j] != q[j]) { - if (ftz) + if (ftz || relaxedMode) { if (IsFloatSubnormal(s[j])) { @@ -492,7 +495,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // If we aren't getting the correctly rounded result if (-t[j] != q[j]) { - if (ftz) + if (ftz || relaxedMode) { if (IsFloatSubnormal(s[j])) { @@ -596,6 +599,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) test_info.f = f; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; // cl_kernels aren't thread safe, so we make one for each vector size for // every thread diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 7f3521c6..0e71f8a0 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -149,6 +149,8 @@ struct TestInfo cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. 
}; cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) @@ -161,6 +163,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) ThreadInfo *tinfo = &(job->tinfo[thread_id]); dptr dfunc = job->f->dfunc; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; cl_int error; const char *name = job->f->name; @@ -286,7 +289,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (gMinVectorSizeIndex == 0 && t[j] != q[j]) { // If we aren't getting the correctly rounded result - if (ftz) + if (ftz || relaxedMode) { if (IsDoubleSubnormal(s[j])) { @@ -311,7 +314,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // If we aren't getting the correctly rounded result if (-t[j] != q[j]) { - if (ftz) + if (ftz || relaxedMode) { if (IsDoubleSubnormal(s[j])) { @@ -392,6 +395,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) test_info.f = f; test_info.ftz = f->ftz || gForceFTZ; + test_info.relaxedMode = relaxedMode; // cl_kernels aren't thread safe, so we make one for each vector size for // every thread diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index 0cd54de4..3b53bdb0 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -148,6 +148,8 @@ struct TestInfo cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. 
}; cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) @@ -160,6 +162,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; cl_int error = CL_SUCCESS; cl_int ret = CL_SUCCESS; const char *name = job->f->name; @@ -290,7 +293,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (gMinVectorSizeIndex == 0 && t[j] != q[j]) { // If we aren't getting the correctly rounded result - if (ftz) + if (ftz || relaxedMode) { if (IsFloatSubnormal(s[j])) { @@ -316,7 +319,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // If we aren't getting the correctly rounded result if (-t[j] != q[j]) { - if (ftz) + if (ftz || relaxedMode) { if (IsFloatSubnormal(s[j])) { @@ -406,6 +409,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) test_info.f = f; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; // cl_kernels aren't thread safe, so we make one for each vector size for // every thread diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index 8af136ac..a3db3353 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -391,7 +391,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, float err = Bruteforce_Ulp_Error_Double(test, correct); int fail = !(fabsf(err) <= f->double_ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsDoubleSubnormal(correct)) diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index c69083ad..fdcb48c4 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -443,7 
+443,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) err = Ulp_Error(test, correct); fail = !(fabsf(err) <= float_ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { float correct2, err2; diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 2d455047..3430fe34 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -172,6 +172,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) dptr func = job->f->dfunc; cl_int error; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; Force64BitFPUPrecision(); @@ -305,7 +306,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (fail) { - if (ftz) + if (ftz || relaxedMode) { // retry per section 6.5.3.2 if (IsDoubleResultSubnormal(correct, ulps)) diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index 83d27b0b..02a5c2cf 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -435,7 +435,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (fail) { - if (ftz) + if (ftz || relaxedMode) { typedef int (*CheckForSubnormal)( double, float); // If we are in fast relaxed math, diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp index 8757fbc4..5556a080 100644 --- a/test_conformance/math_brute_force/unary_two_results_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_double.cpp @@ -291,7 +291,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) float err2 = Bruteforce_Ulp_Error_Double(test2, correct2); int fail = !(fabsf(err) <= f->double_ulps && fabsf(err2) <= f->double_ulps); - if (ftz) + if (ftz || relaxedMode) { // retry per section 6.5.3.2 if 
(IsDoubleResultSubnormal(correct, f->double_ulps)) diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index a54bd024..c95b10d3 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -258,7 +258,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) { // Calculate the correctly rounded reference result memset(&oldMode, 0, sizeof(oldMode)); - if (ftz) ForceFTZ(&oldMode); + if (ftz || relaxedMode) ForceFTZ(&oldMode); // Set the rounding mode to match the device if (gIsInRTZMode) @@ -385,7 +385,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) int fail = !(fabsf(err) <= float_ulps && fabsf(err2) <= float_ulps); - if (ftz) + if (ftz || relaxedMode) { // retry per section 6.5.3.2 if ((*isFloatResultSubnormalPtr)(correct, float_ulps)) diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp index 9ed77dce..c976061c 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp @@ -294,7 +294,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) cl_long iErr = (long long)q2[j] - (long long)correct2; int fail = !(fabsf(err) <= f->double_ulps && abs_cl_long(iErr) <= maxiError); - if (ftz) + if (ftz || relaxedMode) { // retry per section 6.5.3.2 if (IsDoubleResultSubnormal(correct, f->double_ulps)) diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp index d048220b..7a3cd981 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp @@ -297,7 +297,7 @@ int TestFunc_FloatI_Float(const Func 
*f, MTdata d, bool relaxedMode) cl_long iErr = (int64_t)q2[j] - (int64_t)correct2; int fail = !(fabsf(err) <= float_ulps && abs_cl_long(iErr) <= maxiError); - if (ftz) + if (ftz || relaxedMode) { // retry per section 6.5.3.2 if (IsFloatResultSubnormal(correct, float_ulps)) diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp index 9478d0bc..621ee6bb 100644 --- a/test_conformance/math_brute_force/unary_u_double.cpp +++ b/test_conformance/math_brute_force/unary_u_double.cpp @@ -249,7 +249,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) if (fail) { - if (ftz) + if (ftz || relaxedMode) { // retry per section 6.5.3.2 if (IsDoubleResultSubnormal(correct, diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp index 848a9bac..0eae2e54 100644 --- a/test_conformance/math_brute_force/unary_u_float.cpp +++ b/test_conformance/math_brute_force/unary_u_float.cpp @@ -253,7 +253,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) if (fail) { - if (ftz) + if (ftz || relaxedMode) { // retry per section 6.5.3.2 if (IsFloatResultSubnormal(correct, float_ulps)) -- cgit v1.2.3 From a37884fe4461362c39a444f39402baebac3e713b Mon Sep 17 00:00:00 2001 From: Callum Fare Date: Tue, 19 Jul 2022 17:43:36 +0100 Subject: Update cl_khr_extended_async_copies tests to the latest extension version (#1426) * Update cl_khr_extended_async_copies tests to the latest version of the extension Update the 2D and 3D extended async copies tests. Previously they were based on an older provisional version of the extension. Also update the variable names to only use 'stride' to refer to the actual stride values. Previously the tests used 'stride' to refer to the end of one line or plane and the start of the next. This is not the commonly understood meaning. 
* Address cl_khr_extended_async_copies PR feedback * Remove unnecessary parenthesis in kernel code * Make variables `const` and rearrange so that we can reuse variables, rather than repeating expressions. * Add in missing vector size of 3 for 2D tests * Use C++ String literals for kernel code Rather than C strings use C++11 string literals to define the kernel code in the extended async-copy tests. Doing this makes the kernel code more readable. Co-authored-by: Ewan Crawford --- test_conformance/basic/test_async_copy2D.cpp | 238 ++++++++++--------- test_conformance/basic/test_async_copy3D.cpp | 331 +++++++++++++-------------- 2 files changed, 289 insertions(+), 280 deletions(-) diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp index fafcac83..54633a31 100644 --- a/test_conformance/basic/test_async_copy2D.cpp +++ b/test_conformance/basic/test_async_copy2D.cpp @@ -25,77 +25,81 @@ #include "../../test_common/harness/conversions.h" #include "procs.h" -static const char *async_global_to_local_kernel2D = - "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n" - "%s\n" // optional pragma string - "__kernel void test_fn( const __global %s *src, __global %s *dst, __local " - "%s *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int " - "lineCopiesPerWorkItem, int srcStride, int dstStride )\n" - "{\n" - " int i, j;\n" - // Zero the local storage first - " for(i=0; i max_local_workgroup_size[0]) max_workgroup_size = max_local_workgroup_size[0]; - size_t numElementsPerLine = 10; - size_t lineCopiesPerWorkItem = 13; + const size_t numElementsPerLine = 10; + const cl_int dstStride = numElementsPerLine + dstMargin; + const cl_int srcStride = numElementsPerLine + srcMargin; + elementSize = get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize); - size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem * elementSize - * (numElementsPerLine + (localIsDst ? 
dstStride : srcStride)); + + const size_t lineCopiesPerWorkItem = 13; + const size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem + * elementSize * (localIsDst ? dstStride : srcStride); + size_t maxLocalWorkgroupSize = (((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem); @@ -199,34 +208,39 @@ int test_copy2D(cl_device_id deviceID, cl_context context, if (maxLocalWorkgroupSize > max_workgroup_size) localWorkgroupSize = max_workgroup_size; - size_t maxTotalLinesIn = (max_alloc_size / elementSize + srcStride) - / (numElementsPerLine + srcStride); - size_t maxTotalLinesOut = (max_alloc_size / elementSize + dstStride) - / (numElementsPerLine + dstStride); - size_t maxTotalLines = std::min(maxTotalLinesIn, maxTotalLinesOut); - size_t maxLocalWorkgroups = + + const size_t maxTotalLinesIn = + (max_alloc_size / elementSize + srcMargin) / srcStride; + const size_t maxTotalLinesOut = + (max_alloc_size / elementSize + dstMargin) / dstStride; + const size_t maxTotalLines = std::min(maxTotalLinesIn, maxTotalLinesOut); + const size_t maxLocalWorkgroups = maxTotalLines / (localWorkgroupSize * lineCopiesPerWorkItem); - size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem - - (localIsDst ? dstStride : srcStride); - size_t numberOfLocalWorkgroups = std::min(1111, (int)maxLocalWorkgroups); - size_t totalLines = + const size_t localBufferSize = + localWorkgroupSize * localStorageSpacePerWorkitem + - (localIsDst ? 
dstMargin : srcMargin); + const size_t numberOfLocalWorkgroups = + std::min(1111, (int)maxLocalWorkgroups); + const size_t totalLines = numberOfLocalWorkgroups * localWorkgroupSize * lineCopiesPerWorkItem; - size_t inBufferSize = elementSize - * (totalLines * numElementsPerLine + (totalLines - 1) * srcStride); - size_t outBufferSize = elementSize - * (totalLines * numElementsPerLine + (totalLines - 1) * dstStride); - size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize; + const size_t inBufferSize = elementSize + * (totalLines * numElementsPerLine + (totalLines - 1) * srcMargin); + const size_t outBufferSize = elementSize + * (totalLines * numElementsPerLine + (totalLines - 1) * dstMargin); + const size_t globalWorkgroupSize = + numberOfLocalWorkgroups * localWorkgroupSize; inBuffer = (void *)malloc(inBufferSize); outBuffer = (void *)malloc(outBufferSize); outBufferCopy = (void *)malloc(outBufferSize); - cl_int lineCopiesPerWorkItemInt, numElementsPerLineInt, - lineCopiesPerWorkgroup; - lineCopiesPerWorkItemInt = (int)lineCopiesPerWorkItem; - numElementsPerLineInt = (int)numElementsPerLine; - lineCopiesPerWorkgroup = (int)(lineCopiesPerWorkItem * localWorkgroupSize); + const cl_int lineCopiesPerWorkItemInt = + static_cast(lineCopiesPerWorkItem); + const cl_int numElementsPerLineInt = + static_cast(numElementsPerLine); + const cl_int lineCopiesPerWorkgroup = + static_cast(lineCopiesPerWorkItem * localWorkgroupSize); log_info( "Global: %d, local %d, local buffer %db, global in buffer %db, " @@ -296,8 +310,8 @@ int test_copy2D(cl_device_id deviceID, cl_context context, for (int j = 0; j < (int)numElementsPerLine * elementSize; j += elementSize) { - int inIdx = i * (numElementsPerLine + srcStride) + j; - int outIdx = i * (numElementsPerLine + dstStride) + j; + int inIdx = i * srcStride + j; + int outIdx = i * dstStride + j; if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx, typeSize) != 0) @@ -332,11 +346,10 @@ int 
test_copy2D(cl_device_id deviceID, cl_context context, if (i < (int)(globalWorkgroupSize * lineCopiesPerWorkItem - 1) * elementSize) { - int outIdx = i * (numElementsPerLine + dstStride) - + numElementsPerLine * elementSize; + int outIdx = i * dstStride + numElementsPerLine * elementSize; if (memcmp(((char *)outBuffer) + outIdx, ((char *)outBufferCopy) + outIdx, - dstStride * elementSize) + dstMargin * elementSize) != 0) { if (failuresPrinted == 0) @@ -373,9 +386,12 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context, kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + // The margins below represent the number of elements between the end of + // one line and the start of the next. The strides are equivalent to the + // length of the line plus the chosen margin. unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; - unsigned int smallTypesStrideSizes[] = { 0, 10, 100 }; - unsigned int size, typeIndex, srcStride, dstStride; + unsigned int smallTypesMarginSizes[] = { 0, 10, 100 }; + unsigned int size, typeIndex, srcMargin, dstMargin; int errors = 0; @@ -401,19 +417,19 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context, if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size] <= 2) // small type { - for (srcStride = 0; srcStride < sizeof(smallTypesStrideSizes) - / sizeof(smallTypesStrideSizes[0]); - srcStride++) + for (srcMargin = 0; srcMargin < sizeof(smallTypesMarginSizes) + / sizeof(smallTypesMarginSizes[0]); + srcMargin++) { - for (dstStride = 0; - dstStride < sizeof(smallTypesStrideSizes) - / sizeof(smallTypesStrideSizes[0]); - dstStride++) + for (dstMargin = 0; + dstMargin < sizeof(smallTypesMarginSizes) + / sizeof(smallTypesMarginSizes[0]); + dstMargin++) { if (test_copy2D(deviceID, context, queue, kernelCode, vecType[typeIndex], vecSizes[size], - smallTypesStrideSizes[srcStride], - smallTypesStrideSizes[dstStride], + smallTypesMarginSizes[srcMargin], + 
smallTypesMarginSizes[dstMargin], localIsDst)) { errors++; diff --git a/test_conformance/basic/test_async_copy3D.cpp b/test_conformance/basic/test_async_copy3D.cpp index 2b184ee5..5eb41ebc 100644 --- a/test_conformance/basic/test_async_copy3D.cpp +++ b/test_conformance/basic/test_async_copy3D.cpp @@ -25,96 +25,95 @@ #include "../../test_common/harness/conversions.h" #include "procs.h" -static const char *async_global_to_local_kernel3D = - "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n" - "%s\n" // optional pragma string - "__kernel void test_fn( const __global %s *src, __global %s *dst, __local " - "%s *localBuffer, int numElementsPerLine, int numLines, int " - "planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, " - "int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n" - "{\n" - " int i, j, k;\n" - // Zero the local storage first - " for(i=0; i max_local_workgroup_size[0]) max_workgroup_size = max_local_workgroup_size[0]; - size_t numElementsPerLine = 10; - size_t numLines = 13; - size_t planesCopiesPerWorkItem = 2; + const size_t numElementsPerLine = 10; + const cl_int dstLineStride = numElementsPerLine + dstLineMargin; + const cl_int srcLineStride = numElementsPerLine + srcLineMargin; + + const size_t numLines = 13; + const cl_int dstPlaneStride = (numLines * dstLineStride) + dstPlaneMargin; + const cl_int srcPlaneStride = (numLines * srcLineStride) + srcPlaneMargin; + elementSize = get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize); - size_t localStorageSpacePerWorkitem = elementSize - * (planesCopiesPerWorkItem - * (numLines * numElementsPerLine - + numLines * (localIsDst ? dstLineStride : srcLineStride) - + (localIsDst ? dstPlaneStride : srcPlaneStride))); + const size_t planesCopiesPerWorkItem = 2; + const size_t localStorageSpacePerWorkitem = elementSize + * planesCopiesPerWorkItem + * (localIsDst ? 
dstPlaneStride : srcPlaneStride); size_t maxLocalWorkgroupSize = (((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem); @@ -224,42 +227,41 @@ int test_copy3D(cl_device_id deviceID, cl_context context, if (maxLocalWorkgroupSize > max_workgroup_size) localWorkgroupSize = max_workgroup_size; - size_t maxTotalPlanesIn = ((max_alloc_size / elementSize) + srcPlaneStride) - / ((numLines * numElementsPerLine + numLines * srcLineStride) - + srcPlaneStride); - size_t maxTotalPlanesOut = ((max_alloc_size / elementSize) + dstPlaneStride) - / ((numLines * numElementsPerLine + numLines * dstLineStride) - + dstPlaneStride); - size_t maxTotalPlanes = std::min(maxTotalPlanesIn, maxTotalPlanesOut); - size_t maxLocalWorkgroups = + const size_t maxTotalPlanesIn = + ((max_alloc_size / elementSize) + srcPlaneMargin) / srcPlaneStride; + const size_t maxTotalPlanesOut = + ((max_alloc_size / elementSize) + dstPlaneMargin) / dstPlaneStride; + const size_t maxTotalPlanes = std::min(maxTotalPlanesIn, maxTotalPlanesOut); + const size_t maxLocalWorkgroups = maxTotalPlanes / (localWorkgroupSize * planesCopiesPerWorkItem); - size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem - - (localIsDst ? dstPlaneStride : srcPlaneStride); - size_t numberOfLocalWorkgroups = std::min(1111, (int)maxLocalWorkgroups); - size_t totalPlanes = + const size_t localBufferSize = + localWorkgroupSize * localStorageSpacePerWorkitem + - (localIsDst ? 
dstPlaneMargin : srcPlaneMargin); + const size_t numberOfLocalWorkgroups = + std::min(1111, (int)maxLocalWorkgroups); + const size_t totalPlanes = numberOfLocalWorkgroups * localWorkgroupSize * planesCopiesPerWorkItem; - size_t inBufferSize = elementSize - * (totalPlanes - * (numLines * numElementsPerLine + numLines * srcLineStride) - + (totalPlanes - 1) * srcPlaneStride); - size_t outBufferSize = elementSize - * (totalPlanes - * (numLines * numElementsPerLine + numLines * dstLineStride) - + (totalPlanes - 1) * dstPlaneStride); - size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize; + const size_t inBufferSize = elementSize + * (totalPlanes * numLines * srcLineStride + + (totalPlanes - 1) * srcPlaneMargin); + const size_t outBufferSize = elementSize + * (totalPlanes * numLines * dstLineStride + + (totalPlanes - 1) * dstPlaneMargin); + const size_t globalWorkgroupSize = + numberOfLocalWorkgroups * localWorkgroupSize; inBuffer = (void *)malloc(inBufferSize); outBuffer = (void *)malloc(outBufferSize); outBufferCopy = (void *)malloc(outBufferSize); - cl_int planesCopiesPerWorkItemInt, numElementsPerLineInt, numLinesInt, - planesCopiesPerWorkgroup; - planesCopiesPerWorkItemInt = (int)planesCopiesPerWorkItem; - numElementsPerLineInt = (int)numElementsPerLine; - numLinesInt = (int)numLines; - planesCopiesPerWorkgroup = - (int)(planesCopiesPerWorkItem * localWorkgroupSize); + const cl_int planesCopiesPerWorkItemInt = + static_cast(planesCopiesPerWorkItem); + const cl_int numElementsPerLineInt = + static_cast(numElementsPerLine); + const cl_int numLinesInt = static_cast(numLines); + const cl_int planesCopiesPerWorkgroup = + static_cast(planesCopiesPerWorkItem * localWorkgroupSize); log_info("Global: %d, local %d, local buffer %db, global in buffer %db, " "global out buffer %db, each work group will copy %d planes and " @@ -336,14 +338,8 @@ int test_copy3D(cl_device_id deviceID, cl_context context, for (int k = 0; k < (int)numElementsPerLine * 
elementSize; k += elementSize) { - int inIdx = i - * (numLines * numElementsPerLine - + numLines * srcLineStride + srcPlaneStride) - + j * (numElementsPerLine + srcLineStride) + k; - int outIdx = i - * (numLines * numElementsPerLine - + numLines * dstLineStride + dstPlaneStride) - + j * (numElementsPerLine + dstLineStride) + k; + int inIdx = i * srcPlaneStride + j * srcLineStride + k; + int outIdx = i * dstPlaneStride + j * dstLineStride + k; if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx, typeSize) != 0) @@ -378,14 +374,11 @@ int test_copy3D(cl_device_id deviceID, cl_context context, } if (j < (int)numLines * elementSize) { - int outIdx = i - * (numLines * numElementsPerLine - + numLines * dstLineStride + dstPlaneStride) - + j * (numElementsPerLine + dstLineStride) + int outIdx = i * dstPlaneStride + j * dstLineStride + numElementsPerLine * elementSize; if (memcmp(((char *)outBuffer) + outIdx, ((char *)outBufferCopy) + outIdx, - dstLineStride * elementSize) + dstLineMargin * elementSize) != 0) { if (failuresPrinted == 0) @@ -409,14 +402,11 @@ int test_copy3D(cl_device_id deviceID, cl_context context, if (i < (int)(globalWorkgroupSize * planesCopiesPerWorkItem - 1) * elementSize) { - int outIdx = i - * (numLines * numElementsPerLine + numLines * dstLineStride - + dstPlaneStride) - + (numLines * elementSize) * (numElementsPerLine) - + (numLines * elementSize) * (dstLineStride); + int outIdx = + i * dstPlaneStride + numLines * dstLineStride * elementSize; if (memcmp(((char *)outBuffer) + outIdx, ((char *)outBufferCopy) + outIdx, - dstPlaneStride * elementSize) + dstPlaneMargin * elementSize) != 0) { if (failuresPrinted == 0) @@ -453,10 +443,13 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context, kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + // The margins below represent the number of elements between the end of + // one line or plane and the start of the next. 
The strides are equivalent + // to the size of the line or plane plus the chosen margin. unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; - unsigned int smallTypesStrideSizes[] = { 0, 10, 100 }; - unsigned int size, typeIndex, srcLineStride, dstLineStride, srcPlaneStride, - dstPlaneStride; + unsigned int smallTypesMarginSizes[] = { 0, 10, 100 }; + unsigned int size, typeIndex, srcLineMargin, dstLineMargin, srcPlaneMargin, + dstPlaneMargin; int errors = 0; @@ -482,33 +475,33 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context, if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size] <= 2) // small type { - for (srcLineStride = 0; - srcLineStride < sizeof(smallTypesStrideSizes) - / sizeof(smallTypesStrideSizes[0]); - srcLineStride++) + for (srcLineMargin = 0; + srcLineMargin < sizeof(smallTypesMarginSizes) + / sizeof(smallTypesMarginSizes[0]); + srcLineMargin++) { - for (dstLineStride = 0; - dstLineStride < sizeof(smallTypesStrideSizes) - / sizeof(smallTypesStrideSizes[0]); - dstLineStride++) + for (dstLineMargin = 0; + dstLineMargin < sizeof(smallTypesMarginSizes) + / sizeof(smallTypesMarginSizes[0]); + dstLineMargin++) { - for (srcPlaneStride = 0; - srcPlaneStride < sizeof(smallTypesStrideSizes) - / sizeof(smallTypesStrideSizes[0]); - srcPlaneStride++) + for (srcPlaneMargin = 0; + srcPlaneMargin < sizeof(smallTypesMarginSizes) + / sizeof(smallTypesMarginSizes[0]); + srcPlaneMargin++) { - for (dstPlaneStride = 0; - dstPlaneStride < sizeof(smallTypesStrideSizes) - / sizeof(smallTypesStrideSizes[0]); - dstPlaneStride++) + for (dstPlaneMargin = 0; + dstPlaneMargin < sizeof(smallTypesMarginSizes) + / sizeof(smallTypesMarginSizes[0]); + dstPlaneMargin++) { if (test_copy3D( deviceID, context, queue, kernelCode, vecType[typeIndex], vecSizes[size], - smallTypesStrideSizes[srcLineStride], - smallTypesStrideSizes[dstLineStride], - smallTypesStrideSizes[srcPlaneStride], - smallTypesStrideSizes[dstPlaneStride], + 
smallTypesMarginSizes[srcLineMargin], + smallTypesMarginSizes[dstLineMargin], + smallTypesMarginSizes[srcPlaneMargin], + smallTypesMarginSizes[dstPlaneMargin], localIsDst)) { errors++; -- cgit v1.2.3 From 2cf24e63b7f1c2817a2d37b02612185c59f05faf Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 21 Jul 2022 18:54:20 +0100 Subject: Fix function name in error messages (#1450) Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/binary_double.cpp | 3 ++- test_conformance/math_brute_force/binary_float.cpp | 3 ++- test_conformance/math_brute_force/binary_i_double.cpp | 3 ++- test_conformance/math_brute_force/binary_i_float.cpp | 3 ++- test_conformance/math_brute_force/binary_operator_double.cpp | 3 ++- test_conformance/math_brute_force/binary_operator_float.cpp | 3 ++- test_conformance/math_brute_force/macro_binary_double.cpp | 3 ++- test_conformance/math_brute_force/macro_binary_float.cpp | 3 ++- test_conformance/math_brute_force/macro_unary_double.cpp | 3 ++- test_conformance/math_brute_force/macro_unary_float.cpp | 3 ++- test_conformance/math_brute_force/unary_double.cpp | 3 ++- test_conformance/math_brute_force/unary_float.cpp | 3 ++- 12 files changed, 24 insertions(+), 12 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index ec8eb300..e987774a 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -400,7 +400,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! 
err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index a706f772..e8baccdc 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -404,7 +404,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 23a729e0..3c4ef4a4 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -403,7 +403,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index 0cf7494f..dcda5f82 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -393,7 +393,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! 
err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index f90a4d64..4661f240 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -395,7 +395,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index 535d7209..7a239963 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -407,7 +407,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index bb036a24..a6f65ac4 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -383,7 +383,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! 
err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index f8cfc9b7..3fe02629 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -375,7 +375,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 0e71f8a0..c44ebe21 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -221,7 +221,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); return error; } diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index 3b53bdb0..d9d79094 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -223,7 +223,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! 
err: %d\n", + error); return error; } diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 3430fe34..3b91b4cd 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -228,7 +228,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); return error; } diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index 02a5c2cf..e5576e7e 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -256,7 +256,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); return error; } -- cgit v1.2.3 From 0a5a8f90c96d6456f7c163af9c183fda0ed7af0d Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 28 Jul 2022 13:33:16 +0100 Subject: Use clProgramWrapper in math_brute_force (#1451) Simplify code by avoiding manual resource management. This allows removing clReleaseProgram from `MakeKernels` to reduce behavioral differences between `MakeKernels` and `MakeKernel`. Original patch by Marco Antognini. 
Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/binary_double.cpp | 9 +++++---- test_conformance/math_brute_force/binary_float.cpp | 9 +++++---- test_conformance/math_brute_force/binary_i_double.cpp | 9 +++++---- test_conformance/math_brute_force/binary_i_float.cpp | 9 +++++---- test_conformance/math_brute_force/binary_operator_double.cpp | 9 +++++---- test_conformance/math_brute_force/binary_operator_float.cpp | 9 +++++---- .../math_brute_force/binary_two_results_i_double.cpp | 8 ++++---- test_conformance/math_brute_force/binary_two_results_i_float.cpp | 8 ++++---- test_conformance/math_brute_force/common.h | 4 ++++ test_conformance/math_brute_force/i_unary_double.cpp | 8 ++++---- test_conformance/math_brute_force/i_unary_float.cpp | 8 ++++---- test_conformance/math_brute_force/macro_binary_double.cpp | 9 +++++---- test_conformance/math_brute_force/macro_binary_float.cpp | 9 +++++---- test_conformance/math_brute_force/macro_unary_double.cpp | 9 +++++---- test_conformance/math_brute_force/macro_unary_float.cpp | 9 +++++---- test_conformance/math_brute_force/mad_double.cpp | 8 ++++---- test_conformance/math_brute_force/mad_float.cpp | 8 ++++---- test_conformance/math_brute_force/main.cpp | 1 - test_conformance/math_brute_force/ternary_double.cpp | 8 ++++---- test_conformance/math_brute_force/ternary_float.cpp | 8 ++++---- test_conformance/math_brute_force/unary_double.cpp | 9 +++++---- test_conformance/math_brute_force/unary_float.cpp | 9 +++++---- test_conformance/math_brute_force/unary_two_results_double.cpp | 8 ++++---- test_conformance/math_brute_force/unary_two_results_float.cpp | 8 ++++---- test_conformance/math_brute_force/unary_two_results_i_double.cpp | 8 ++++---- test_conformance/math_brute_force/unary_two_results_i_float.cpp | 8 ++++---- test_conformance/math_brute_force/unary_u_double.cpp | 8 ++++---- test_conformance/math_brute_force/unary_u_float.cpp | 8 ++++---- 28 files changed, 120 
insertions(+), 105 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index e987774a..0869acad 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -117,7 +117,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }; @@ -127,7 +127,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -150,7 +150,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. 
+ Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -791,7 +793,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index e8baccdc..aea86ca7 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -115,7 +115,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }; @@ -125,7 +125,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -148,7 +148,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. 
+ Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -948,7 +950,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 3c4ef4a4..eb94b5f7 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -116,7 +116,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }; @@ -126,7 +126,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -149,7 +149,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. 
+ Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -713,7 +715,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index dcda5f82..f6e4d2fc 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -114,7 +114,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }; @@ -124,7 +124,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -147,7 +147,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. 
+ Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -706,7 +708,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 4661f240..f94b8aa4 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -116,7 +116,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *operator_symbol; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }; @@ -126,7 +126,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->operator_symbol, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -149,7 +149,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. 
+ Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -758,7 +760,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index 7a239963..64a4c4af 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -114,7 +114,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *operator_symbol; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }; @@ -124,7 +124,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->operator_symbol, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -147,7 +147,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. 
+ Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -885,7 +887,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp index be7064e4..132ff593 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -121,7 +122,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -131,7 +132,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } struct ComputeReferenceInfoD @@ -172,7 +173,7 @@ cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo) int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int64_t maxError2 = 0; @@ -577,7 +578,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp index 901c8598..017ad125 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -119,7 +120,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -129,7 +130,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } struct ComputeReferenceInfoF @@ -171,7 +172,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -562,7 +563,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h index 3eafb6de..5c1f8e6d 100644 --- a/test_conformance/math_brute_force/common.h +++ b/test_conformance/math_brute_force/common.h @@ -16,6 +16,7 @@ #ifndef COMMON_H #define COMMON_H +#include "harness/typeWrappers.h" #include "utility.h" #include @@ -24,4 +25,7 @@ // Array of thread-specific kernels for each vector size. using KernelMatrix = std::array, VECTOR_SIZE_COUNT>; +// Array of programs for each vector size. +using Programs = std::array; + #endif /* COMMON_H */ diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index f07dd78d..1900afe4 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ b/test_conformance/math_brute_force/i_unary_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. 
// +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -106,7 +107,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }; @@ -116,7 +117,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } } // anonymous namespace @@ -124,7 +125,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; int ftz = f->ftz || gForceFTZ; uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE); @@ -299,7 +300,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index c38bdcf9..baf0ab67 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -104,7 +105,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -114,7 +115,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } } // anonymous namespace @@ -122,7 +123,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE); @@ -295,7 +296,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index a6f65ac4..8f723a08 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -115,7 +115,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -125,7 +125,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -143,7 +143,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. + Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -703,7 +705,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 3fe02629..bdcb0925 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -113,7 +113,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -123,7 +123,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -141,7 +141,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. + Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -692,7 +694,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index c44ebe21..0d086614 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -109,7 +109,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -119,7 +119,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -135,7 +135,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. + Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -476,7 +478,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index d9d79094..ea485b05 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -108,7 +108,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -118,7 +118,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -134,7 +134,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. + Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -490,7 +492,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp index 8e88f9f6..77428d06 100644 --- a/test_conformance/math_brute_force/mad_double.cpp +++ b/test_conformance/math_brute_force/mad_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -119,7 +120,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -129,7 +130,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } } // anonymous namespace @@ -137,7 +138,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; double maxErrorVal = 0.0f; @@ -298,7 +299,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp index 0552ba4b..9a7730f1 100644 --- a/test_conformance/math_brute_force/mad_float.cpp +++ b/test_conformance/math_brute_force/mad_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -117,7 +118,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -127,7 +128,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } } // anonymous namespace @@ -138,7 +139,7 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; float maxErrorVal = 0.0f; @@ -297,7 +298,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 1a6e0c4e..d1d146a1 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -1092,7 +1092,6 @@ int MakeKernels(const char **c, cl_uint count, const char *name, clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL); vlog_error("Log: %s\n", buffer); - clReleaseProgram(*p); return error; } } diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index a3db3353..c4a9a1c4 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -122,7 +123,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -132,7 +133,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } // A table of more difficult cases to get right @@ -213,7 +214,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int ftz = f->ftz || gForceFTZ; @@ -737,7 +738,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index fdcb48c4..36d957ce 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -120,7 +121,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -130,7 +131,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } // A table of more difficult cases to get right @@ -223,7 +224,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -872,7 +873,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 3b91b4cd..3351ea35 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -109,7 +109,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -119,7 +119,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -137,7 +137,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. + Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -514,7 +516,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index e5576e7e..37a63732 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -107,7 +107,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -117,7 +117,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -135,7 +135,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. + Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -691,7 +693,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp index 5556a080..5887f192 100644 --- a/test_conformance/math_brute_force/unary_two_results_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -113,7 +114,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -123,7 +124,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } } // anonymous namespace @@ -131,7 +132,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError0 = 0.0f; float maxError1 = 0.0f; @@ -443,7 +444,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index c95b10d3..fb8d5535 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -111,7 +112,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -121,7 +122,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } } // anonymous namespace @@ -129,7 +130,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError0 = 0.0f; float maxError1 = 0.0f; @@ -575,7 +576,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp index c976061c..6f2de049 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -114,7 +115,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -124,7 +125,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } cl_ulong abs_cl_long(cl_long i) @@ -138,7 +139,7 @@ cl_ulong abs_cl_long(cl_long i) int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int64_t maxError2 = 0; @@ -415,7 +416,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp index 7a3cd981..529da8dc 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -112,7 +113,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -122,7 +123,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } cl_ulong abs_cl_long(cl_long i) @@ -136,7 +137,7 @@ cl_ulong abs_cl_long(cl_long i) int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int64_t maxError2 = 0; @@ -413,7 +414,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp index 621ee6bb..8113b955 100644 --- a/test_conformance/math_brute_force/unary_u_double.cpp +++ b/test_conformance/math_brute_force/unary_u_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -108,7 +109,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -118,7 +119,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } cl_ulong random64(MTdata d) @@ -131,7 +132,7 @@ cl_ulong random64(MTdata d) int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int ftz = f->ftz || gForceFTZ; @@ -311,7 +312,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp index 0eae2e54..ee077c80 100644 --- a/test_conformance/math_brute_force/unary_u_float.cpp +++ b/test_conformance/math_brute_force/unary_u_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -105,7 +106,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -115,7 +116,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } } // anonymous namespace @@ -123,7 +124,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -313,7 +314,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; -- cgit v1.2.3 From b06ccc6cd96fa3e3625c40cd50445ceeef38d048 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 29 Jul 2022 18:22:18 +0100 Subject: Share BuildKernelInfo struct definition (#1453) Move the main `BuildKernelInfo` definition into `common.h` to reduce code duplication. Some tests (e.g. `i_unary_double.cpp`) use a different struct; rename those structs to `BuildKernelInfo2` for now to avoid ambiguity. 
Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/binary_double.cpp | 10 ---------- test_conformance/math_brute_force/binary_float.cpp | 10 ---------- test_conformance/math_brute_force/binary_i_double.cpp | 10 ---------- test_conformance/math_brute_force/binary_i_float.cpp | 10 ---------- test_conformance/math_brute_force/binary_operator_double.cpp | 12 +----------- test_conformance/math_brute_force/binary_operator_float.cpp | 12 +----------- .../math_brute_force/binary_two_results_i_double.cpp | 8 ++++---- .../math_brute_force/binary_two_results_i_float.cpp | 8 ++++---- test_conformance/math_brute_force/common.h | 10 ++++++++++ test_conformance/math_brute_force/i_unary_double.cpp | 8 ++++---- test_conformance/math_brute_force/i_unary_float.cpp | 8 ++++---- test_conformance/math_brute_force/macro_binary_double.cpp | 10 ---------- test_conformance/math_brute_force/macro_binary_float.cpp | 10 ---------- test_conformance/math_brute_force/macro_unary_double.cpp | 10 ---------- test_conformance/math_brute_force/macro_unary_float.cpp | 10 ---------- test_conformance/math_brute_force/mad_double.cpp | 8 ++++---- test_conformance/math_brute_force/mad_float.cpp | 8 ++++---- test_conformance/math_brute_force/ternary_double.cpp | 8 ++++---- test_conformance/math_brute_force/ternary_float.cpp | 8 ++++---- test_conformance/math_brute_force/unary_double.cpp | 10 ---------- test_conformance/math_brute_force/unary_float.cpp | 10 ---------- .../math_brute_force/unary_two_results_double.cpp | 8 ++++---- .../math_brute_force/unary_two_results_float.cpp | 8 ++++---- .../math_brute_force/unary_two_results_i_double.cpp | 8 ++++---- .../math_brute_force/unary_two_results_i_float.cpp | 8 ++++---- test_conformance/math_brute_force/unary_u_double.cpp | 8 ++++---- test_conformance/math_brute_force/unary_u_float.cpp | 8 ++++---- 27 files changed, 68 insertions(+), 178 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp 
b/test_conformance/math_brute_force/binary_double.cpp index 0869acad..3eb7dccc 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -112,16 +112,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index aea86ca7..db4604a3 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -110,16 +110,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index eb94b5f7..37e27ac0 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -111,16 +111,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index f6e4d2fc..539e10d0 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -109,16 +109,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index f94b8aa4..7c0766be 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -111,21 +111,11 @@ int BuildKernel(const char *operator_symbol, int vectorSize, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *operator_symbol; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; - return BuildKernel(info->operator_symbol, i, info->kernel_count, + return BuildKernel(info->nameInCode, i, info->kernel_count, info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index 64a4c4af..fe2db19e 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -109,21 +109,11 @@ int BuildKernel(const char *operator_symbol, int vectorSize, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *operator_symbol; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; - return BuildKernel(info->operator_symbol, i, info->kernel_count, + return BuildKernel(info->nameInCode, i, info->kernel_count, info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp index 132ff593..9c98ebb7 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp @@ -118,7 +118,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -129,7 +129,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -192,8 +192,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp index 017ad125..354148ea 100644 --- 
a/test_conformance/math_brute_force/binary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp @@ -116,7 +116,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -127,7 +127,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -193,8 +193,8 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h index 5c1f8e6d..7c296952 100644 --- a/test_conformance/math_brute_force/common.h +++ b/test_conformance/math_brute_force/common.h @@ -28,4 +28,14 @@ using KernelMatrix = std::array, VECTOR_SIZE_COUNT>; // Array of programs for each vector size. using Programs = std::array; +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + KernelMatrix &kernels; + Programs &programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
+}; + #endif /* COMMON_H */ diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index 1900afe4..f52a1292 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ b/test_conformance/math_brute_force/i_unary_double.cpp @@ -103,7 +103,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -114,7 +114,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -143,8 +143,8 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index baf0ab67..633584a7 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -101,7 +101,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -112,7 +112,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint 
job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -140,8 +140,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index 8f723a08..624eaebb 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -110,16 +110,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index bdcb0925..04f759cf 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -108,16 +108,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 0d086614..d0786d1b 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -104,16 +104,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index ea485b05..b03a6003 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -103,16 +103,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp index 77428d06..e5ab68f6 100644 --- a/test_conformance/math_brute_force/mad_double.cpp +++ b/test_conformance/math_brute_force/mad_double.cpp @@ -116,7 +116,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -127,7 +127,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -150,8 +150,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode 
}; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp index 9a7730f1..6760ce99 100644 --- a/test_conformance/math_brute_force/mad_float.cpp +++ b/test_conformance/math_brute_force/mad_float.cpp @@ -114,7 +114,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -125,7 +125,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -149,8 +149,8 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index c4a9a1c4..0639b27a 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -119,7 +119,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo 
+struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -130,7 +130,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -229,8 +229,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index 36d957ce..6f19ef7a 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -117,7 +117,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -128,7 +128,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -245,8 +245,8 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, 
relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 3351ea35..2043e5a0 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -104,16 +104,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index 37a63732..b3b8056b 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -102,16 +102,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp index 5887f192..cf1d3e93 100644 --- a/test_conformance/math_brute_force/unary_two_results_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_double.cpp @@ -110,7 +110,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -121,7 +121,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -149,8 +149,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index fb8d5535..051aca51 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -108,7 +108,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct 
BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -119,7 +119,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -148,8 +148,8 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) float float_ulps = getAllowedUlpError(f, relaxedMode); // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp index 6f2de049..d45ad59d 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp @@ -111,7 +111,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -122,7 +122,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -157,8 +157,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) // 
Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp index 529da8dc..9efe861a 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp @@ -109,7 +109,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -120,7 +120,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -160,8 +160,8 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp index 8113b955..e81ddada 100644 --- a/test_conformance/math_brute_force/unary_u_double.cpp +++ b/test_conformance/math_brute_force/unary_u_double.cpp @@ -105,7 
+105,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -116,7 +116,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -145,8 +145,8 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp index ee077c80..bfbf2cf8 100644 --- a/test_conformance/math_brute_force/unary_u_float.cpp +++ b/test_conformance/math_brute_force/unary_u_float.cpp @@ -102,7 +102,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -113,7 +113,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -142,8 +142,8 @@ int 
TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) -- cgit v1.2.3 From 017f514c2139803bf2097714be9a7345476e9b2d Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Mon, 1 Aug 2022 10:18:36 +0100 Subject: Tidy up subgroup log messages (#1454) Add missing newlines and improve wording of messages. Signed-off-by: Stuart Brady --- .../subgroups/subgroup_common_templates.h | 8 +++---- test_conformance/subgroups/subhelpers.h | 28 +++++++++++----------- test_conformance/subgroups/test_queries.cpp | 8 +++---- .../subgroups/test_subgroup_ballot.cpp | 2 +- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 5051f2e9..c1a8316c 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -321,7 +321,7 @@ template inline Ty calculate(Ty a, Ty b, ArithmeticOp operation) case ArithmeticOp::logical_and: return a && b; case ArithmeticOp::logical_or: return a || b; case ArithmeticOp::logical_xor: return !a ^ !b; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return 0; } @@ -343,7 +343,7 @@ inline cl_double calculate(cl_double a, cl_double b, ArithmeticOp operation) case ArithmeticOp::mul_: { return a * b; } - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return 0; } @@ -365,7 +365,7 @@ inline cl_float calculate(cl_float a, cl_float b, ArithmeticOp operation) case ArithmeticOp::mul_: { return a * b; } - 
default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return 0; } @@ -382,7 +382,7 @@ inline subgroups::cl_half calculate(subgroups::cl_half a, subgroups::cl_half b, case ArithmeticOp::min_: return to_float(a) < to_float(b) || is_half_nan(b.data) ? a : b; case ArithmeticOp::mul_: return to_half(to_float(a) * to_float(b)); - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return to_half(0); } diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index a305639a..cc03fc4c 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -86,7 +86,7 @@ struct WorkGroupParams if (kernel_function_name.find(name) != kernel_function_name.end()) { log_info("Kernel definition duplication. Source will be " - "overwritten for function name %s", + "overwritten for function name %s\n", name.c_str()); } kernel_function_name[name] = source; @@ -284,7 +284,7 @@ static const char *const operation_names(ArithmeticOp operation) case ArithmeticOp::logical_and: return "logical_and"; case ArithmeticOp::logical_or: return "logical_or"; case ArithmeticOp::logical_xor: return "logical_xor"; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return ""; } @@ -306,7 +306,7 @@ static const char *const operation_names(BallotOp operation) case BallotOp::gt_mask: return "gt"; case BallotOp::le_mask: return "le"; case BallotOp::lt_mask: return "lt"; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return ""; } @@ -321,7 +321,7 @@ static const char *const operation_names(ShuffleOp operation) case ShuffleOp::shuffle_xor: return "shuffle_xor"; case ShuffleOp::rotate: return "rotate"; case ShuffleOp::clustered_rotate: return "clustered_rotate"; - default: 
log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return ""; } @@ -334,7 +334,7 @@ static const char *const operation_names(NonUniformVoteOp operation) case NonUniformVoteOp::all_equal: return "all_equal"; case NonUniformVoteOp::any: return "any"; case NonUniformVoteOp::elect: return "elect"; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return ""; } @@ -347,7 +347,7 @@ static const char *const operation_names(SubgroupsBroadcastOp operation) case SubgroupsBroadcastOp::broadcast_first: return "broadcast_first"; case SubgroupsBroadcastOp::non_uniform_broadcast: return "non_uniform_broadcast"; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return ""; } @@ -524,7 +524,7 @@ template struct CommonTypeManager case ArithmeticOp::and_: return (Ty)~0; case ArithmeticOp::or_: return (Ty)0; case ArithmeticOp::xor_: return (Ty)0; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return 0; } @@ -552,7 +552,7 @@ template <> struct TypeManager : public CommonTypeManager case ArithmeticOp::logical_and: return (cl_int)1; case ArithmeticOp::logical_or: return (cl_int)0; case ArithmeticOp::logical_xor: return (cl_int)0; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return 0; } @@ -966,7 +966,7 @@ template <> struct TypeManager : public CommonTypeManager case ArithmeticOp::min_: return std::numeric_limits::infinity(); case ArithmeticOp::mul_: return (cl_float)1; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return 0; } @@ -1025,7 +1025,7 @@ template <> struct TypeManager : public CommonTypeManager case ArithmeticOp::min_: return std::numeric_limits::infinity(); case 
ArithmeticOp::mul_: return (cl_double)1; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return 0; } @@ -1112,7 +1112,7 @@ struct TypeManager case ArithmeticOp::max_: return { 0xfc00 }; case ArithmeticOp::min_: return { 0x7c00 }; case ArithmeticOp::mul_: return { 0x3c00 }; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return { 0 }; } @@ -1566,7 +1566,7 @@ template struct test subgroupsApiSet.clGetKernelSubGroupInfo_ptr(); if (clGetKernelSubGroupInfo_ptr == NULL) { - log_error("ERROR: %s function not available", + log_error("ERROR: %s function not available\n", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } @@ -1576,7 +1576,7 @@ template struct test if (error != CL_SUCCESS) { log_error("ERROR: %s function error for " - "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE", + "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE\n", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } @@ -1589,7 +1589,7 @@ template struct test if (error != CL_SUCCESS) { log_error("ERROR: %s function error for " - "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE", + "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE\n", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } diff --git a/test_conformance/subgroups/test_queries.cpp b/test_conformance/subgroups/test_queries.cpp index 761ca7a6..6b940935 100644 --- a/test_conformance/subgroups/test_queries.cpp +++ b/test_conformance/subgroups/test_queries.cpp @@ -100,7 +100,7 @@ int test_sub_group_info(cl_device_id device, cl_context context, subgroupsApiSet.clGetKernelSubGroupInfo_ptr(); if (clGetKernelSubGroupInfo_ptr == NULL) { - log_error("ERROR: %s function not available", + log_error("ERROR: %s function not available\n", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } @@ -112,7 +112,7 @@ int test_sub_group_info(cl_device_id device, cl_context context, if (error != 
CL_SUCCESS) { log_error("ERROR: %s function error for " - "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE", + "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE\n", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } @@ -133,7 +133,7 @@ int test_sub_group_info(cl_device_id device, cl_context context, if (error != CL_SUCCESS) { log_error("ERROR: %s function error " - "for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE", + "for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE\n", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } @@ -209,4 +209,4 @@ int test_sub_group_info_ext(cl_device_id device, cl_context context, } return test_sub_group_info(device, context, queue, num_elements, false); -} \ No newline at end of file +} diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index b1e6944f..b35520e6 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -502,7 +502,7 @@ template struct BALLOT_COUNT_SCAN_FIND } else { - log_error("Unknown operation..."); + log_error("Unknown operation...\n"); } } -- cgit v1.2.3 From bd03e17a56c86116d6254bd56ead2fa84710f919 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Mon, 1 Aug 2022 10:18:53 +0100 Subject: Add missing external memory/sync extensions to list of known khr extensions (#1455) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kévin Petit --- test_conformance/compiler/test_compiler_defines_for_extensions.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 84b7798f..4e5b2841 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -77,11 +77,14 @@ const char 
*known_extensions[] = { "cl_khr_spirv_linkonce_odr", "cl_khr_semaphore", "cl_khr_external_semaphore", + "cl_khr_external_semaphore_opaque_fd", "cl_khr_external_semaphore_sync_fd", "cl_khr_command_buffer", + "cl_khr_external_memory", + "cl_khr_external_memory_opaque_fd", }; -size_t num_known_extensions = sizeof(known_extensions) / sizeof(char *); +size_t num_known_extensions = ARRAY_SIZE(known_extensions); size_t first_API_extension = 29; const char *known_embedded_extensions[] = { -- cgit v1.2.3 From cdf5a105fcdc32b9203c94e0623e36f64117e6a8 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 2 Aug 2022 18:16:03 +0100 Subject: Fix misleading indentation and enable -Wmisleading-indentation (#1458) Signed-off-by: Stuart Brady --- CMakeLists.txt | 1 + test_conformance/basic/test_arraycopy.cpp | 5 ++-- .../basic/test_multireadimageonefmt.cpp | 24 +++++++++---------- test_conformance/basic/test_writeimage_fp32.cpp | 7 +++--- test_conformance/basic/test_writeimage_int16.cpp | 7 +++--- test_conformance/commonfns/test_sign.cpp | 13 +++++------ test_conformance/commonfns/test_step.cpp | 27 ++++++++++------------ test_conformance/events/test_callbacks.cpp | 25 ++++++++++---------- test_conformance/half/Test_vStoreHalf.cpp | 6 ++--- .../images/clReadWriteImage/test_read_1D.cpp | 19 ++++++++------- .../test_multiple_devices.cpp | 5 ++-- test_conformance/profiling/execute.cpp | 4 ++-- test_conformance/profiling/writeImage.cpp | 4 ++-- 13 files changed, 72 insertions(+), 75 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f5f4472..fe56d0fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,6 +88,7 @@ macro(add_cxx_flag_if_supported flag) endmacro(add_cxx_flag_if_supported) if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") + add_cxx_flag_if_supported(-Wmisleading-indentation) add_cxx_flag_if_supported(-Wno-narrowing) add_cxx_flag_if_supported(-Wno-format) add_cxx_flag_if_supported(-Werror) diff --git 
a/test_conformance/basic/test_arraycopy.cpp b/test_conformance/basic/test_arraycopy.cpp index 5a352869..d9dbcc1b 100644 --- a/test_conformance/basic/test_arraycopy.cpp +++ b/test_conformance/basic/test_arraycopy.cpp @@ -181,9 +181,8 @@ test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, } } - // Keep track of multiple errors. - if (error_count != 0) - err = error_count; + // Keep track of multiple errors. + if (error_count != 0) err = error_count; if (err) log_error("\tCL_MEM_USE_HOST_PTR buffer with kernel copy FAILED\n"); diff --git a/test_conformance/basic/test_multireadimageonefmt.cpp b/test_conformance/basic/test_multireadimageonefmt.cpp index b37c8414..c230e67a 100644 --- a/test_conformance/basic/test_multireadimageonefmt.cpp +++ b/test_conformance/basic/test_multireadimageonefmt.cpp @@ -153,14 +153,14 @@ int test_mri_one(cl_device_id device, cl_context context, cl_command_queue queue err = clSetKernelArg(kernel, 0, sizeof i, &i); err |= clSetKernelArg(kernel, 1, sizeof err, &err); err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); - for (i=0; i<8; i++) - err |= clSetKernelArg(kernel, 3+i, sizeof streams[i], &streams[i]); + for (i = 0; i < 8; i++) + err |= clSetKernelArg(kernel, 3 + i, sizeof streams[i], &streams[i]); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed\n"); - return -1; - } + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } threads[0] = (unsigned int)img_width; threads[1] = (unsigned int)img_height; @@ -182,15 +182,13 @@ int test_mri_one(cl_device_id device, cl_context context, cl_command_queue queue // cleanup clReleaseSampler(sampler); - for (i=0; i<8; i++) - clReleaseMemObject(streams[i]); + for (i = 0; i < 8; i++) clReleaseMemObject(streams[i]); clReleaseKernel(kernel); clReleaseProgram(program); - for (i=0; i<7; i++) - free(input_ptr[i]); - free(output_ptr); + for (i = 0; i < 7; i++) free(input_ptr[i]); + free(output_ptr); - return err; + return err; } 
diff --git a/test_conformance/basic/test_writeimage_fp32.cpp b/test_conformance/basic/test_writeimage_fp32.cpp index fef71874..c68463ac 100644 --- a/test_conformance/basic/test_writeimage_fp32.cpp +++ b/test_conformance/basic/test_writeimage_fp32.cpp @@ -122,9 +122,10 @@ int test_writeimage_fp32(cl_device_id device, cl_context context, cl_command_que return -1; } - err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgbaFFFF_write_kernel_code, "test_rgbaFFFF_write" ); - if (err) - return -1; + err = create_single_kernel_helper(context, &program, &kernel[0], 1, + &rgbaFFFF_write_kernel_code, + "test_rgbaFFFF_write"); + if (err) return -1; kernel[1] = clCreateKernel(program, "test_rgbaFFFF_write", NULL); if (!kernel[1]) { diff --git a/test_conformance/basic/test_writeimage_int16.cpp b/test_conformance/basic/test_writeimage_int16.cpp index 8afb77a9..d863a3a3 100644 --- a/test_conformance/basic/test_writeimage_int16.cpp +++ b/test_conformance/basic/test_writeimage_int16.cpp @@ -128,9 +128,10 @@ int test_writeimage_int16(cl_device_id device, cl_context context, cl_command_qu return -1; } - err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgba16_write_kernel_code, "test_rgba16_write" ); - if (err) - return -1; + err = create_single_kernel_helper(context, &program, &kernel[0], 1, + &rgba16_write_kernel_code, + "test_rgba16_write"); + if (err) return -1; kernel[1] = clCreateKernel(program, "test_rgba16_write", NULL); if (!kernel[1]) { diff --git a/test_conformance/commonfns/test_sign.cpp b/test_conformance/commonfns/test_sign.cpp index 1b842e35..6dba58da 100644 --- a/test_conformance/commonfns/test_sign.cpp +++ b/test_conformance/commonfns/test_sign.cpp @@ -223,14 +223,13 @@ test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int n free(input_ptr[0]); free(output_ptr); - if(err) - return err; + if (err) return err; - if( ! 
is_extension_available( device, "cl_khr_fp64")) - { - log_info( "skipping double test -- cl_khr_fp64 not supported.\n" ); - return 0; - } + if (!is_extension_available(device, "cl_khr_fp64")) + { + log_info("skipping double test -- cl_khr_fp64 not supported.\n"); + return 0; + } return test_sign_double( device, context, queue, n_elems); } diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp index 0e3cfe07..330083b2 100644 --- a/test_conformance/commonfns/test_step.cpp +++ b/test_conformance/commonfns/test_step.cpp @@ -158,23 +158,20 @@ test_step(cl_device_id device, cl_context context, cl_command_queue queue, int n } err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" ); - if (err) - return -1; + if (err) return -1; err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" ); - if (err) - return -1; + if (err) return -1; err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" ); - if (err) - return -1; - err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code, "test_step8" ); - if (err) - return -1; - err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code, "test_step16" ); - if (err) - return -1; - err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code, "test_step3" ); - if (err) - return -1; + if (err) return -1; + err = create_single_kernel_helper(context, &program[3], &kernel[3], 1, + &step8_kernel_code, "test_step8"); + if (err) return -1; + err = create_single_kernel_helper(context, &program[4], &kernel[4], 1, + &step16_kernel_code, "test_step16"); + if (err) return -1; + err = create_single_kernel_helper(context, &program[5], &kernel[5], 1, + &step3_kernel_code, "test_step3"); + if (err) return -1; values[0] = streams[0]; values[1] = streams[1]; diff --git 
a/test_conformance/events/test_callbacks.cpp b/test_conformance/events/test_callbacks.cpp index 2ffb9ca7..6025afb7 100644 --- a/test_conformance/events/test_callbacks.cpp +++ b/test_conformance/events/test_callbacks.cpp @@ -110,11 +110,12 @@ int test_callback_event_single( cl_device_id device, cl_context context, cl_comm { usleep( 100000 ); // 1/10th second - int cc=0; - for( int k=0;k< EVENT_CALLBACK_TYPE_TOTAL;k++) - if (sCallbackTriggered_flag[k]) { - cc++; - } + int cc = 0; + for (int k = 0; k < EVENT_CALLBACK_TYPE_TOTAL; k++) + if (sCallbackTriggered_flag[k]) + { + cc++; + } if (cc== EVENT_CALLBACK_TYPE_TOTAL ) { @@ -260,8 +261,8 @@ int test_callbacks_simultaneous( cl_device_id deviceID, cl_context context, cl_c if (actionEvents == NULL) { log_error(" memory error in test_callbacks_simultaneous \n"); - for (size_t i=0;i<(sizeof(actions)/sizeof(actions[0]));++i) - if (actions[i]) delete actions[i]; + for (size_t i = 0; i < (sizeof(actions) / sizeof(actions[0])); ++i) + if (actions[i]) delete actions[i]; return -1; } @@ -317,11 +318,11 @@ int test_callbacks_simultaneous( cl_device_id deviceID, cl_context context, cl_c usleep( 100000 ); // 1/10th second if( ((last_count = sSimultaneousCount)) == total_callbacks ) { - // All of the callbacks were executed - if (actionEvents) delete [] actionEvents; - for (size_t i=0;i<(sizeof(actions)/sizeof(actions[0]));++i) - if (actions[i]) delete actions[i]; - return 0; + // All of the callbacks were executed + if (actionEvents) delete[] actionEvents; + for (size_t i = 0; i < (sizeof(actions) / sizeof(actions[0])); ++i) + if (actions[i]) delete actions[i]; + return 0; } } diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp index 3ca5920b..b1491025 100644 --- a/test_conformance/half/Test_vStoreHalf.cpp +++ b/test_conformance/half/Test_vStoreHalf.cpp @@ -117,8 +117,7 @@ CheckF(cl_uint jid, cl_uint tid, void *userInfo) return 0; for (j = 0; j < count; j++) { - if (s[j] == r[j]) - 
continue; + if (s[j] == r[j]) continue; // Pass any NaNs if ((s[j] & 0x7fff) > 0x7c00 && (r[j] & 0x7fff) > 0x7c00 ) @@ -189,8 +188,7 @@ CheckD(cl_uint jid, cl_uint tid, void *userInfo) return 0; for (j = 0; j < count; j++) { - if (s[j] == r[j]) - continue; + if (s[j] == r[j]) continue; // Pass any NaNs if ((s[j] & 0x7fff) > 0x7c00 && (r[j] & 0x7fff) > 0x7c00) diff --git a/test_conformance/images/clReadWriteImage/test_read_1D.cpp b/test_conformance/images/clReadWriteImage/test_read_1D.cpp index eef5bf4e..2a42a70e 100644 --- a/test_conformance/images/clReadWriteImage/test_read_1D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_1D.cpp @@ -90,14 +90,17 @@ int test_read_image_1D(cl_context context, cl_command_queue queue, region[0] = width_lod; - if ( gDebugTrace ) - if ( gTestMipmaps) { - log_info(" - Working at mipLevel :%llu\n", (unsigned long long)lod); - } - error = clEnqueueWriteImage(queue, image, CL_FALSE, - origin, region, ( gEnablePitch ? row_pitch_lod : 0 ), 0, - (char*)imageValues + imgValMipLevelOffset, 0, NULL, NULL); - if (error != CL_SUCCESS) { + if (gDebugTrace) + if (gTestMipmaps) + { + log_info(" - Working at mipLevel :%llu\n", (unsigned long long)lod); + } + error = clEnqueueWriteImage(queue, image, CL_FALSE, origin, region, + (gEnablePitch ? row_pitch_lod : 0), 0, + (char *)imageValues + imgValMipLevelOffset, 0, + NULL, NULL); + if (error != CL_SUCCESS) + { log_error( "ERROR: Unable to write to 1D image of size %d \n", (int)width_lod ); return -1; } diff --git a/test_conformance/multiple_device_context/test_multiple_devices.cpp b/test_conformance/multiple_device_context/test_multiple_devices.cpp index 59543ade..4f187b9c 100644 --- a/test_conformance/multiple_device_context/test_multiple_devices.cpp +++ b/test_conformance/multiple_device_context/test_multiple_devices.cpp @@ -175,9 +175,8 @@ int test_device_set(size_t deviceCount, size_t queueCount, cl_device_id *devices } /* All done now! 
*/ - if (errors) - return -1; - return 0; + if (errors) return -1; + return 0; } int test_two_devices(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) diff --git a/test_conformance/profiling/execute.cpp b/test_conformance/profiling/execute.cpp index 0541bfa5..44b1bcd4 100644 --- a/test_conformance/profiling/execute.cpp +++ b/test_conformance/profiling/execute.cpp @@ -335,8 +335,8 @@ static int kernelFilter( cl_device_id device, cl_context context, cl_command_que clReleaseMemObject( memobjs[1] ); clReleaseMemObject( memobjs[0] ); - if (check_times(queueStart, submitStart, writeStart, writeEnd, device)) - err = -1; + if (check_times(queueStart, submitStart, writeStart, writeEnd, device)) + err = -1; return err; diff --git a/test_conformance/profiling/writeImage.cpp b/test_conformance/profiling/writeImage.cpp index fbc8fbcd..ec2fbdaf 100644 --- a/test_conformance/profiling/writeImage.cpp +++ b/test_conformance/profiling/writeImage.cpp @@ -628,8 +628,8 @@ int write_image( cl_device_id device, cl_context context, cl_command_queue queue free( dst ); free( inptr ); - if (check_times(queueStart, submitStart, writeStart, writeEnd, device)) - err = -1; + if (check_times(queueStart, submitStart, writeStart, writeEnd, device)) + err = -1; return err; -- cgit v1.2.3 From c44b5b6ae3c1d762d73c5e16eb51e244f9d708a2 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Tue, 2 Aug 2022 14:31:24 -0500 Subject: Fix indentation of test_waitlists.cpp (#1459) * fix indentation of test_waitlists.cpp Followup of #1458 * run formatter --- test_conformance/events/test_waitlists.cpp | 171 +++++++++++++++++------------ 1 file changed, 101 insertions(+), 70 deletions(-) diff --git a/test_conformance/events/test_waitlists.cpp b/test_conformance/events/test_waitlists.cpp index e23cacf4..ebf5da9b 100644 --- a/test_conformance/events/test_waitlists.cpp +++ b/test_conformance/events/test_waitlists.cpp @@ -28,10 +28,13 @@ int test_waitlist( cl_device_id device, 
cl_context context, cl_command_queue que cl_int status[ 3 ]; cl_int error; - if (multiple) - log_info("\tExecuting reference event 0, then reference event 1 with reference event 0 in its waitlist, then test event 2 with reference events 0 and 1 in its waitlist.\n"); - else - log_info("\tExecuting reference event 0, then test event 2 with reference event 0 in its waitlist.\n"); + if (multiple) + log_info("\tExecuting reference event 0, then reference event 1 with " + "reference event 0 in its waitlist, then test event 2 with " + "reference events 0 and 1 in its waitlist.\n"); + else + log_info("\tExecuting reference event 0, then test event 2 with " + "reference event 0 in its waitlist.\n"); // Set up the first base action to wait against error = actions[ 0 ].Setup( device, context, queue ); @@ -49,7 +52,7 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que test_error( error, "Unable to set up test event" ); // Execute all events now - if (PRINT_OPS) log_info("\tExecuting action 0...\n"); + if (PRINT_OPS) log_info("\tExecuting action 0...\n"); error = actions[ 0 ].Execute( queue, 0, NULL, &events[ 0 ] ); test_error( error, "Unable to execute first event" ); @@ -61,17 +64,20 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que } // Sanity check - if( multiple ) { - if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); + if (multiple) + { + if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); - test_error( error, "Unable to get event status" ); - } - if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); + test_error(error, "Unable to get event status"); + } + if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); - test_error( error, "Unable 
to get event status" ); + test_error(error, "Unable to get event status"); - log_info("\t\tEvent status after starting reference events: reference event 0: %s, reference event 1: %s, test event 2: %s.\n", - IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), "N/A"); + log_info("\t\tEvent status after starting reference events: reference " + "event 0: %s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString(status[0]), + (multiple ? IGetStatusString(status[1]) : "N/A"), "N/A"); if( ( status[ 0 ] == CL_COMPLETE ) || ( multiple && status[ 1 ] == CL_COMPLETE ) ) { @@ -79,25 +85,29 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que return 0; } - if (PRINT_OPS) log_info("\tExecuting action to test...\n"); + if (PRINT_OPS) log_info("\tExecuting action to test...\n"); error = actionToTest->Execute( queue, ( multiple ) ? 2 : 1, &events[ 0 ], &events[ 2 ] ); test_error( error, "Unable to execute test event" ); // Hopefully, the first event is still running - if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); + if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); test_error( error, "Unable to get event status" ); - if( multiple ) { - if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); + if (multiple) + { + if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); - test_error( error, "Unable to get event status" ); - } - if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); + test_error(error, "Unable to get event status"); + } + if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); 
- test_error( error, "Unable to get event status" ); + test_error(error, "Unable to get event status"); - log_info("\t\tEvent status after starting test event: reference event 0: %s, reference event 1: %s, test event 2: %s.\n", - IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] )); + log_info("\t\tEvent status after starting test event: reference event 0: " + "%s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString(status[0]), + (multiple ? IGetStatusString(status[1]) : "N/A"), + IGetStatusString(status[2])); if( multiple ) { @@ -108,12 +118,15 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que return 0; } - if(status[1] == CL_COMPLETE && status[0] != CL_COMPLETE) - { - log_error("ERROR: Test failed because the second wait event is complete and the first is not.(status: 0: %s and 1: %s)\n", IGetStatusString( status[ 0 ] ), IGetStatusString( status[ 1 ] ) ); + if (status[1] == CL_COMPLETE && status[0] != CL_COMPLETE) + { + log_error( + "ERROR: Test failed because the second wait event is complete " + "and the first is not.(status: 0: %s and 1: %s)\n", + IGetStatusString(status[0]), IGetStatusString(status[1])); clFinish( queue ); return -1; - } + } } else { @@ -139,25 +152,29 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que } // Now wait for the first reference event - if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n"); + if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n"); error = clWaitForEvents( 1, &events[ 0 ] ); test_error( error, "Unable to wait for reference event" ); // Grab statuses again - if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); + if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); test_error( error, "Unable to get event 
status" ); - if( multiple ) { - if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); + if (multiple) + { + if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); - test_error( error, "Unable to get event status" ); - } - if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); + test_error(error, "Unable to get event status"); + } + if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); - test_error( error, "Unable to get event status" ); + test_error(error, "Unable to get event status"); - log_info("\t\tEvent status after waiting for reference event 0: reference event 0: %s, reference event 1: %s, test event 2: %s.\n", - IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] )); + log_info("\t\tEvent status after waiting for reference event 0: reference " + "event 0: %s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString(status[0]), + (multiple ? 
IGetStatusString(status[1]) : "N/A"), + IGetStatusString(status[2])); // Sanity if( status[ 0 ] != CL_COMPLETE ) @@ -170,11 +187,12 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que // If we're multiple, and the second event isn't complete, then our test event should still be queued if( multiple && status[ 1 ] != CL_COMPLETE ) { - if( status[ 1 ] == CL_RUNNING && status[ 2 ] == CL_RUNNING ) { - log_error("ERROR: Test event and second event are both running.\n"); - clFinish( queue ); - return -1; - } + if (status[1] == CL_RUNNING && status[2] == CL_RUNNING) + { + log_error("ERROR: Test event and second event are both running.\n"); + clFinish(queue); + return -1; + } if( status[ 2 ] != CL_QUEUED && status[ 2 ] != CL_SUBMITTED ) { log_error( "ERROR: Test event did not wait for second event before starting! (status of ref: 1: %s, of test: 2: %s)\n", IGetStatusString( status[ 1 ] ), IGetStatusString( status[ 2 ] ) ); @@ -183,25 +201,33 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que } // Now wait for second event to complete, too - if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n"); + if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n"); error = clWaitForEvents( 1, &events[ 1 ] ); test_error( error, "Unable to wait for second reference event" ); // Grab statuses again - if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); - error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); - test_error( error, "Unable to get event status" ); - if( multiple ) { - if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); - error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); - test_error( error, "Unable to get event status" ); - } - if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); - error = clGetEventInfo( events[ 0 ], 
CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); - test_error( error, "Unable to get event status" ); - - log_info("\t\tEvent status after waiting for reference event 1: reference event 0: %s, reference event 1: %s, test event 2: %s.\n", - IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] )); + if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); + error = clGetEventInfo(events[2], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[2]), &status[2], NULL); + test_error(error, "Unable to get event status"); + if (multiple) + { + if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[1]), &status[1], NULL); + test_error(error, "Unable to get event status"); + } + if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[0]), &status[0], NULL); + test_error(error, "Unable to get event status"); + + log_info( + "\t\tEvent status after waiting for reference event 1: reference " + "event 0: %s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString(status[0]), + (multiple ? 
IGetStatusString(status[1]) : "N/A"), + IGetStatusString(status[2])); // Sanity if( status[ 1 ] != CL_COMPLETE ) @@ -227,25 +253,30 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que } // Wait for the test event, then return - if (PRINT_OPS) log_info("\tWaiting for action 2 to test to finish...\n"); + if (PRINT_OPS) log_info("\tWaiting for action 2 to test to finish...\n"); error = clWaitForEvents( 1, &events[ 2 ] ); test_error( error, "Unable to wait for test event" ); - error |= clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); - test_error( error, "Unable to get event status" ); + error |= clGetEventInfo(events[2], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[2]), &status[2], NULL); + test_error(error, "Unable to get event status"); - log_info("\t\tEvent status after waiting for test event: reference event 0: %s, reference event 1: %s, test event 2: %s.\n", - IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] )); + log_info("\t\tEvent status after waiting for test event: reference event " + "0: %s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString(status[0]), + (multiple ? 
IGetStatusString(status[1]) : "N/A"), + IGetStatusString(status[2])); - // Sanity - if( status[ 2 ] != CL_COMPLETE ) - { - log_error( "ERROR: Test event didn't complete (status: 2: %s)\n", IGetStatusString( status[ 2 ] ) ); - clFinish( queue ); - return -1; - } + // Sanity + if (status[2] != CL_COMPLETE) + { + log_error("ERROR: Test event didn't complete (status: 2: %s)\n", + IGetStatusString(status[2])); + clFinish(queue); + return -1; + } - clFinish(queue); + clFinish(queue); return 0; } -- cgit v1.2.3 From c12bff46c605b9326908c9aaf4e50a5e6e81d166 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 4 Aug 2022 15:03:52 +0100 Subject: Tidy up BuildKernelInfo (#1461) Remove the `offset` field from both structures, because it was always set to the global `gMinVectorSizeIndex`. Improve documentation and rename some variables: - `i` becomes `vectorSize`; - `kernel_count` becomes `threadCount`. Original patch by Marco Antognini. Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/binary_double.cpp | 15 +++++++-------- test_conformance/math_brute_force/binary_float.cpp | 15 +++++++-------- test_conformance/math_brute_force/binary_i_double.cpp | 15 +++++++-------- test_conformance/math_brute_force/binary_i_float.cpp | 15 +++++++-------- .../math_brute_force/binary_operator_double.cpp | 15 +++++++-------- .../math_brute_force/binary_operator_float.cpp | 15 +++++++-------- .../math_brute_force/binary_two_results_i_double.cpp | 11 +++++------ .../math_brute_force/binary_two_results_i_float.cpp | 11 +++++------ test_conformance/math_brute_force/common.h | 13 ++++++++++--- test_conformance/math_brute_force/i_unary_double.cpp | 11 +++++------ test_conformance/math_brute_force/i_unary_float.cpp | 11 +++++------ test_conformance/math_brute_force/macro_binary_double.cpp | 15 +++++++-------- test_conformance/math_brute_force/macro_binary_float.cpp | 15 +++++++-------- 
test_conformance/math_brute_force/macro_unary_double.cpp | 15 +++++++-------- test_conformance/math_brute_force/macro_unary_float.cpp | 15 +++++++-------- test_conformance/math_brute_force/mad_double.cpp | 11 +++++------ test_conformance/math_brute_force/mad_float.cpp | 11 +++++------ test_conformance/math_brute_force/ternary_double.cpp | 11 +++++------ test_conformance/math_brute_force/ternary_float.cpp | 11 +++++------ test_conformance/math_brute_force/unary_double.cpp | 15 +++++++-------- test_conformance/math_brute_force/unary_float.cpp | 15 +++++++-------- .../math_brute_force/unary_two_results_double.cpp | 11 +++++------ .../math_brute_force/unary_two_results_float.cpp | 11 +++++------ .../math_brute_force/unary_two_results_i_double.cpp | 11 +++++------ .../math_brute_force/unary_two_results_i_float.cpp | 11 +++++------ test_conformance/math_brute_force/unary_u_double.cpp | 11 +++++------ test_conformance/math_brute_force/unary_u_float.cpp | 11 +++++------ 27 files changed, 164 insertions(+), 183 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index 3eb7dccc..034b325a 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -115,10 +115,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -741,10 +741,9 @@ int 
TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index db4604a3..7abaa0e4 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -113,10 +113,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -898,10 +898,9 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 37e27ac0..bba93617 
100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -114,10 +114,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -663,10 +663,9 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index 539e10d0..4821830c 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -112,10 +112,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, 
vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -656,10 +656,9 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 7c0766be..09c560e9 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -114,10 +114,10 @@ int BuildKernel(const char *operator_symbol, int vectorSize, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -708,10 +708,9 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, 
gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index fe2db19e..f2e57bc1 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -112,10 +112,10 @@ int BuildKernel(const char *operator_symbol, int vectorSize, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -835,10 +835,9 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp index 9c98ebb7..59a5bfe2 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp @@ -120,7 +120,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs 
&programs; const char *nameInCode; @@ -130,9 +129,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } struct ComputeReferenceInfoD @@ -192,8 +191,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp index 354148ea..6c1dd3bc 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp @@ -118,7 +118,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -128,9 +127,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), 
info->relaxedMode); } struct ComputeReferenceInfoF @@ -193,8 +192,8 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h index 7c296952..f0d18dd9 100644 --- a/test_conformance/math_brute_force/common.h +++ b/test_conformance/math_brute_force/common.h @@ -28,14 +28,21 @@ using KernelMatrix = std::array, VECTOR_SIZE_COUNT>; // Array of programs for each vector size. using Programs = std::array; +// Information to generate OpenCL kernels. struct BuildKernelInfo { - cl_uint offset; // the first vector size to build - cl_uint kernel_count; + // Number of kernels to build, one for each thread to avoid data races. + cl_uint threadCount; + KernelMatrix &kernels; + Programs &programs; + + // Function, macro or symbol tested by the kernel. const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. + + // Whether to build with -cl-fast-relaxed-math. 
+ bool relaxedMode; }; #endif /* COMMON_H */ diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index f52a1292..a05737da 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ b/test_conformance/math_brute_force/i_unary_double.cpp @@ -105,7 +105,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -115,9 +114,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } } // anonymous namespace @@ -143,8 +142,8 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index 633584a7..13442e61 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -103,7 +103,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -113,9 +112,9 
@@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } } // anonymous namespace @@ -140,8 +139,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index 624eaebb..88b0f86c 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -113,10 +113,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -666,10 +666,9 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - 
gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 04f759cf..6199dd1a 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -111,10 +111,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -655,10 +655,9 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index d0786d1b..b7fb8a96 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ 
b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -107,10 +107,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -439,10 +439,9 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index b03a6003..e4c22369 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -106,10 +106,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + 
info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -453,10 +453,9 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp index e5ab68f6..3def6a80 100644 --- a/test_conformance/math_brute_force/mad_double.cpp +++ b/test_conformance/math_brute_force/mad_double.cpp @@ -118,7 +118,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -128,9 +127,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } } // anonymous namespace @@ -150,8 +149,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - 
gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp index 6760ce99..498f25eb 100644 --- a/test_conformance/math_brute_force/mad_float.cpp +++ b/test_conformance/math_brute_force/mad_float.cpp @@ -116,7 +116,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -126,9 +125,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } } // anonymous namespace @@ -149,8 +148,8 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index 0639b27a..94fbe268 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -121,7 +121,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -131,9 +130,9 @@ struct BuildKernelInfo2 
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } // A table of more difficult cases to get right @@ -229,8 +228,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index 6f19ef7a..762c57de 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -119,7 +119,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -129,9 +128,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } // A table of more difficult cases to get right @@ -245,8 +244,8 @@ int 
TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 2043e5a0..76bcfd46 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -107,10 +107,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -465,10 +465,9 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index b3b8056b..d310054d 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ 
b/test_conformance/math_brute_force/unary_float.cpp @@ -105,10 +105,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -636,10 +636,9 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp index cf1d3e93..858b2c35 100644 --- a/test_conformance/math_brute_force/unary_two_results_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_double.cpp @@ -112,7 +112,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -122,9 +121,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - 
&(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } } // anonymous namespace @@ -149,8 +148,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index 051aca51..85e5d014 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -110,7 +110,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -120,9 +119,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } } // anonymous namespace @@ -148,8 +147,8 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) float float_ulps = getAllowedUlpError(f, relaxedMode); // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + 
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp index d45ad59d..4cfbca9c 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp @@ -113,7 +113,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -123,9 +122,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } cl_ulong abs_cl_long(cl_long i) @@ -157,8 +156,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp index 9efe861a..e324ad09 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp @@ -111,7 +111,6 
@@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -121,9 +120,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } cl_ulong abs_cl_long(cl_long i) @@ -160,8 +159,8 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp index e81ddada..a0c6b793 100644 --- a/test_conformance/math_brute_force/unary_u_double.cpp +++ b/test_conformance/math_brute_force/unary_u_double.cpp @@ -107,7 +107,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -117,9 +116,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = 
gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } cl_ulong random64(MTdata d) @@ -145,8 +144,8 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp index bfbf2cf8..ccfbc3be 100644 --- a/test_conformance/math_brute_force/unary_u_float.cpp +++ b/test_conformance/math_brute_force/unary_u_float.cpp @@ -104,7 +104,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -114,9 +113,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } } // anonymous namespace @@ -142,8 +141,8 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - 
gMinVectorSizeIndex, &build_info))) -- cgit v1.2.3 From 4ee8022230f2cde0cc59a327f85dc31ccb34f778 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Thu, 4 Aug 2022 15:04:14 +0100 Subject: Remove unused variables in subgroup tests (#1460) Signed-off-by: Stuart Brady --- test_conformance/subgroups/subgroup_common_templates.h | 2 +- test_conformance/subgroups/subhelpers.h | 1 - test_conformance/subgroups/test_barrier.cpp | 1 - test_conformance/subgroups/test_subgroup.cpp | 17 ----------------- test_conformance/subgroups/test_subgroup_ballot.cpp | 15 +++------------ .../subgroups/test_subgroup_clustered_reduce.cpp | 1 - 6 files changed, 4 insertions(+), 33 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index c1a8316c..b2648c30 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -481,7 +481,7 @@ template struct SHF static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { - int i, ii, j, k, n, delta; + int i, ii, j, k, n; cl_uint l; int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index cc03fc4c..0944ffb3 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -1496,7 +1496,6 @@ template struct test size_t tmp; cl_int error; int subgroup_size, num_subgroups; - size_t realSize; size_t global = test_params.global_workgroup_size; size_t local = test_params.local_workgroup_size; clProgramWrapper program; diff --git a/test_conformance/subgroups/test_barrier.cpp b/test_conformance/subgroups/test_barrier.cpp index d415eefb..fb93ddb1 100644 --- a/test_conformance/subgroups/test_barrier.cpp +++ b/test_conformance/subgroups/test_barrier.cpp @@ -79,7 +79,6 @@ template struct BAR int ng = test_params.global_workgroup_size; int 
nj = (nw + ns - 1) / ns; ng = ng / nw; - int e; ii = 0; for (k = 0; k < ng; ++k) diff --git a/test_conformance/subgroups/test_subgroup.cpp b/test_conformance/subgroups/test_subgroup.cpp index aa9b32cb..75e9d4ae 100644 --- a/test_conformance/subgroups/test_subgroup.cpp +++ b/test_conformance/subgroups/test_subgroup.cpp @@ -134,23 +134,6 @@ template struct AA } }; -static const char *any_source = "__kernel void test_any(const __global Type " - "*in, __global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_any(in[gid]);\n" - "}\n"; - -static const char *all_source = "__kernel void test_all(const __global Type " - "*in, __global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_all(in[gid]);\n" - "}\n"; - - template int run_broadcast_scan_reduction_for_type(RunTestForType rft) { diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index b35520e6..3882311d 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -190,14 +190,13 @@ template struct BALLOT_BIT_EXTRACT static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { - int wi_id, sb_id, wg_id, l; + int wi_id, sb_id, wg_id; int gws = test_params.global_workgroup_size; int lws = test_params.local_workgroup_size; int sbs = test_params.subgroup_size; int sb_number = (lws + sbs - 1) / sbs; int wg_number = gws / lws; int limit_sbs = sbs > 100 ? 
100 : sbs; - int non_uniform_size = gws % lws; for (wg_id = 0; wg_id < wg_number; ++wg_id) { // for each work_group @@ -235,7 +234,7 @@ template struct BALLOT_BIT_EXTRACT static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, const WorkGroupParams &test_params) { - int wi_id, wg_id, l, sb_id; + int wi_id, wg_id, sb_id; int gws = test_params.global_workgroup_size; int lws = test_params.local_workgroup_size; int sbs = test_params.subgroup_size; @@ -351,10 +350,6 @@ template struct BALLOT_INVERSE static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { - int gws = test_params.global_workgroup_size; - int lws = test_params.local_workgroup_size; - int sbs = test_params.subgroup_size; - int non_uniform_size = gws % lws; // no work here } @@ -398,9 +393,6 @@ template struct BALLOT_INVERSE { current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs; } - // take index of array where info which work_item will - // be broadcast its value is stored - int midx = 4 * wg_offset + 2; // take subgroup local id of this work_item // Check result for (wi_id = 0; wi_id < current_sbs; ++wi_id) @@ -461,7 +453,6 @@ template struct BALLOT_COUNT_SCAN_FIND { wg_number++; } - int e; for (wg_id = 0; wg_id < wg_number; ++wg_id) { // for each work_group if (non_uniform_size && wg_id == wg_number - 1) @@ -683,7 +674,7 @@ template struct SMASK static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { - int wi_id, wg_id, l, sb_id; + int wi_id, wg_id, sb_id; int gws = test_params.global_workgroup_size; int lws = test_params.local_workgroup_size; int sbs = test_params.subgroup_size; diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp index b016bf99..38652d51 100644 --- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -102,7 +102,6 @@ template struct RED_CLU { int ii = j * ns; 
int n = ii + ns > nw ? nw - ii : ns; - int midx = 4 * ii + 2; std::vector clusters_results; int clusters_counter = ns / test_params.cluster_size; clusters_results.resize(clusters_counter); -- cgit v1.2.3 From 38639f229ddc3618eaed9591135538aff976fdca Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 4 Aug 2022 15:05:10 +0100 Subject: Fix test_select verification failure reporting (#1462) When verification of the computed result fails, the test would still report as "passed". This is because `s_test_fail` is only written to and never read. Fix the immediate issue by returning a failure value and incrementing `gFailCount` if any error was detected. The error handling can be improved further, but I'm leaving that out of the scope of this fix. Fixes https://github.com/KhronosGroup/OpenCL-CTS/issues/1445 Signed-off-by: Sven van Haastregt --- test_conformance/select/test_select.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index 35f154ac..e659206e 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -79,7 +79,6 @@ static int s_wimpy_reduction_factor = 256; // sub tests which is for each individual test. 
The following // tracks the subtests int s_test_cnt = 0; -int s_test_fail = 0; //----------------------------------------- // Static helper functions @@ -297,6 +296,7 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont static int doTest(cl_command_queue queue, cl_context context, Type stype, Type cmptype, cl_device_id device) { int err = CL_SUCCESS; + int s_test_fail = 0; MTdata d; const size_t element_count[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 }; cl_mem src1 = NULL; @@ -468,6 +468,11 @@ exit: clReleaseProgram(programs[vecsize]); } ++s_test_cnt; + if (s_test_fail) + { + err = TEST_FAIL; + gFailCount++; + } return err; } -- cgit v1.2.3 From d647529fec1a9f6d28f07a2a85cae345aacb2dd6 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 16 Aug 2022 14:42:33 +0100 Subject: [NFC] Fix missing `double_double.lo` initializer (#1466) Fixes a missing-field-initializers warning. The original intent was most likely to initialize both fields (similar to other functions in this file), but a `,` was missed. 
Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/reference_math.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index 16db3d67..a0a3d65d 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -2321,7 +2321,7 @@ static inline double_double accum_d(double_double a, double b) static inline double_double add_dd(double_double a, double_double b) { - double_double r = { -0.0 - 0.0 }; + double_double r = { -0.0, -0.0 }; if (isinf(a.hi) || isinf(b.hi) || isnan(a.hi) || isnan(b.hi) || 0.0 == a.hi || 0.0 == b.hi) -- cgit v1.2.3 From e52a97e4e9800ccf34678d915281b22524461ea8 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 23 Aug 2022 17:57:05 +0100 Subject: [NFC] Use Unix-style line endings (#1468) Use the same line ending style across all source files. Signed-off-by: Sven van Haastregt --- ...est_cl_khr_spirv_no_integer_wrap_decoration.cpp | 438 ++++++++++----------- 1 file changed, 219 insertions(+), 219 deletions(-) diff --git a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp index 9e1789c2..6a4982eb 100644 --- a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp +++ b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp @@ -1,219 +1,219 @@ -/****************************************************************** -Copyright (c) 2018 The Khronos Group Inc. All Rights Reserved. - -This code is protected by copyright laws and contains material proprietary to the Khronos Group, Inc. 
-This is UNPUBLISHED PROPRIETARY SOURCE CODE that may not be disclosed in whole or in part to -third parties, and may not be reproduced, republished, distributed, transmitted, displayed, -broadcast or otherwise exploited in any manner without the express prior written permission -of Khronos Group. The receipt or possession of this code does not convey any rights to reproduce, -disclose, or distribute its contents, or to manufacture, use, or sell anything that it may describe, -in whole or in part other than under the terms of the Khronos Adopters Agreement -or Khronos Conformance Test Source License Agreement as executed between Khronos and the recipient. -******************************************************************/ - -#include "testBase.h" -#include "types.hpp" - -#include -#include -#include - - -template -int test_ext_cl_khr_spirv_no_integer_wrap_decoration(cl_device_id deviceID, - cl_context context, - cl_command_queue queue, - const char *spvName, - const char *funcName, - const char *Tname) -{ - - cl_int err = CL_SUCCESS; - const int num = 10; - std::vector h_lhs(num); - std::vector h_rhs(num); - std::vector expected_results(num); - std::vector h_ref(num); - if (!is_extension_available(deviceID, "cl_khr_spirv_no_integer_wrap_decoration")) { - log_info("Extension cl_khr_spirv_no_integer_wrap_decoration not supported; skipping tests.\n"); - return 0; - } - - /*Test with some values that do not cause overflow*/ - if (std::is_signed::value == true) { - h_lhs.push_back((T)-25000); - h_lhs.push_back((T)-3333); - h_lhs.push_back((T)-7); - h_lhs.push_back((T)-1); - h_lhs.push_back(0); - h_lhs.push_back(1); - h_lhs.push_back(1024); - h_lhs.push_back(2048); - h_lhs.push_back(4094); - h_lhs.push_back(10000); - } else { - h_lhs.push_back(0); - h_lhs.push_back(1); - h_lhs.push_back(3); - h_lhs.push_back(5); - h_lhs.push_back(10); - h_lhs.push_back(100); - h_lhs.push_back(1024); - h_lhs.push_back(2048); - h_lhs.push_back(4094); - h_lhs.push_back(52888); - } - - 
h_rhs.push_back(0); - h_rhs.push_back(1); - h_rhs.push_back(2); - h_rhs.push_back(3); - h_rhs.push_back(4); - h_rhs.push_back(5); - h_rhs.push_back(6); - h_rhs.push_back(7); - h_rhs.push_back(8); - h_rhs.push_back(9); - size_t bytes = num * sizeof(T); - - clMemWrapper lhs = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, &err); - SPIRV_CHECK_ERROR(err, "Failed to create lhs buffer"); - - err = clEnqueueWriteBuffer(queue, lhs, CL_TRUE, 0, bytes, &h_lhs[0], 0, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to copy to lhs buffer"); - - clMemWrapper rhs = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, &err); - SPIRV_CHECK_ERROR(err, "Failed to create rhs buffer"); - - err = clEnqueueWriteBuffer(queue, rhs, CL_TRUE, 0, bytes, &h_rhs[0], 0, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to copy to rhs buffer"); - - std::string kernelStr; - - { - std::stringstream kernelStream; - kernelStream << "#define spirv_fadd(a, b) (a) + (b) \n"; - kernelStream << "#define spirv_fsub(a, b) (a) - (b) \n"; - kernelStream << "#define spirv_fmul(a, b) (a) * (b) \n"; - kernelStream << "#define spirv_fshiftleft(a, b) (a) << (b) \n"; - kernelStream << "#define spirv_fnegate(a, b) (-a) \n"; - - kernelStream << "#define T " << Tname << "\n"; - kernelStream << "#define FUNC spirv_" << funcName << "\n"; - kernelStream << "__kernel void fmath_cl(__global T *out, \n"; - kernelStream << "const __global T *lhs, const __global T *rhs) \n"; - kernelStream << "{ \n"; - kernelStream << " int id = get_global_id(0); \n"; - kernelStream << " out[id] = FUNC(lhs[id], rhs[id]); \n"; - kernelStream << "} \n"; - kernelStr = kernelStream.str(); - } - - size_t kernelLen = kernelStr.size(); - const char *kernelBuf = kernelStr.c_str(); - - for (int i = 0; i < num; i++) { - if (std::string(funcName) == std::string("fadd")) { - expected_results[i] = h_lhs[i] + h_rhs[i]; - } else if (std::string(funcName) == std::string("fsub")) { - expected_results[i] = h_lhs[i] - h_rhs[i]; - } else if 
(std::string(funcName) == std::string("fmul")) { - expected_results[i] = h_lhs[i] * h_rhs[i]; - } else if (std::string(funcName) == std::string("fshiftleft")) { - expected_results[i] = h_lhs[i] << h_rhs[i]; - } else if (std::string(funcName) == std::string("fnegate")) { - expected_results[i] = 0 - h_lhs[i]; - } - } - - { - // Run the cl kernel for reference results - clProgramWrapper prog; - clKernelWrapper kernel; - err = create_single_kernel_helper(context, &prog, &kernel, 1, - &kernelBuf, "fmath_cl"); - SPIRV_CHECK_ERROR(err, "Failed to create cl kernel"); - - clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err); - SPIRV_CHECK_ERROR(err, "Failed to create ref buffer"); - - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &ref); - SPIRV_CHECK_ERROR(err, "Failed to set arg 0"); - - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &lhs); - SPIRV_CHECK_ERROR(err, "Failed to set arg 1"); - - err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &rhs); - SPIRV_CHECK_ERROR(err, "Failed to set arg 2"); - - size_t global = num; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to enqueue cl kernel"); - - err = clEnqueueReadBuffer(queue, ref, CL_TRUE, 0, bytes, &h_ref[0], 0, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to read from ref"); - } - - for (int i = 0; i < num; i++) { - if (expected_results[i] != h_ref[i]) { - log_error("Values do not match at index %d expected = %d got = %d\n", i, expected_results[i], h_ref[i]); - return -1; - } - } - - clProgramWrapper prog; - err = get_program_with_il(prog, deviceID, context, spvName); - SPIRV_CHECK_ERROR(err, "Failed to build program"); - - clKernelWrapper kernel = clCreateKernel(prog, "fmath_cl", &err); - SPIRV_CHECK_ERROR(err, "Failed to create spv kernel"); - - clMemWrapper res = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err); - SPIRV_CHECK_ERROR(err, "Failed to create res buffer"); - - err = clSetKernelArg(kernel, 0, 
sizeof(cl_mem), &res); - SPIRV_CHECK_ERROR(err, "Failed to set arg 0"); - - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &lhs); - SPIRV_CHECK_ERROR(err, "Failed to set arg 1"); - - err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &rhs); - SPIRV_CHECK_ERROR(err, "Failed to set arg 2"); - - size_t global = num; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to enqueue cl kernel"); - - std::vector h_res(num); - err = clEnqueueReadBuffer(queue, res, CL_TRUE, 0, bytes, &h_res[0], 0, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to read from ref"); - - for (int i = 0; i < num; i++) { - if (expected_results[i] != h_res[i]) { - log_error("Values do not match at location %d expected = %d got = %d\n", i, expected_results[i], h_res[i]); - return -1; - } - } - - return 0; -} - -#define TEST_FMATH_FUNC(TYPE, FUNC) \ - TEST_SPIRV_FUNC(ext_cl_khr_spirv_no_integer_wrap_decoration_##FUNC##_##TYPE) \ - { \ - return test_ext_cl_khr_spirv_no_integer_wrap_decoration(deviceID, context, queue, \ - "ext_cl_khr_spirv_no_integer_wrap_decoration_"#FUNC"_"#TYPE, \ - #FUNC, \ - #TYPE \ - ); \ - } - -TEST_FMATH_FUNC(int, fadd) -TEST_FMATH_FUNC(int, fsub) -TEST_FMATH_FUNC(int, fmul) -TEST_FMATH_FUNC(int, fshiftleft) -TEST_FMATH_FUNC(int, fnegate) -TEST_FMATH_FUNC(uint, fadd) -TEST_FMATH_FUNC(uint, fsub) -TEST_FMATH_FUNC(uint, fmul) -TEST_FMATH_FUNC(uint, fshiftleft) \ No newline at end of file +/****************************************************************** +Copyright (c) 2018 The Khronos Group Inc. All Rights Reserved. + +This code is protected by copyright laws and contains material proprietary to the Khronos Group, Inc. +This is UNPUBLISHED PROPRIETARY SOURCE CODE that may not be disclosed in whole or in part to +third parties, and may not be reproduced, republished, distributed, transmitted, displayed, +broadcast or otherwise exploited in any manner without the express prior written permission +of Khronos Group. 
The receipt or possession of this code does not convey any rights to reproduce, +disclose, or distribute its contents, or to manufacture, use, or sell anything that it may describe, +in whole or in part other than under the terms of the Khronos Adopters Agreement +or Khronos Conformance Test Source License Agreement as executed between Khronos and the recipient. +******************************************************************/ + +#include "testBase.h" +#include "types.hpp" + +#include +#include +#include + + +template +int test_ext_cl_khr_spirv_no_integer_wrap_decoration(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + const char *spvName, + const char *funcName, + const char *Tname) +{ + + cl_int err = CL_SUCCESS; + const int num = 10; + std::vector h_lhs(num); + std::vector h_rhs(num); + std::vector expected_results(num); + std::vector h_ref(num); + if (!is_extension_available(deviceID, "cl_khr_spirv_no_integer_wrap_decoration")) { + log_info("Extension cl_khr_spirv_no_integer_wrap_decoration not supported; skipping tests.\n"); + return 0; + } + + /*Test with some values that do not cause overflow*/ + if (std::is_signed::value == true) { + h_lhs.push_back((T)-25000); + h_lhs.push_back((T)-3333); + h_lhs.push_back((T)-7); + h_lhs.push_back((T)-1); + h_lhs.push_back(0); + h_lhs.push_back(1); + h_lhs.push_back(1024); + h_lhs.push_back(2048); + h_lhs.push_back(4094); + h_lhs.push_back(10000); + } else { + h_lhs.push_back(0); + h_lhs.push_back(1); + h_lhs.push_back(3); + h_lhs.push_back(5); + h_lhs.push_back(10); + h_lhs.push_back(100); + h_lhs.push_back(1024); + h_lhs.push_back(2048); + h_lhs.push_back(4094); + h_lhs.push_back(52888); + } + + h_rhs.push_back(0); + h_rhs.push_back(1); + h_rhs.push_back(2); + h_rhs.push_back(3); + h_rhs.push_back(4); + h_rhs.push_back(5); + h_rhs.push_back(6); + h_rhs.push_back(7); + h_rhs.push_back(8); + h_rhs.push_back(9); + size_t bytes = num * sizeof(T); + + clMemWrapper lhs = clCreateBuffer(context, 
CL_MEM_READ_ONLY, bytes, NULL, &err); + SPIRV_CHECK_ERROR(err, "Failed to create lhs buffer"); + + err = clEnqueueWriteBuffer(queue, lhs, CL_TRUE, 0, bytes, &h_lhs[0], 0, NULL, NULL); + SPIRV_CHECK_ERROR(err, "Failed to copy to lhs buffer"); + + clMemWrapper rhs = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, &err); + SPIRV_CHECK_ERROR(err, "Failed to create rhs buffer"); + + err = clEnqueueWriteBuffer(queue, rhs, CL_TRUE, 0, bytes, &h_rhs[0], 0, NULL, NULL); + SPIRV_CHECK_ERROR(err, "Failed to copy to rhs buffer"); + + std::string kernelStr; + + { + std::stringstream kernelStream; + kernelStream << "#define spirv_fadd(a, b) (a) + (b) \n"; + kernelStream << "#define spirv_fsub(a, b) (a) - (b) \n"; + kernelStream << "#define spirv_fmul(a, b) (a) * (b) \n"; + kernelStream << "#define spirv_fshiftleft(a, b) (a) << (b) \n"; + kernelStream << "#define spirv_fnegate(a, b) (-a) \n"; + + kernelStream << "#define T " << Tname << "\n"; + kernelStream << "#define FUNC spirv_" << funcName << "\n"; + kernelStream << "__kernel void fmath_cl(__global T *out, \n"; + kernelStream << "const __global T *lhs, const __global T *rhs) \n"; + kernelStream << "{ \n"; + kernelStream << " int id = get_global_id(0); \n"; + kernelStream << " out[id] = FUNC(lhs[id], rhs[id]); \n"; + kernelStream << "} \n"; + kernelStr = kernelStream.str(); + } + + size_t kernelLen = kernelStr.size(); + const char *kernelBuf = kernelStr.c_str(); + + for (int i = 0; i < num; i++) { + if (std::string(funcName) == std::string("fadd")) { + expected_results[i] = h_lhs[i] + h_rhs[i]; + } else if (std::string(funcName) == std::string("fsub")) { + expected_results[i] = h_lhs[i] - h_rhs[i]; + } else if (std::string(funcName) == std::string("fmul")) { + expected_results[i] = h_lhs[i] * h_rhs[i]; + } else if (std::string(funcName) == std::string("fshiftleft")) { + expected_results[i] = h_lhs[i] << h_rhs[i]; + } else if (std::string(funcName) == std::string("fnegate")) { + expected_results[i] = 0 - h_lhs[i]; + } + 
} + + { + // Run the cl kernel for reference results + clProgramWrapper prog; + clKernelWrapper kernel; + err = create_single_kernel_helper(context, &prog, &kernel, 1, + &kernelBuf, "fmath_cl"); + SPIRV_CHECK_ERROR(err, "Failed to create cl kernel"); + + clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err); + SPIRV_CHECK_ERROR(err, "Failed to create ref buffer"); + + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &ref); + SPIRV_CHECK_ERROR(err, "Failed to set arg 0"); + + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &lhs); + SPIRV_CHECK_ERROR(err, "Failed to set arg 1"); + + err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &rhs); + SPIRV_CHECK_ERROR(err, "Failed to set arg 2"); + + size_t global = num; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL); + SPIRV_CHECK_ERROR(err, "Failed to enqueue cl kernel"); + + err = clEnqueueReadBuffer(queue, ref, CL_TRUE, 0, bytes, &h_ref[0], 0, NULL, NULL); + SPIRV_CHECK_ERROR(err, "Failed to read from ref"); + } + + for (int i = 0; i < num; i++) { + if (expected_results[i] != h_ref[i]) { + log_error("Values do not match at index %d expected = %d got = %d\n", i, expected_results[i], h_ref[i]); + return -1; + } + } + + clProgramWrapper prog; + err = get_program_with_il(prog, deviceID, context, spvName); + SPIRV_CHECK_ERROR(err, "Failed to build program"); + + clKernelWrapper kernel = clCreateKernel(prog, "fmath_cl", &err); + SPIRV_CHECK_ERROR(err, "Failed to create spv kernel"); + + clMemWrapper res = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err); + SPIRV_CHECK_ERROR(err, "Failed to create res buffer"); + + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &res); + SPIRV_CHECK_ERROR(err, "Failed to set arg 0"); + + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &lhs); + SPIRV_CHECK_ERROR(err, "Failed to set arg 1"); + + err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &rhs); + SPIRV_CHECK_ERROR(err, "Failed to set arg 2"); + + size_t global = num; + 
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL); + SPIRV_CHECK_ERROR(err, "Failed to enqueue cl kernel"); + + std::vector h_res(num); + err = clEnqueueReadBuffer(queue, res, CL_TRUE, 0, bytes, &h_res[0], 0, NULL, NULL); + SPIRV_CHECK_ERROR(err, "Failed to read from ref"); + + for (int i = 0; i < num; i++) { + if (expected_results[i] != h_res[i]) { + log_error("Values do not match at location %d expected = %d got = %d\n", i, expected_results[i], h_res[i]); + return -1; + } + } + + return 0; +} + +#define TEST_FMATH_FUNC(TYPE, FUNC) \ + TEST_SPIRV_FUNC(ext_cl_khr_spirv_no_integer_wrap_decoration_##FUNC##_##TYPE) \ + { \ + return test_ext_cl_khr_spirv_no_integer_wrap_decoration(deviceID, context, queue, \ + "ext_cl_khr_spirv_no_integer_wrap_decoration_"#FUNC"_"#TYPE, \ + #FUNC, \ + #TYPE \ + ); \ + } + +TEST_FMATH_FUNC(int, fadd) +TEST_FMATH_FUNC(int, fsub) +TEST_FMATH_FUNC(int, fmul) +TEST_FMATH_FUNC(int, fshiftleft) +TEST_FMATH_FUNC(int, fnegate) +TEST_FMATH_FUNC(uint, fadd) +TEST_FMATH_FUNC(uint, fsub) +TEST_FMATH_FUNC(uint, fmul) +TEST_FMATH_FUNC(uint, fshiftleft) -- cgit v1.2.3 From 9666ca3c70192002c89130913205458db0a3d334 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 23 Aug 2022 18:02:33 +0100 Subject: [NFC] Fix sign-compare warnings in math_brute_force (#1467) Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/main.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index d1d146a1..45b6e97d 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -129,8 +129,9 @@ static int doTest(const char *name) const Func *const temp_func = functionList + i; if (strcmp(temp_func->name, name) == 0) { - if ((gStartTestNumber != -1 && i < gStartTestNumber) - || i > gEndTestNumber) + if ((gStartTestNumber != -1 + && static_cast(i) < 
gStartTestNumber) + || static_cast(i) > gEndTestNumber) { vlog("Skipping function #%d\n", i); return 0; @@ -524,7 +525,7 @@ static int ParseArgs(int argc, const char **argv) static void PrintFunctions(void) { vlog("\nMath function names:\n"); - for (int i = 0; i < functionListCount; i++) + for (size_t i = 0; i < functionListCount; i++) { vlog("\t%s\n", functionList[i].name); } -- cgit v1.2.3 From c82dabd4bbe7c61f5251488e471f9938ed20630d Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 24 Aug 2022 10:31:32 +0100 Subject: Use clCommandQueueWrapper in math_brute_force (#1463) Simplify code by avoiding manual resource management. This commit only modifies tests that use one queue per thread. The other unmodified tests are single-threaded and use the global `gQueue`. Original patch by Marco Antognini. Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/binary_double.cpp | 5 +++-- test_conformance/math_brute_force/binary_float.cpp | 5 +++-- test_conformance/math_brute_force/binary_i_double.cpp | 5 +++-- test_conformance/math_brute_force/binary_i_float.cpp | 5 +++-- test_conformance/math_brute_force/binary_operator_double.cpp | 5 +++-- test_conformance/math_brute_force/binary_operator_float.cpp | 5 +++-- test_conformance/math_brute_force/macro_binary_double.cpp | 5 +++-- test_conformance/math_brute_force/macro_binary_float.cpp | 5 +++-- test_conformance/math_brute_force/macro_unary_double.cpp | 5 +++-- test_conformance/math_brute_force/macro_unary_float.cpp | 5 +++-- test_conformance/math_brute_force/unary_double.cpp | 5 +++-- test_conformance/math_brute_force/unary_float.cpp | 5 +++-- 12 files changed, 36 insertions(+), 24 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index 034b325a..490c17b6 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -133,7 
+133,9 @@ struct ThreadInfo double maxErrorValue2; // position of the max error value (param 2). Init // to 0. MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -795,7 +797,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index 7abaa0e4..01082bc1 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -131,7 +131,9 @@ struct ThreadInfo double maxErrorValue2; // position of the max error value (param 2). Init // to 0. MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -952,7 +954,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index bba93617..def0bd41 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -132,7 +132,9 @@ struct ThreadInfo cl_int maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -717,7 +719,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index 4821830c..ed207098 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -130,7 +130,9 @@ struct ThreadInfo cl_int maxErrorValue2; // position of the max error value (param 2). Init // to 0. MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -710,7 +712,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 09c560e9..992df276 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -132,7 +132,9 @@ struct ThreadInfo double maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -762,7 +764,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index f2e57bc1..a555beaa 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -130,7 +130,9 @@ struct ThreadInfo double maxErrorValue2; // position of the max error value (param 2). Init // to 0. MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -889,7 +891,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index 88b0f86c..fb28d823 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -126,7 +126,9 @@ struct ThreadInfo cl_mem inBuf2; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -707,7 +709,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 6199dd1a..fd93e2e6 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -124,7 +124,9 @@ struct ThreadInfo cl_mem inBuf2; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -696,7 +698,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index b7fb8a96..2365a195 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -118,7 +118,9 @@ struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -478,7 +480,6 @@ exit: clReleaseMemObject(threadInfo.inBuf); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp 
index e4c22369..adc6c3ec 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -117,7 +117,9 @@ struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -492,7 +494,6 @@ exit: clReleaseMemObject(threadInfo.inBuf); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 76bcfd46..19402283 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -120,7 +120,9 @@ struct ThreadInfo cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value. Init to 0. 
- cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -516,7 +518,6 @@ exit: clReleaseMemObject(threadInfo.inBuf); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index d310054d..5a9a7361 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -118,7 +118,9 @@ struct ThreadInfo cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value. Init to 0. - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -693,7 +695,6 @@ exit: clReleaseMemObject(threadInfo.inBuf); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; -- cgit v1.2.3 From afe4ef8b8f63f13c0cb3a6d7eaff5dc761c3d2b1 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 24 Aug 2022 12:05:01 +0100 Subject: Fix test skipping in math_brute_force (#1475) Commit 9666ca3c ("[NFC] Fix sign-compare warnings in math_brute_force (#1467)", 2022-08-23) inadvertently changed the semantics of the if condition. The `i > gEndTestNumber` comparison was relying on `gEndTestNumber` being promoted to unsigned. When casting `i` to `int32_t`, this promotion no longer happens and as a result any tests given on the command line were being skipped. 
Use an unsigned type for `gStartTestNumber` and `gEndTestNumber` to eliminate the casts and any implicit conversions between signed and unsigned types. Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/main.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 45b6e97d..8cebff9d 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -58,8 +58,8 @@ static char appName[MAXPATHLEN] = ""; cl_device_id gDevice = NULL; cl_context gContext = NULL; cl_command_queue gQueue = NULL; -static int32_t gStartTestNumber = -1; -static int32_t gEndTestNumber = -1; +static size_t gStartTestNumber = ~0u; +static size_t gEndTestNumber = ~0u; int gSkipCorrectnessTesting = 0; static int gStopOnError = 0; static bool gSkipRestOfTests; @@ -129,9 +129,8 @@ static int doTest(const char *name) const Func *const temp_func = functionList + i; if (strcmp(temp_func->name, name) == 0) { - if ((gStartTestNumber != -1 - && static_cast(i) < gStartTestNumber) - || static_cast(i) > gEndTestNumber) + if ((gStartTestNumber != ~0u && i < gStartTestNumber) + || i > gEndTestNumber) { vlog("Skipping function #%d\n", i); return 0; @@ -468,7 +467,7 @@ static int ParseArgs(int argc, const char **argv) long number = strtol(arg, &t, 0); if (t != arg) { - if (-1 == gStartTestNumber) + if (~0u == gStartTestNumber) gStartTestNumber = (int32_t)number; else gEndTestNumber = gStartTestNumber + (int32_t)number; -- cgit v1.2.3 From f4eb852b6d376afb827da4999cdfd5e0376b6a40 Mon Sep 17 00:00:00 2001 From: stoneforestwhu Date: Wed, 31 Aug 2022 00:47:15 +0800 Subject: support format CL_ABGR (#1474) * support format CL_ABGR add code to handle format CL_ABGR * Update imageHelpers.h * fix format --- test_common/harness/imageHelpers.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test_common/harness/imageHelpers.h 
b/test_common/harness/imageHelpers.h index 2cc8e68e..f8ae4fb9 100644 --- a/test_common/harness/imageHelpers.h +++ b/test_common/harness/imageHelpers.h @@ -482,6 +482,13 @@ void read_image_pixel(void *imageData, image_descriptor *imageInfo, int x, outData[2] = tempData[3]; outData[3] = tempData[0]; } + else if (format->image_channel_order == CL_ABGR) + { + outData[0] = tempData[3]; + outData[1] = tempData[2]; + outData[2] = tempData[1]; + outData[3] = tempData[0]; + } else if ((format->image_channel_order == CL_BGRA) || (format->image_channel_order == CL_sBGRA)) { -- cgit v1.2.3 From 8f5a2f0ae8b083665773281e01ff8e87e286b671 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 30 Aug 2022 17:54:50 +0100 Subject: Initial command-buffer extension tests (#1368) * Initial command-buffer tests Introduce some basic testing of the [cl_khr_command_buffer](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer) extension. This is intended as a starting point from which we can iteratively build up tests for the extension collaboratively. * Move tests into derived classes * Move tests from methods into derived classes implementing a `Run()` interface. * Fix memory leak when command_buffer isn't freed when a test is skipped. * Print correct error code for `CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR` * Pass `nullptr` for queue parameter to command recording entry-points * Define command-buffer type wrapper Other OpenCL object have a wrapper to reference count their use and free the wrapped object. The command-buffer object can't use the generic type wrappers which are templated on the appropriate release/retain function, as the release/retain functions are queried at runtime. Instead, define our own command-buffer wrapper class where a base object is passed on construction which contains function pointers to the release/retain functions that can be used in the wrapper. 
* Use create_single_kernel_helper_create_program Use `create_single_kernel_helper_create_program` rather than hardcoding `clCreateProgramWithSource` to allow for other types of program input. Also fix bug using wrong enum for passing properties on command-buffer creation, should be `CL_COMMAND_BUFFER_FLAGS_KHR` * Add out-of-order command-buffer test Introduce a basic test for checking sync-point use with out-of-order command-buffers. This also includes better checking of required queue properties. --- test_conformance/extensions/CMakeLists.txt | 1 + .../cl_khr_command_buffer/CMakeLists.txt | 8 + .../cl_khr_command_buffer/basic_command_buffer.cpp | 588 +++++++++++++++++++++ .../command_buffer_test_base.h | 177 +++++++ .../extensions/cl_khr_command_buffer/main.cpp | 35 ++ .../extensions/cl_khr_command_buffer/procs.h | 35 ++ 6 files changed, 844 insertions(+) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt create mode 100644 test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp create mode 100644 test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h create mode 100644 test_conformance/extensions/cl_khr_command_buffer/main.cpp create mode 100644 test_conformance/extensions/cl_khr_command_buffer/procs.h diff --git a/test_conformance/extensions/CMakeLists.txt b/test_conformance/extensions/CMakeLists.txt index 53d77ee5..d95d29aa 100644 --- a/test_conformance/extensions/CMakeLists.txt +++ b/test_conformance/extensions/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory( cl_ext_cxx_for_opencl ) +add_subdirectory( cl_khr_command_buffer ) add_subdirectory( cl_khr_dx9_media_sharing ) diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt new file mode 100644 index 00000000..ac259f6d --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt @@ -0,0 +1,8 @@ +set(MODULE_NAME 
CL_KHR_COMMAND_BUFFER) + +set(${MODULE_NAME}_SOURCES + main.cpp + basic_command_buffer.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp new file mode 100644 index 00000000..62a02d83 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp @@ -0,0 +1,588 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "command_buffer_test_base.h" +#include "procs.h" +#include "harness/typeWrappers.h" + +#include +#include +#include + +#define CHECK_VERIFICATION_ERROR(reference, result, index) \ + { \ + if (reference != result) \ + { \ + log_error("Expected %d was %d at index %u\n", reference, result, \ + index); \ + return TEST_FAIL; \ + } \ + } + +namespace { + +// Helper test fixture for constructing OpenCL objects used in testing +// a variety of simple command-buffer enqueue scenarios. 
+struct BasicCommandBufferTest : CommandBufferTestBase +{ + + BasicCommandBufferTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : CommandBufferTestBase(device), context(context), queue(queue), + command_buffer(this), simultaneous_use(false), + out_of_order_support(false), num_elements(0) + {} + + virtual bool Skip() + { + cl_command_queue_properties required_properties; + cl_int error = clGetDeviceInfo( + device, CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR, + sizeof(required_properties), &required_properties, NULL); + test_error(error, + "Unable to query " + "CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR"); + + cl_command_queue_properties queue_properties; + + error = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES, + sizeof(queue_properties), + &queue_properties, NULL); + test_error(error, "Unable to query CL_QUEUE_PROPERTIES"); + + // Skip if queue properties don't contain those required + return required_properties != (required_properties & queue_properties); + } + + virtual cl_int SetUp(int elements) + { + cl_int error = init_extension_functions(); + if (error != CL_SUCCESS) + { + return error; + } + + // Query if device supports simultaneous use + cl_device_command_buffer_capabilities_khr capabilities; + error = + clGetDeviceInfo(device, CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR, + sizeof(capabilities), &capabilities, NULL); + test_error(error, + "Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR"); + simultaneous_use = + capabilities & CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR; + out_of_order_support = + capabilities & CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR; + + if (elements <= 0) + { + return CL_INVALID_VALUE; + } + num_elements = static_cast(elements); + + // Kernel performs a parallel copy from an input buffer to output buffer + // is created. 
+ const char *kernel_str = + R"( + __kernel void copy(__global int* in, __global int* out) { + size_t id = get_global_id(0); + out[id] = in[id]; + })"; + + error = create_single_kernel_helper_create_program(context, &program, 1, + &kernel_str); + test_error(error, "Failed to create program with source"); + + error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr); + test_error(error, "Failed to build program"); + + in_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, + sizeof(cl_int) * num_elements, nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + out_mem = + clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(cl_int) * num_elements, nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + kernel = clCreateKernel(program, "copy", &error); + test_error(error, "Failed to create copy kernel"); + + error = clSetKernelArg(kernel, 0, sizeof(in_mem), &in_mem); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel, 1, sizeof(out_mem), &out_mem); + test_error(error, "clSetKernelArg failed"); + + if (simultaneous_use) + { + cl_command_buffer_properties_khr properties[3] = { + CL_COMMAND_BUFFER_FLAGS_KHR, + CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR, 0 + }; + command_buffer = + clCreateCommandBufferKHR(1, &queue, properties, &error); + } + else + { + command_buffer = + clCreateCommandBufferKHR(1, &queue, nullptr, &error); + } + test_error(error, "clCreateCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + // Test body returning an OpenCL error code + virtual cl_int Run() = 0; + + +protected: + size_t data_size() const { return num_elements * sizeof(cl_int); } + + cl_context context; + cl_command_queue queue; + clCommandBufferWrapper command_buffer; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper in_mem, out_mem; + size_t num_elements; + + // Device support query results + bool simultaneous_use; + bool out_of_order_support; +}; + +// Test enqueuing a command-buffer containing a single 
NDRange command once +struct BasicEnqueueTest : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + const cl_int pattern = 42; + error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), + output_data.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + } + + return CL_SUCCESS; + } +}; + +// Test enqueuing a command-buffer containing multiple command, including +// operations other than NDRange kernel execution. 
+struct MixedCommandsTest : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + cl_int error; + const size_t iterations = 4; + clMemWrapper result_mem = + clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_int) * iterations, nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + const cl_int pattern_base = 42; + for (size_t i = 0; i < iterations; i++) + { + const cl_int pattern = pattern_base + i; + cl_int error = clCommandFillBufferKHR( + command_buffer, nullptr, in_mem, &pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &num_elements, nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + const size_t result_offset = i * sizeof(cl_int); + error = clCommandCopyBufferKHR( + command_buffer, nullptr, out_mem, result_mem, 0, result_offset, + sizeof(cl_int), 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandCopyBufferKHR failed"); + } + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector result_data(num_elements); + error = clEnqueueReadBuffer(queue, result_mem, CL_TRUE, 0, + iterations * sizeof(cl_int), + result_data.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < iterations; i++) + { + const cl_int ref = pattern_base + i; + CHECK_VERIFICATION_ERROR(ref, result_data[i], i); + } + + return CL_SUCCESS; + } +}; + +// Test enqueueing a command-buffer blocked on a user-event +struct UserEventTest : public BasicCommandBufferTest +{ + using 
BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + clEventWrapper user_event = clCreateUserEvent(context, &error); + test_error(error, "clCreateUserEvent failed"); + + const cl_int pattern = 42; + error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1, + &user_event, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(), + output_data.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + error = clSetUserEventStatus(user_event, CL_COMPLETE); + test_error(error, "clSetUserEventStatus failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + } + + return CL_SUCCESS; + } +}; + +// Test flushing the command-queue between command-buffer enqueues +struct ExplicitFlushTest : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + const cl_int pattern_A = 42; + error = 
clEnqueueFillBuffer(queue, in_mem, &pattern_A, sizeof(cl_int), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clFlush(queue); + test_error(error, "clFlush failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_A(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(), + output_data_A.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + const cl_int pattern_B = 0xA; + error = clEnqueueFillBuffer(queue, in_mem, &pattern_B, sizeof(cl_int), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clFlush(queue); + test_error(error, "clFlush failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFlush(queue); + test_error(error, "clFlush failed"); + + std::vector output_data_B(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(), + output_data_B.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_A, output_data_A[i], i); + + CHECK_VERIFICATION_ERROR(pattern_B, output_data_B[i], i); + } + return CL_SUCCESS; + } + + bool Skip() override + { + return !simultaneous_use || BasicCommandBufferTest::Skip(); + } +}; + +// Test enqueueing a command-buffer twice separated by another enqueue operation +struct InterleavedEnqueueTest : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, + 
nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + cl_int pattern = 42; + error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + pattern = 0xABCD; + error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueCopyBuffer(queue, in_mem, out_mem, 0, 0, data_size(), + 0, nullptr, nullptr); + test_error(error, "clEnqueueCopyBuffer failed"); + + std::vector output_data(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), + output_data.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + } + + return CL_SUCCESS; + } + + bool Skip() override + { + return !simultaneous_use || BasicCommandBufferTest::Skip(); + } +}; + +// Test sync-points with an out-of-order command-buffer +struct OutOfOrderTest : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + OutOfOrderTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicCommandBufferTest(device, context, queue), + out_of_order_command_buffer(this), out_of_order_queue(nullptr), + event(nullptr) + {} + + cl_int Run() override + { + cl_sync_point_khr sync_points[2]; + + const cl_int pattern = 42; + cl_int error = + 
clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem, + &pattern, sizeof(cl_int), 0, data_size(), 0, + nullptr, &sync_points[0], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + const cl_int overwritten_pattern = 0xACDC; + error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, + out_mem, &overwritten_pattern, + sizeof(cl_int), 0, data_size(), 0, + nullptr, &sync_points[1], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + error = clCommandNDRangeKernelKHR( + out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &num_elements, nullptr, 2, sync_points, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(out_of_order_command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data(num_elements); + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, + data_size(), output_data.data(), 1, &event, + nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + } + + return CL_SUCCESS; + } + + cl_int SetUp(int elements) override + { + cl_int error = BasicCommandBufferTest::SetUp(elements); + test_error(error, "BasicCommandBufferTest::SetUp failed"); + + if (!out_of_order_support) + { + // Test will skip as device doesn't support out-of-order + // command-buffers + return CL_SUCCESS; + } + + out_of_order_queue = clCreateCommandQueue( + context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error); + test_error(error, "Unable to create command queue to test with"); + + out_of_order_command_buffer = + clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error); + test_error(error, "clCreateCommandBufferKHR 
failed"); + + return CL_SUCCESS; + } + + bool Skip() override + { + return !out_of_order_support || BasicCommandBufferTest::Skip(); + } + + clCommandQueueWrapper out_of_order_queue; + clCommandBufferWrapper out_of_order_command_buffer; + clEventWrapper event; +}; + +#undef CHECK_VERIFICATION_ERROR + +template +int MakeAndRunTest(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + CHECK_COMMAND_BUFFER_EXTENSION_AVAILABLE(device); + + auto test_fixture = T(device, context, queue); + cl_int error = test_fixture.SetUp(num_elements); + test_error_ret(error, "Error in test initialization", TEST_FAIL); + + if (test_fixture.Skip()) + { + return TEST_SKIPPED_ITSELF; + } + + error = test_fixture.Run(); + test_error_ret(error, "Test Failed", TEST_FAIL); + + return TEST_PASS; +} +} // anonymous namespace + +int test_single_ndrange(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_interleaved_enqueue(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_mixed_commands(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_explicit_flush(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_user_events(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, num_elements); +} + +int test_out_of_order(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, num_elements); +} diff --git 
a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h new file mode 100644 index 00000000..0fd2e4ec --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h @@ -0,0 +1,177 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef _CL_KHR_COMMAND_BUFFER_TEST_BASE_H +#define _CL_KHR_COMMAND_BUFFER_TEST_BASE_H + +#include +#include "harness/deviceInfo.h" +#include "harness/testHarness.h" + + +// Base class for setting function pointers to new extension entry points +struct CommandBufferTestBase +{ + CommandBufferTestBase(cl_device_id device): device(device) {} + + cl_int init_extension_functions() + { + cl_platform_id platform; + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), + &platform, nullptr); + test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); + + // If it is supported get the addresses of all the APIs here. 
+#define GET_EXTENSION_ADDRESS(FUNC) \ + FUNC = reinterpret_cast( \ + clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \ + if (FUNC == nullptr) \ + { \ + log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ + " with " #FUNC "\n"); \ + return TEST_FAIL; \ + } + + GET_EXTENSION_ADDRESS(clCreateCommandBufferKHR); + GET_EXTENSION_ADDRESS(clReleaseCommandBufferKHR); + GET_EXTENSION_ADDRESS(clRetainCommandBufferKHR); + GET_EXTENSION_ADDRESS(clFinalizeCommandBufferKHR); + GET_EXTENSION_ADDRESS(clEnqueueCommandBufferKHR); + GET_EXTENSION_ADDRESS(clCommandBarrierWithWaitListKHR); + GET_EXTENSION_ADDRESS(clCommandCopyBufferKHR); + GET_EXTENSION_ADDRESS(clCommandCopyBufferRectKHR); + GET_EXTENSION_ADDRESS(clCommandCopyBufferToImageKHR); + GET_EXTENSION_ADDRESS(clCommandCopyImageKHR); + GET_EXTENSION_ADDRESS(clCommandCopyImageToBufferKHR); + GET_EXTENSION_ADDRESS(clCommandFillBufferKHR); + GET_EXTENSION_ADDRESS(clCommandFillImageKHR); + GET_EXTENSION_ADDRESS(clCommandNDRangeKernelKHR); + GET_EXTENSION_ADDRESS(clGetCommandBufferInfoKHR); +#undef GET_EXTENSION_ADDRESS + return CL_SUCCESS; + } + + clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr; + clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; + clRetainCommandBufferKHR_fn clRetainCommandBufferKHR = nullptr; + clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr; + clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr; + clCommandBarrierWithWaitListKHR_fn clCommandBarrierWithWaitListKHR = + nullptr; + clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr; + clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr; + clCommandCopyBufferToImageKHR_fn clCommandCopyBufferToImageKHR = nullptr; + clCommandCopyImageKHR_fn clCommandCopyImageKHR = nullptr; + clCommandCopyImageToBufferKHR_fn clCommandCopyImageToBufferKHR = nullptr; + clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr; + clCommandFillImageKHR_fn clCommandFillImageKHR 
= nullptr; + clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; + clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR = nullptr; + + cl_device_id device = nullptr; +}; + +// Wrapper class based off generic typeWrappers.h wrappers. However, because +// the release/retain functions are queried at runtime from the platform, +// rather than known at compile time we cannot link the instantiated template. +// Instead, pass an instance of `CommandBufferTestBase` on wrapper construction +// to access the release/retain functions. +class clCommandBufferWrapper { + cl_command_buffer_khr object = nullptr; + + void retain() + { + if (!object) return; + + auto err = base->clRetainCommandBufferKHR(object); + if (err != CL_SUCCESS) + { + print_error(err, "clRetainCommandBufferKHR() failed"); + std::abort(); + } + } + + void release() + { + if (!object) return; + + auto err = base->clReleaseCommandBufferKHR(object); + if (err != CL_SUCCESS) + { + print_error(err, "clReleaseCommandBufferKHR() failed"); + std::abort(); + } + } + + // Used to access release/retain functions + CommandBufferTestBase *base; + +public: + // We always want to have base available to dereference + clCommandBufferWrapper() = delete; + + clCommandBufferWrapper(CommandBufferTestBase *base): base(base) {} + + // On assignment, assume the object has a refcount of one. + clCommandBufferWrapper &operator=(cl_command_buffer_khr rhs) + { + reset(rhs); + return *this; + } + + // Copy semantics, increase retain count. + clCommandBufferWrapper(clCommandBufferWrapper const &w) { *this = w; } + clCommandBufferWrapper &operator=(clCommandBufferWrapper const &w) + { + reset(w.object); + retain(); + return *this; + } + + // Move semantics, directly take ownership. 
+ clCommandBufferWrapper(clCommandBufferWrapper &&w) { *this = std::move(w); } + clCommandBufferWrapper &operator=(clCommandBufferWrapper &&w) + { + reset(w.object); + w.object = nullptr; + return *this; + } + + ~clCommandBufferWrapper() { reset(); } + + // Release the existing object, if any, and own the new one, if any. + void reset(cl_command_buffer_khr new_object = nullptr) + { + release(); + object = new_object; + } + + operator cl_command_buffer_khr() const { return object; } +}; + +#define CHECK_COMMAND_BUFFER_EXTENSION_AVAILABLE(device) \ + { \ + if (!is_extension_available(device, "cl_khr_command_buffer")) \ + { \ + log_info( \ + "Device does not support 'cl_khr_command_buffer'. Skipping " \ + "the test.\n"); \ + return TEST_SKIPPED_ITSELF; \ + } \ + } + + +#endif // _CL_KHR_COMMAND_BUFFER_TEST_BASE_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp new file mode 100644 index 00000000..4dece455 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp @@ -0,0 +1,35 @@ +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "procs.h" +#include "harness/testHarness.h" + +test_definition test_list[] = { + ADD_TEST(single_ndrange), ADD_TEST(interleaved_enqueue), + ADD_TEST(mixed_commands), ADD_TEST(explicit_flush), + ADD_TEST(user_events), ADD_TEST(out_of_order) +}; + + +int main(int argc, const char *argv[]) +{ + // A device may report the required properties of a queue that + // is compatible with command-buffers via the query + // CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR. We account + // for this in the tests themselves, rather than here, where we have a + // device to query. + const cl_command_queue_properties queue_properties = 0; + return runTestHarnessWithCheck(argc, argv, ARRAY_SIZE(test_list), test_list, + false, queue_properties, nullptr); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h new file mode 100644 index 00000000..58fd228f --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h @@ -0,0 +1,35 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _CL_KHR_COMMAND_BUFFER_PROCS_H +#define _CL_KHR_COMMAND_BUFFER_PROCS_H + +#include + +// Basic command-buffer tests +extern int test_single_ndrange(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_interleaved_enqueue(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_mixed_commands(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_explicit_flush(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_user_events(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_out_of_order(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); + +#endif /*_CL_KHR_COMMAND_BUFFER_PROCS_H*/ -- cgit v1.2.3 From f94c1357558a78cef2af752240c3f805b4b83ce9 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 1 Sep 2022 06:43:43 +0100 Subject: Use clMemWrapper in math_brute_force (#1476) Simplify code by avoiding manual resource management. Original patch by Marco Antognini. 
Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/binary_double.cpp | 12 +++++------- test_conformance/math_brute_force/binary_float.cpp | 12 +++++------- test_conformance/math_brute_force/binary_i_double.cpp | 12 +++++------- test_conformance/math_brute_force/binary_i_float.cpp | 12 +++++------- .../math_brute_force/binary_operator_double.cpp | 12 +++++------- test_conformance/math_brute_force/binary_operator_float.cpp | 12 +++++------- test_conformance/math_brute_force/common.h | 3 +++ test_conformance/math_brute_force/macro_binary_double.cpp | 12 +++++------- test_conformance/math_brute_force/macro_binary_float.cpp | 12 +++++------- test_conformance/math_brute_force/macro_unary_double.cpp | 12 +++--------- test_conformance/math_brute_force/macro_unary_float.cpp | 12 +++--------- test_conformance/math_brute_force/unary_double.cpp | 13 ++++--------- test_conformance/math_brute_force/unary_float.cpp | 13 ++++--------- 13 files changed, 57 insertions(+), 92 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index 490c17b6..1b1f7d4c 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -124,9 +124,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value (param 1). Init to 0. 
@@ -793,10 +795,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index 01082bc1..d229a376 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -122,9 +122,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value (param 1). Init to 0. 
@@ -950,10 +952,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index def0bd41..7baa21a2 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -123,9 +123,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value (param 1). Init to 0. 
@@ -715,10 +717,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index ed207098..3f998e2e 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -121,9 +121,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value (param 1). Init to 0. 
@@ -708,10 +710,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 992df276..74883664 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -123,9 +123,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value (param 1). Init to 0. 
@@ -760,10 +762,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index a555beaa..56f293c1 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -121,9 +121,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value (param 1). Init to 0. @@ -887,10 +889,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h index f0d18dd9..6f17898f 100644 --- a/test_conformance/math_brute_force/common.h +++ b/test_conformance/math_brute_force/common.h @@ -28,6 +28,9 @@ using KernelMatrix = std::array, VECTOR_SIZE_COUNT>; // Array of programs for each vector size. using Programs = std::array; +// Array of buffers for each vector size. +using Buffers = std::array; + // Information to generate OpenCL kernels. 
struct BuildKernelInfo { diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index fb28d823..a697a7be 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -122,9 +122,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + MTdata d; // Per thread command queue to improve performance @@ -705,10 +707,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index fd93e2e6..97e2f675 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -120,9 +120,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + MTdata d; // Per thread command queue to improve performance @@ -694,10 +696,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - 
clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 2365a195..5a3ad355 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -116,8 +116,9 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + Buffers outBuf; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -475,12 +476,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - clReleaseMemObject(threadInfo.inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); - } - return error; } diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index adc6c3ec..d2982156 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -115,8 +115,9 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + Buffers outBuf; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -489,12 +490,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - 
clReleaseMemObject(threadInfo.inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); - } - return error; } diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 19402283..7dfc12b1 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -116,8 +116,10 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value. Init to 0. @@ -513,12 +515,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - clReleaseMemObject(threadInfo.inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); - } - return error; } diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index 5a9a7361..6a5c3539 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -114,8 +114,10 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value. Init to 0. 
@@ -690,12 +692,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - clReleaseMemObject(threadInfo.inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); - } - return error; } -- cgit v1.2.3 From 2dc253313047c3ab7f0cf77ae8f8cdf1727ff3b2 Mon Sep 17 00:00:00 2001 From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com> Date: Thu, 1 Sep 2022 07:56:10 +0200 Subject: fix test kernel attributes when api fcts are failing (#1449) test_error returns the err given as the first argument. As the run_test function returns a bool, we end up returning true (meaning pass) when an api function fails. Instead return explicitly false (meaning fail). --- test_conformance/api/test_kernel_attributes.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test_conformance/api/test_kernel_attributes.cpp b/test_conformance/api/test_kernel_attributes.cpp index 2e4e0a7f..ad4baa0f 100644 --- a/test_conformance/api/test_kernel_attributes.cpp +++ b/test_conformance/api/test_kernel_attributes.cpp @@ -275,16 +275,16 @@ static bool run_test(cl_context context, cl_device_id deviceID, clKernelWrapper kernel; cl_int err = create_single_kernel_helper(context, &program, &kernel, 1, &kernel_src, "test_kernel"); - test_error(err, "create_single_kernel_helper"); + test_error_ret(err, "create_single_kernel_helper", false); // Get the size of the kernel attribute string returned size_t size = 0; err = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, 0, nullptr, &size); - test_error(err, "clGetKernelInfo"); + test_error_ret(err, "clGetKernelInfo", false); std::vector attributes(size); err = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, attributes.size(), attributes.data(), nullptr); - test_error(err, "clGetKernelInfo"); + test_error_ret(err, "clGetKernelInfo", false); std::string attribute_string(attributes.data()); attribute_string.erase( std::remove(attribute_string.begin(), attribute_string.end(), ' '), -- 
cgit v1.2.3 From 9ad4899862f95091c95754ed26981c57cb5a52e7 Mon Sep 17 00:00:00 2001 From: niranjanjoshi121 <43807392+niranjanjoshi121@users.noreply.github.com> Date: Thu, 1 Sep 2022 11:28:13 +0530 Subject: Use size_t instead of cl_int (#1414) * Use size_t instead of cl_int Memory is allocated for cl_int, but mapped as size_t. Use size_t instead of cl_int during allocation and mapping for consistency. * Use size_t instead of cl_int Memory is allocated for cl_int, but mapped as size_t. Use size_t instead of cl_int during allocation and mapping for consistency. * Use size_t instead of cl_int Memory is allocated for cl_int, but mapped as size_t. Use size_t instead of cl_int during allocation and mapping for consistency. * Remove test_half changes. Remove test_half changes from other fix that got included in this commit. * Final formatting fix. --- test_conformance/SVM/test_shared_address_space_coarse_grain.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp b/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp index f26981bc..12358167 100644 --- a/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp +++ b/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp @@ -98,7 +98,9 @@ cl_int create_linked_lists_on_device(int ci, cl_command_queue cmdq, cl_mem alloc cl_int error = CL_SUCCESS; log_info("SVM: creating linked list on device: %d ", ci); - size_t *pAllocator = (size_t*) clEnqueueMapBuffer(cmdq, allocator, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error); + size_t *pAllocator = (size_t *)clEnqueueMapBuffer( + cmdq, allocator, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(size_t), + 0, NULL, NULL, &error); test_error2(error, pAllocator, "clEnqueueMapBuffer failed"); // reset allocator index *pAllocator = numLists; // the first numLists elements of the nodes array are already allocated (they hold the head of each list). 
@@ -206,7 +208,9 @@ int shared_address_space_coarse_grain(cl_device_id deviceID, cl_context context2 } // this buffer holds an index into the nodes buffer, it is used for node allocation - clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); + clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(size_t), NULL, &error); + test_error(error, "clCreateBuffer failed."); error = clGetMemObjectInfo(allocator, CL_MEM_USES_SVM_POINTER, sizeof(cl_bool), &usesSVMpointer, 0); -- cgit v1.2.3 From 7caa4c4421a966e8b4db23aff1ee12cf5c2b7aa6 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Thu, 1 Sep 2022 23:13:01 +0100 Subject: Update known extensions in compiler define test (#1480) Add [cl_khr_command_buffer_mutable_dispatch](https://github.com/KhronosGroup/OpenCL-Docs/pull/819), [cl_khr_subgroup_rotate](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_subgroup_rotate), and [cl_khr_extended_async_copies](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_extended_async_copies) to the list of known extensions used in `test_compiler_defines_for_extensions` --- test_conformance/compiler/test_compiler_defines_for_extensions.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 4e5b2841..91441416 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -20,7 +20,7 @@ #include #endif - +// List should follow order in the extension spec const char *known_extensions[] = { "cl_khr_byte_addressable_store", "cl_khr_3d_image_writes", @@ -42,6 +42,7 @@ const char *known_extensions[] = { "cl_khr_mipmap_image_writes", "cl_khr_srgb_image_writes", "cl_khr_subgroup_named_barrier", + 
"cl_khr_extended_async_copies", "cl_khr_subgroup_extended_types", "cl_khr_subgroup_non_uniform_vote", "cl_khr_subgroup_ballot", @@ -51,6 +52,7 @@ const char *known_extensions[] = { "cl_khr_subgroup_clustered_reduce", "cl_khr_extended_bit_ops", "cl_khr_integer_dot_product", + "cl_khr_subgroup_rotate", // API-only extensions after this point. If you add above here, modify // first_API_extension below. "cl_khr_icd", @@ -82,10 +84,11 @@ const char *known_extensions[] = { "cl_khr_command_buffer", "cl_khr_external_memory", "cl_khr_external_memory_opaque_fd", + "cl_khr_command_buffer_mutable_dispatch", }; size_t num_known_extensions = ARRAY_SIZE(known_extensions); -size_t first_API_extension = 29; +size_t first_API_extension = 31; const char *known_embedded_extensions[] = { "cles_khr_int64", -- cgit v1.2.3 From 388944c01cbfc4272d11b3a9d520e2eed2d1288d Mon Sep 17 00:00:00 2001 From: Ahmed <36049290+AhmedAmraniAkdi@users.noreply.github.com> Date: Tue, 6 Sep 2022 17:53:12 +0100 Subject: Minimum 2 non atomic variables per thread for the c11 atomic fence test for embedded profile devices. (#1452) * Minimum 2 Non atomic variables per thread for an embedded profile device - https://github.com/KhronosGroup/OpenCL-CTS/issues/1274 * Formatting --- test_conformance/c11_atomics/common.h | 5 +- test_conformance/c11_atomics/test_atomics.cpp | 4907 +++++++++++++++---------- 2 files changed, 2960 insertions(+), 1952 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 5bb9e5b7..6c7d0b12 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -1361,9 +1361,8 @@ int CBasicTest::ExecuteSingleTest( error = clSetKernelArg(kernel, argInd++, LocalRefValues() ? 
typeSize - * ((CurrentGroupSize() - * NumNonAtomicVariablesPerThread()) - + 4) + * (CurrentGroupSize() + * NumNonAtomicVariablesPerThread()) : 1, NULL); test_error(error, "Unable to set indexed kernel argument"); diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index 38b4e9a7..09c14ed1 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -23,2200 +23,3209 @@ #include #include -template -class CBasicTestStore : public CBasicTestMemOrderScope -{ +template +class CBasicTestStore + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::OldValueCheck; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScope; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTest::CheckCapabilities; - CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - if(MemoryOrder() == MEMORY_ORDER_ACQUIRE || - MemoryOrder() == MEMORY_ORDER_ACQ_REL) - return 0; //skip test - not applicable - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable - - return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, context, queue); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - return - " atomic_store"+postfix+"(&destMemory[tid], tid"+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = (HostDataType)whichDestValue; - return true; - } + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScope; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTest::CheckCapabilities; + CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (MemoryOrder() == MEMORY_ORDER_ACQUIRE + || MemoryOrder() == MEMORY_ORDER_ACQ_REL) + return 0; // skip test - not applicable + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return " atomic_store" + postfix + "(&destMemory[tid], tid" + + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = (HostDataType)whichDestValue; + return true; + } }; -int test_atomic_store_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_store_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestStore test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_float(TYPE_ATOMIC_FLOAT, useSVM); - EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_double(TYPE_ATOMIC_DOUBLE, useSVM); - EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestStore test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, 
num_elements)); - CBasicTestStore test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestStore test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestStore test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_ulong(TYPE_ATOMIC_ULONG, + useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_float(TYPE_ATOMIC_FLOAT, + useSVM); + EXECUTE_TEST(error, + test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, + test_double.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestStore test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + 
test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestStore test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_store(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_store_generic(deviceID, context, queue, num_elements, false); + return test_atomic_store_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_store(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_store_generic(deviceID, context, queue, num_elements, true); + return test_atomic_store_generic(deviceID, context, queue, num_elements, + true); } -template -class 
CBasicTestInit : public CBasicTest -{ +template +class CBasicTestInit : public CBasicTest { public: - using CBasicTest::OldValueCheck; - CBasicTestInit(TExplicitAtomicType dataType, bool useSVM) : CBasicTest(dataType, useSVM) - { - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - virtual std::string ProgramCore() - { - return - " atomic_init(&destMemory[tid], tid);\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - host_atomic_init(&destMemory[tid], (HostDataType)tid); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = (HostDataType)whichDestValue; - return true; - } + using CBasicTest::OldValueCheck; + CBasicTestInit(TExplicitAtomicType dataType, bool useSVM) + : CBasicTest(dataType, useSVM) + { + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + virtual std::string ProgramCore() + { + return " atomic_init(&destMemory[tid], tid);\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + host_atomic_init(&destMemory[tid], (HostDataType)tid); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = (HostDataType)whichDestValue; + return true; + } }; -int test_atomic_init_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_init_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestInit test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, 
num_elements)); - CBasicTestInit test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_float(TYPE_ATOMIC_FLOAT, useSVM); - EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_double(TYPE_ATOMIC_DOUBLE, useSVM); - EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestInit test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestInit test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int 
error = 0; + CBasicTestInit test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_ulong(TYPE_ATOMIC_ULONG, + useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_float(TYPE_ATOMIC_FLOAT, + useSVM); + EXECUTE_TEST(error, + test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, + test_double.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestInit test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestInit test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, 
queue, num_elements)); + CBasicTestInit + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_init(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_init_generic(deviceID, context, queue, num_elements, false); + return test_atomic_init_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_init(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_init_generic(deviceID, context, queue, num_elements, true); + return test_atomic_init_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestLoad : public CBasicTestMemOrderScope -{ +template +class CBasicTestLoad + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::OldValueCheck; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScope; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::MemoryScopeStr; - using CBasicTest::CheckCapabilities; - CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - if(MemoryOrder() == MEMORY_ORDER_RELEASE || - MemoryOrder() == MEMORY_ORDER_ACQ_REL) - return 0; //skip test - not applicable - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable 
- - return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, context, queue); - } - virtual std::string ProgramCore() - { - // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store - // should be MEMORY_ORDER_RELEASE - std::string memoryOrderScopeLoad = MemoryOrderScopeStr(); - std::string memoryOrderScopeStore = - (MemoryOrder() == MEMORY_ORDER_ACQUIRE) - ? (", memory_order_release" + MemoryScopeStr()) - : memoryOrderScopeLoad; - std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit"); - return " atomic_store" + postfix + "(&destMemory[tid], tid" - + memoryOrderScopeStore - + ");\n" - " oldValues[tid] = atomic_load" - + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - host_atomic_store(&destMemory[tid], (HostDataType)tid, MEMORY_ORDER_SEQ_CST); - oldValues[tid] = host_atomic_load(&destMemory[tid], MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = (HostDataType)whichDestValue; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - for(cl_uint i = 0; i < threadCount; i++ ) - { - if(refValues[i] != (HostDataType)i) - { - log_error("Invalid value for thread %u\n", (cl_uint)i); - correct = false; + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScope; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::MemoryScopeStr; + using CBasicTest::CheckCapabilities; + CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) 
+ { + return threadCount; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (MemoryOrder() == MEMORY_ORDER_RELEASE + || MemoryOrder() == MEMORY_ORDER_ACQ_REL) + return 0; // skip test - not applicable + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramCore() + { + // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store + // should be MEMORY_ORDER_RELEASE + std::string memoryOrderScopeLoad = MemoryOrderScopeStr(); + std::string memoryOrderScopeStore = + (MemoryOrder() == MEMORY_ORDER_ACQUIRE) + ? (", memory_order_release" + MemoryScopeStr()) + : memoryOrderScopeLoad; + std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit"); + return " atomic_store" + postfix + "(&destMemory[tid], tid" + + memoryOrderScopeStore + + ");\n" + " oldValues[tid] = atomic_load" + + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + host_atomic_store(&destMemory[tid], (HostDataType)tid, + MEMORY_ORDER_SEQ_CST); + oldValues[tid] = host_atomic_load( + &destMemory[tid], MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = (HostDataType)whichDestValue; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + for (cl_uint i = 0; i < threadCount; i++) + { + if (refValues[i] != (HostDataType)i) + { + log_error("Invalid value for thread %u\n", (cl_uint)i); + correct = false; + return true; + } + } return true; - } } - return 
true; - } }; -int test_atomic_load_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_load_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestLoad test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_float(TYPE_ATOMIC_FLOAT, useSVM); - EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_double(TYPE_ATOMIC_DOUBLE, useSVM); - EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestLoad test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestLoad test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, 
useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestLoad test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_ulong(TYPE_ATOMIC_ULONG, + useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_float(TYPE_ATOMIC_FLOAT, + useSVM); + EXECUTE_TEST(error, + test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, + test_double.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestLoad test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestLoad 
test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_load(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_load_generic(deviceID, context, queue, num_elements, false); + return test_atomic_load_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_load(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_load_generic(deviceID, context, queue, num_elements, true); + return test_atomic_load_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestExchange : public CBasicTestMemOrderScope -{ +template +class CBasicTestExchange + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::OldValueCheck; - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::Iterations; - using CBasicTestMemOrderScope::IterationsStr; - CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(123456); - } - virtual 
std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], tid"+memoryOrderScope+");\n" - " for(int i = 0; i < "+IterationsStr()+"; i++)\n" - " oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], oldValues[tid]"+memoryOrderScope+");\n"; - } - - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid, MemoryOrder()); - for(int i = 0; i < Iterations(); i++) - oldValues[tid] = host_atomic_exchange(&destMemory[0], oldValues[tid], MemoryOrder()); - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - OldValueCheck(Iterations()%2 == 0); //check is valid for even number of iterations only - correct = true; - /* We are expecting values from 0 to size-1 and initial value from atomic variable */ - /* These values must be distributed across refValues array and atomic variable finalVaue[0] */ - /* Any repeated value is treated as an error */ - std::vector tidFound(threadCount); - bool startValueFound = false; - cl_uint i; - - for(i = 0; i <= threadCount; i++) - { - cl_uint value; - if(i == threadCount) - value = (cl_uint)finalValues[0]; //additional value from atomic variable (last written) - else - value = (cl_uint)refValues[i]; - if(value == (cl_uint)StartValue()) - { - // Special initial value - if(startValueFound) + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::Iterations; + using CBasicTestMemOrderScope::IterationsStr; + CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM) + : 
CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(123456); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return " oldValues[tid] = atomic_exchange" + postfix + + "(&destMemory[0], tid" + memoryOrderScope + + ");\n" + " for(int i = 0; i < " + + IterationsStr() + + "; i++)\n" + " oldValues[tid] = atomic_exchange" + + postfix + "(&destMemory[0], oldValues[tid]" + memoryOrderScope + + ");\n"; + } + + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid, + MemoryOrder()); + for (int i = 0; i < Iterations(); i++) + oldValues[tid] = host_atomic_exchange( + &destMemory[0], oldValues[tid], MemoryOrder()); + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + OldValueCheck( + Iterations() % 2 + == 0); // check is valid for even number of iterations only + correct = true; + /* We are expecting values from 0 to size-1 and initial value from + * atomic variable */ + /* These values must be distributed across refValues array and atomic + * variable finalVaue[0] */ + /* Any repeated value is treated as an error */ + std::vector tidFound(threadCount); + bool startValueFound = false; + cl_uint i; + + for (i = 0; i <= threadCount; i++) { - log_error("ERROR: Starting reference value (%u) occurred more thane once\n", (cl_uint)StartValue()); - correct = false; - return true; + cl_uint value; + if (i == threadCount) + value = (cl_uint)finalValues[0]; // additional value from atomic + // variable (last written) + else + value = (cl_uint)refValues[i]; + if (value == (cl_uint)StartValue()) + { + // Special initial value + if (startValueFound) + { + log_error("ERROR: Starting reference value (%u) occurred " + "more thane once\n", + 
(cl_uint)StartValue()); + correct = false; + return true; + } + startValueFound = true; + continue; + } + if (value >= threadCount) + { + log_error( + "ERROR: Reference value %u outside of valid range! (%u)\n", + i, value); + correct = false; + return true; + } + if (tidFound[value]) + { + log_error("ERROR: Value (%u) occurred more thane once\n", + value); + correct = false; + return true; + } + tidFound[value] = true; } - startValueFound = true; - continue; - } - if(value >= threadCount) - { - log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value); - correct = false; return true; - } - if(tidFound[value]) - { - log_error("ERROR: Value (%u) occurred more thane once\n", value); - correct = false; - return true; - } - tidFound[value] = true; } - return true; - } }; -int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestExchange test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_float(TYPE_ATOMIC_FLOAT, useSVM); - EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_double(TYPE_ATOMIC_DOUBLE, useSVM); - EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { 
- CBasicTestExchange test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestExchange test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestExchange test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_float( + TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST(error, + test_float.Execute(deviceID, context, queue, num_elements)); + 
CBasicTestExchange test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, + test_double.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestExchange + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestExchange + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_exchange(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_exchange(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_exchange_generic(deviceID, context, queue, num_elements, false); + return test_atomic_exchange_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_exchange(cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements) +int test_svm_atomic_exchange(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_exchange_generic(deviceID, context, queue, num_elements, true); + return test_atomic_exchange_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestCompareStrong : public CBasicTestMemOrder2Scope -{ +template +class CBasicTestCompareStrong + : public CBasicTestMemOrder2Scope { public: - using CBasicTestMemOrder2Scope::StartValue; - using CBasicTestMemOrder2Scope::OldValueCheck; - using CBasicTestMemOrder2Scope::MemoryOrder; - using CBasicTestMemOrder2Scope::MemoryOrder2; - using CBasicTestMemOrder2Scope::MemoryOrderScope; - using CBasicTestMemOrder2Scope::MemoryScope; - using CBasicTestMemOrder2Scope::DataType; - using CBasicTestMemOrder2Scope::Iterations; - using CBasicTestMemOrder2Scope::IterationsStr; - using CBasicTest::CheckCapabilities; - CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrder2Scope(dataType, useSVM) - { - StartValue(123456); - OldValueCheck(false); - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - if(MemoryOrder2() == MEMORY_ORDER_RELEASE || - MemoryOrder2() == MEMORY_ORDER_ACQ_REL) - return 0; // not allowed as 'failure' argument - if((MemoryOrder() == MEMORY_ORDER_RELAXED && MemoryOrder2() != MEMORY_ORDER_RELAXED) || - (MemoryOrder() != MEMORY_ORDER_SEQ_CST && MemoryOrder2() == MEMORY_ORDER_SEQ_CST)) - return 0; // failure argument shall be no stronger than the success - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable - - if (CheckCapabilities(MemoryScope(), MemoryOrder2()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable - - return CBasicTestMemOrder2Scope::ExecuteSingleTest(deviceID, context, queue); - } - virtual std::string ProgramCore() - { - 
std::string memoryOrderScope = MemoryOrderScope(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" ")+DataType().RegularTypeName()+" expected, previous;\n" - " int successCount = 0;\n" - " oldValues[tid] = tid;\n" - " expected = tid; // force failure at the beginning\n" - " if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n" - " oldValues[tid] = threadCount+1; //mark unexpected success with invalid value\n" - " else\n" - " {\n" - " for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n" - " {\n" - " previous = expected;\n" - " if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n" - " {\n" - " oldValues[tid] = expected;\n" - " successCount++;\n" - " }\n" - " else\n" - " {\n" - " if(previous == expected) // spurious failure - shouldn't occur for 'strong'\n" - " {\n" - " oldValues[tid] = threadCount; //mark fail with invalid value\n" - " break;\n" - " }\n" - " }\n" - " }\n" - " }\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - HostDataType expected = (HostDataType)StartValue(), previous; - oldValues[tid] = (HostDataType)tid; - for(int i = 0; i < Iterations(); i++) - { - previous = expected; - if(host_atomic_compare_exchange(&destMemory[0], &expected, oldValues[tid], MemoryOrder(), MemoryOrder2())) - oldValues[tid] = expected; - else - { - if(previous == expected) // shouldn't occur for 'strong' + using CBasicTestMemOrder2Scope::StartValue; + using CBasicTestMemOrder2Scope::OldValueCheck; + using CBasicTestMemOrder2Scope::MemoryOrder; + using CBasicTestMemOrder2Scope::MemoryOrder2; + using CBasicTestMemOrder2Scope::MemoryOrderScope; + using CBasicTestMemOrder2Scope::MemoryScope; + using CBasicTestMemOrder2Scope::DataType; + using CBasicTestMemOrder2Scope::Iterations; + using 
CBasicTestMemOrder2Scope::IterationsStr; + using CBasicTest::CheckCapabilities; + CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrder2Scope(dataType, + useSVM) + { + StartValue(123456); + OldValueCheck(false); + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (MemoryOrder2() == MEMORY_ORDER_RELEASE + || MemoryOrder2() == MEMORY_ORDER_ACQ_REL) + return 0; // not allowed as 'failure' argument + if ((MemoryOrder() == MEMORY_ORDER_RELAXED + && MemoryOrder2() != MEMORY_ORDER_RELAXED) + || (MemoryOrder() != MEMORY_ORDER_SEQ_CST + && MemoryOrder2() == MEMORY_ORDER_SEQ_CST)) + return 0; // failure argument shall be no stronger than the success + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + if (CheckCapabilities(MemoryScope(), MemoryOrder2()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + return CBasicTestMemOrder2Scope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScope(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return std::string(" ") + DataType().RegularTypeName() + + " expected, previous;\n" + " int successCount = 0;\n" + " oldValues[tid] = tid;\n" + " expected = tid; // force failure at the beginning\n" + " if(atomic_compare_exchange_strong" + + postfix + "(&destMemory[0], &expected, oldValues[tid]" + + memoryOrderScope + + ") || expected == tid)\n" + " oldValues[tid] = threadCount+1; //mark unexpected success " + "with invalid value\n" + " else\n" + " {\n" + " for(int i = 0; i < " + + IterationsStr() + + " || successCount == 0; i++)\n" + " {\n" + " previous = expected;\n" + " if(atomic_compare_exchange_strong" + + postfix + "(&destMemory[0], &expected, oldValues[tid]" + + memoryOrderScope + + "))\n" + " {\n" + " oldValues[tid] = expected;\n" + " successCount++;\n" + " }\n" + " else\n" + " {\n" + " if(previous == expected) // spurious failure - " + "shouldn't occur for 'strong'\n" + " {\n" + " oldValues[tid] = threadCount; //mark fail with " + "invalid value\n" + " break;\n" + " }\n" + " }\n" + " }\n" + " }\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + HostDataType expected = (HostDataType)StartValue(), previous; + oldValues[tid] = (HostDataType)tid; + for (int i = 0; i < Iterations(); i++) { - oldValues[tid] = threadCount; //mark fail with invalid value + previous = expected; + if (host_atomic_compare_exchange(&destMemory[0], &expected, + oldValues[tid], MemoryOrder(), + MemoryOrder2())) + oldValues[tid] = expected; + else + { + if (previous == expected) // shouldn't occur for 'strong' + { + oldValues[tid] = threadCount; // mark fail with invalid + // value + } + } } - } - } - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - /* We are expecting values from 0 to size-1 and initial value from atomic variable */ - /* These values must be distributed across 
refValues array and atomic variable finalVaue[0] */ - /* Any repeated value is treated as an error */ - std::vector tidFound(threadCount); - bool startValueFound = false; - cl_uint i; - - for(i = 0; i <= threadCount; i++) - { - cl_uint value; - if(i == threadCount) - value = (cl_uint)finalValues[0]; //additional value from atomic variable (last written) - else - value = (cl_uint)refValues[i]; - if(value == (cl_uint)StartValue()) - { - // Special initial value - if(startValueFound) + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + /* We are expecting values from 0 to size-1 and initial value from + * atomic variable */ + /* These values must be distributed across refValues array and atomic + * variable finalVaue[0] */ + /* Any repeated value is treated as an error */ + std::vector tidFound(threadCount); + bool startValueFound = false; + cl_uint i; + + for (i = 0; i <= threadCount; i++) { - log_error("ERROR: Starting reference value (%u) occurred more thane once\n", (cl_uint)StartValue()); - correct = false; - return true; + cl_uint value; + if (i == threadCount) + value = (cl_uint)finalValues[0]; // additional value from atomic + // variable (last written) + else + value = (cl_uint)refValues[i]; + if (value == (cl_uint)StartValue()) + { + // Special initial value + if (startValueFound) + { + log_error("ERROR: Starting reference value (%u) occurred " + "more thane once\n", + (cl_uint)StartValue()); + correct = false; + return true; + } + startValueFound = true; + continue; + } + if (value >= threadCount) + { + if (value == threadCount) + log_error("ERROR: Spurious failure detected for " + "atomic_compare_exchange_strong\n"); + log_error( + "ERROR: Reference value %u outside of valid range! 
(%u)\n", + i, value); + correct = false; + return true; + } + if (tidFound[value]) + { + log_error("ERROR: Value (%u) occurred more thane once\n", + value); + correct = false; + return true; + } + tidFound[value] = true; } - startValueFound = true; - continue; - } - if(value >= threadCount) - { - if(value == threadCount) - log_error("ERROR: Spurious failure detected for atomic_compare_exchange_strong\n"); - log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value); - correct = false; - return true; - } - if(tidFound[value]) - { - log_error("ERROR: Value (%u) occurred more thane once\n", value); - correct = false; return true; - } - tidFound[value] = true; } - return true; - } }; -int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements, bool useSVM) { - int error = 0; - CBasicTestCompareStrong test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestCompareStrong test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - 
CBasicTestCompareStrong test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestCompareStrong test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestCompareStrong test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_uint( + TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_long( + TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestCompareStrong + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + 
CBasicTestCompareStrong + test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestCompareStrong + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_compare_exchange_strong(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { - return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, false); + return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, + num_elements, false); } -int test_svm_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_compare_exchange_strong(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { - return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, true); + return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, + num_elements, true); } -template -class CBasicTestCompareWeak : public 
CBasicTestCompareStrong -{ +template +class CBasicTestCompareWeak + : public CBasicTestCompareStrong { public: - using CBasicTestCompareStrong::StartValue; - using CBasicTestCompareStrong::MemoryOrderScope; - using CBasicTestCompareStrong::DataType; - using CBasicTestCompareStrong::Iterations; - using CBasicTestCompareStrong::IterationsStr; - CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM) : CBasicTestCompareStrong(dataType, useSVM) - { - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScope(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" ")+DataType().RegularTypeName()+" expected , previous;\n" - " int successCount = 0;\n" - " oldValues[tid] = tid;\n" - " expected = tid; // force failure at the beginning\n" - " if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n" - " oldValues[tid] = threadCount+1; //mark unexpected success with invalid value\n" - " else\n" - " {\n" - " for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n" - " {\n" - " previous = expected;\n" - " if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n" - " {\n" - " oldValues[tid] = expected;\n" - " successCount++;\n" - " }\n" - " }\n" - " }\n"; - } + using CBasicTestCompareStrong::StartValue; + using CBasicTestCompareStrong::MemoryOrderScope; + using CBasicTestCompareStrong::DataType; + using CBasicTestCompareStrong::Iterations; + using CBasicTestCompareStrong::IterationsStr; + CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestCompareStrong(dataType, + useSVM) + {} + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScope(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return std::string(" ") + DataType().RegularTypeName() + + " expected , previous;\n" + " int successCount = 0;\n" + " oldValues[tid] = tid;\n" + " expected = tid; // force failure at the beginning\n" + " if(atomic_compare_exchange_weak" + + postfix + "(&destMemory[0], &expected, oldValues[tid]" + + memoryOrderScope + + ") || expected == tid)\n" + " oldValues[tid] = threadCount+1; //mark unexpected success " + "with invalid value\n" + " else\n" + " {\n" + " for(int i = 0; i < " + + IterationsStr() + + " || successCount == 0; i++)\n" + " {\n" + " previous = expected;\n" + " if(atomic_compare_exchange_weak" + + postfix + "(&destMemory[0], &expected, oldValues[tid]" + + memoryOrderScope + + "))\n" + " {\n" + " oldValues[tid] = expected;\n" + " successCount++;\n" + " }\n" + " }\n" + " }\n"; + } }; -int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements, bool useSVM) { - int error = 0; - CBasicTestCompareWeak test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestCompareWeak test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, 
useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestCompareWeak test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestCompareWeak test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_uint( + TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_long( + TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestCompareWeak + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + 
test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestCompareWeak + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, false); + return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, + num_elements, false); } -int test_svm_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_compare_exchange_weak(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { - return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, true); + return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, + num_elements, true); } -template 
-class CBasicTestFetchAdd : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchAdd + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " oldValues[tid] = atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"+ - " atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n" - " atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n" - " atomic_fetch_add"+postfix+"(&destMemory[0], (("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8"+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], ((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8, MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - expected += ((HostDataType)i+3)*3+(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8); - return true; - } + 
using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + {} + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return " oldValues[tid] = atomic_fetch_add" + postfix + + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + + ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope + + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope + + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], ((" + + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + + DataType().AddSubOperandTypeName() + ")-1)*8" + memoryOrderScope + + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_add( + &destMemory[0], (HostDataType)tid + 3, MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, + MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, + MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], + ((HostDataType)tid + 3) + << (sizeof(HostDataType) - 1) * 8, + MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + expected += ((HostDataType)i + 3) * 3 + + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + return true; + } }; -int 
test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchAdd test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchAdd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchAdd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - 
CBasicTestFetchAdd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchAdd test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchAdd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchAdd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd + 
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_add(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_add(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestFetchSub : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchSub + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - return - " oldValues[tid] = atomic_fetch_sub"+postfix+"(&destMemory[0], tid + 3 +((("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8)"+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_sub(&destMemory[0], (HostDataType)tid + 3+(((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8), MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - expected -= (HostDataType)i + 3 +(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8); - return true; - } + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + {} + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return " oldValues[tid] = atomic_fetch_sub" + postfix + + "(&destMemory[0], tid + 3 +(((" + + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + + DataType().AddSubOperandTypeName() + ")-1)*8)" + memoryOrderScope + + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_sub( + &destMemory[0], + (HostDataType)tid + 3 + + (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8), + MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + expected -= (HostDataType)i + 3 + + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + return true; + } }; -int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchSub test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchSub test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub 
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchSub test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchSub test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchSub + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + 
test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchSub + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_sub(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestFetchOr : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchOr + : public 
CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - cl_uint numBits = DataType().Size(deviceID) * 8; - - return (threadCount + numBits - 1) / numBits; - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n" - " int whichResult = tid / numBits;\n" - " int bitIndex = tid - (whichResult * numBits);\n" - "\n" - " oldValues[tid] = atomic_fetch_or"+postfix+"(&destMemory[whichResult], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - size_t numBits = sizeof(HostDataType) * 8; - size_t whichResult = tid / numBits; - size_t bitIndex = tid - (whichResult * numBits); - - oldValues[tid] = host_atomic_fetch_or(&destMemory[whichResult], ((HostDataType)1 << bitIndex), MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8); - if(whichDestValue < numValues - 1) - { - expected = ~(HostDataType)0; - return true; - } - // Last item doesn't get or'ed on every bit, so we have to mask away - cl_uint numBits = threadCount - whichDestValue * (sizeof(HostDataType)*8); - expected = StartValue(); - for(cl_uint i = 0; i < numBits; i++) - expected |= ((HostDataType)1 << i); - 
return true; - } + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(0); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + cl_uint numBits = DataType().Size(deviceID) * 8; + + return (threadCount + numBits - 1) / numBits; + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(" size_t numBits = sizeof(") + + DataType().RegularTypeName() + + ") * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - (whichResult * numBits);\n" + "\n" + " oldValues[tid] = atomic_fetch_or" + + postfix + "(&destMemory[whichResult], ((" + + DataType().RegularTypeName() + ")1 << bitIndex) " + + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + size_t numBits = sizeof(HostDataType) * 8; + size_t whichResult = tid / numBits; + size_t bitIndex = tid - (whichResult * numBits); + + oldValues[tid] = + host_atomic_fetch_or(&destMemory[whichResult], + ((HostDataType)1 << bitIndex), MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1)) + / (sizeof(HostDataType) * 8); + if (whichDestValue < numValues - 1) + { + expected = ~(HostDataType)0; + return true; + } + // Last item doesn't get or'ed on every bit, so we have to mask away + cl_uint numBits = + threadCount - whichDestValue * (sizeof(HostDataType) * 8); + expected = StartValue(); + for (cl_uint i = 0; i < numBits; i++) + 
expected |= ((HostDataType)1 << i); + return true; + } }; -int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchOr test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchOr test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchOr test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, 
test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchOr test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchOr + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchOr + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, 
num_elements)); + CBasicTestFetchOr + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_or(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_or(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestFetchXor : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchXor + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::DataType; - CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue((HostDataType)0x2f08ab418ba0541LL); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - return - std::string(" int numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n" - " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n" - "\n" - " oldValues[tid] = atomic_fetch_xor"+postfix+"(&destMemory[0], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - int numBits = sizeof(HostDataType) * 8; - int bitIndex = (numBits-1)*(tid+1)/threadCount; - - oldValues[tid] = host_atomic_fetch_xor(&destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - int numBits = sizeof(HostDataType)*8; - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - { - int bitIndex = (numBits-1)*(i+1)/threadCount; - expected ^= ((HostDataType)1 << bitIndex); - } - return true; - } + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::DataType; + CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue((HostDataType)0x2f08ab418ba0541LL); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return std::string(" int numBits = sizeof(") + + DataType().RegularTypeName() + + ") * 8;\n" + " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n" + "\n" + " oldValues[tid] = atomic_fetch_xor" + + postfix + "(&destMemory[0], ((" + DataType().RegularTypeName() + + ")1 << bitIndex) " + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + int numBits = sizeof(HostDataType) * 8; + int bitIndex = (numBits - 1) * (tid + 1) / threadCount; + + oldValues[tid] = host_atomic_fetch_xor( + &destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + int numBits = sizeof(HostDataType) * 8; + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + { + int bitIndex = (numBits - 1) * (i + 1) / threadCount; + expected ^= ((HostDataType)1 << bitIndex); + } + return true; + } }; -int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchXor test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchXor 
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchXor test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchXor test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchXor + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + 
test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchXor + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_xor(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_xor_generic(deviceID, context, 
queue, num_elements, + true); } -template -class CBasicTestFetchAnd : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchAnd + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(~(HostDataType)0); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - cl_uint numBits = DataType().Size(deviceID) * 8; - - return (threadCount + numBits - 1) / numBits; - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n" - " int whichResult = tid / numBits;\n" - " int bitIndex = tid - (whichResult * numBits);\n" - "\n" - " oldValues[tid] = atomic_fetch_and"+postfix+"(&destMemory[whichResult], ~(("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - size_t numBits = sizeof(HostDataType) * 8; - size_t whichResult = tid / numBits; - size_t bitIndex = tid - (whichResult * numBits); - - oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult], ~((HostDataType)1 << bitIndex), MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8); - if(whichDestValue < numValues - 1) - { - expected = 0; - return true; - } - // Last item doesn't get and'ed on every bit, so we have to mask away - size_t numBits = 
threadCount - whichDestValue * (sizeof(HostDataType)*8); - expected = StartValue(); - for(size_t i = 0; i < numBits; i++) - expected &= ~((HostDataType)1 << i); - return true; - } + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(~(HostDataType)0); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + cl_uint numBits = DataType().Size(deviceID) * 8; + + return (threadCount + numBits - 1) / numBits; + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(" size_t numBits = sizeof(") + + DataType().RegularTypeName() + + ") * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - (whichResult * numBits);\n" + "\n" + " oldValues[tid] = atomic_fetch_and" + + postfix + "(&destMemory[whichResult], ~((" + + DataType().RegularTypeName() + ")1 << bitIndex) " + + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + size_t numBits = sizeof(HostDataType) * 8; + size_t whichResult = tid / numBits; + size_t bitIndex = tid - (whichResult * numBits); + + oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult], + ~((HostDataType)1 << bitIndex), + MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1)) + / (sizeof(HostDataType) * 8); + if (whichDestValue < numValues - 1) + { + expected = 0; + return true; + } + // Last item doesn't get and'ed on every bit, so we 
have to mask away + size_t numBits = + threadCount - whichDestValue * (sizeof(HostDataType) * 8); + expected = StartValue(); + for (size_t i = 0; i < numBits; i++) + expected &= ~((HostDataType)1 << i); + return true; + } }; -int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchAnd test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - 
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchAnd test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchAnd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchAnd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + 
test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_and(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_and(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestFetchOrAnd : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchOrAnd + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::Iterations; - using CBasicTestMemOrderScope::IterationsStr; - CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return 1+(threadCount-1)/(DataType().Size(deviceID)*8); - } - // each thread modifies (with OR and AND operations) and verifies 
- // only one bit in atomic variable - // other bits are modified by other threads but it must not affect current thread operation - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+ - " size_t valueInd = tid/bits;\n" - " "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n" - " oldValues[tid] = 0;\n" - " for(int i = 0; i < "+IterationsStr()+"; i++)\n" - " {\n" - " value = atomic_fetch_or"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n" - " if(value & bitMask) // bit should be set to 0\n" - " oldValues[tid]++;\n" - " value = atomic_fetch_and"+postfix+"(destMemory+valueInd, ~bitMask"+memoryOrderScope+");\n" - " if(!(value & bitMask)) // bit should be set to 1\n" - " oldValues[tid]++;\n" - " }\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - int bits = sizeof(HostDataType)*8; - size_t valueInd = tid/bits; - HostDataType value, bitMask = (HostDataType)1 << tid%bits; - oldValues[tid] = 0; - for(int i = 0; i < Iterations(); i++) - { - value = host_atomic_fetch_or(destMemory+valueInd, bitMask, MemoryOrder()); - if(value & bitMask) // bit should be set to 0 - oldValues[tid]++; - value = host_atomic_fetch_and(destMemory+valueInd, ~bitMask, MemoryOrder()); - if(!(value & bitMask)) // bit should be set to 1 - oldValues[tid]++; - } - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = 0; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - for(cl_uint i = 0; i < threadCount; i++) - { - if(refValues[i] > 0) - { - log_error("Thread %d 
found %d mismatch(es)\n", i, (cl_uint)refValues[i]); - correct = false; - } - } - return true; - } + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::Iterations; + using CBasicTestMemOrderScope::IterationsStr; + CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(0); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8); + } + // each thread modifies (with OR and AND operations) and verifies + // only one bit in atomic variable + // other bits are modified by other threads but it must not affect current + // thread operation + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return std::string(" int bits = sizeof(") + + DataType().RegularTypeName() + ")*8;\n" + + " size_t valueInd = tid/bits;\n" + " " + + DataType().RegularTypeName() + " value, bitMask = (" + + DataType().RegularTypeName() + + ")1 << tid%bits;\n" + " oldValues[tid] = 0;\n" + " for(int i = 0; i < " + + IterationsStr() + + "; i++)\n" + " {\n" + " value = atomic_fetch_or" + + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + + ");\n" + " if(value & bitMask) // bit should be set to 0\n" + " oldValues[tid]++;\n" + " value = atomic_fetch_and" + + postfix + "(destMemory+valueInd, ~bitMask" + memoryOrderScope + + ");\n" + " if(!(value & bitMask)) // bit should be set to 1\n" + " oldValues[tid]++;\n" + " }\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + int bits = sizeof(HostDataType) * 8; + size_t valueInd = tid / bits; + HostDataType value, bitMask = (HostDataType)1 << tid % bits; + oldValues[tid] = 0; + for (int i = 0; i < Iterations(); i++) + { + value = host_atomic_fetch_or(destMemory + valueInd, bitMask, + MemoryOrder()); + if (value & bitMask) // bit should be set to 0 + oldValues[tid]++; + value = host_atomic_fetch_and(destMemory + valueInd, ~bitMask, + MemoryOrder()); + if (!(value & bitMask)) // bit should be set to 1 + oldValues[tid]++; + } + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = 0; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + for (cl_uint i = 0; i < threadCount; i++) + { + if (refValues[i] > 0) + { + log_error("Thread %d found %d mismatch(es)\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return true; + } }; -int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchOrAnd test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchOrAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchOrAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, 
useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchOrAnd test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_uint( + TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_long( + TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchOrAnd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchOrAnd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + 
EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_orand_generic(deviceID, context, queue, + num_elements, false); } -int test_svm_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_orand(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_orand_generic(deviceID, context, queue, + num_elements, true); } -template -class CBasicTestFetchXor2 : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchXor2 + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::Iterations; - using CBasicTestMemOrderScope::IterationsStr; - CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return 1+(threadCount-1)/(DataType().Size(deviceID)*8); - } - // each thread modifies (with XOR operation) and verifies - // only one bit in atomic variable - // other bits are modified by other threads but it must not affect current thread operation - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - return - std::string(" int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+ - " size_t valueInd = tid/bits;\n" - " "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n" - " oldValues[tid] = 0;\n" - " for(int i = 0; i < "+IterationsStr()+"; i++)\n" - " {\n" - " value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n" - " if(value & bitMask) // bit should be set to 0\n" - " oldValues[tid]++;\n" - " value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n" - " if(!(value & bitMask)) // bit should be set to 1\n" - " oldValues[tid]++;\n" - " }\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - int bits = sizeof(HostDataType)*8; - size_t valueInd = tid/bits; - HostDataType value, bitMask = (HostDataType)1 << tid%bits; - oldValues[tid] = 0; - for(int i = 0; i < Iterations(); i++) - { - value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder()); - if(value & bitMask) // bit should be set to 0 - oldValues[tid]++; - value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder()); - if(!(value & bitMask)) // bit should be set to 1 - oldValues[tid]++; - } - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = 0; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - for(cl_uint i = 0; i < threadCount; i++) - { - if(refValues[i] > 0) - { - log_error("Thread %d found %d mismatches\n", i, (cl_uint)refValues[i]); - correct = false; - } - } - return true; - } + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using 
CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::Iterations; + using CBasicTestMemOrderScope::IterationsStr; + CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(0); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8); + } + // each thread modifies (with XOR operation) and verifies + // only one bit in atomic variable + // other bits are modified by other threads but it must not affect current + // thread operation + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(" int bits = sizeof(") + + DataType().RegularTypeName() + ")*8;\n" + + " size_t valueInd = tid/bits;\n" + " " + + DataType().RegularTypeName() + " value, bitMask = (" + + DataType().RegularTypeName() + + ")1 << tid%bits;\n" + " oldValues[tid] = 0;\n" + " for(int i = 0; i < " + + IterationsStr() + + "; i++)\n" + " {\n" + " value = atomic_fetch_xor" + + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + + ");\n" + " if(value & bitMask) // bit should be set to 0\n" + " oldValues[tid]++;\n" + " value = atomic_fetch_xor" + + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + + ");\n" + " if(!(value & bitMask)) // bit should be set to 1\n" + " oldValues[tid]++;\n" + " }\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + int bits = sizeof(HostDataType) * 8; + size_t valueInd = tid / bits; + HostDataType value, bitMask = (HostDataType)1 << tid % bits; + oldValues[tid] = 0; + for (int i = 0; i < Iterations(); i++) + { + value = host_atomic_fetch_xor(destMemory + valueInd, bitMask, + MemoryOrder()); + if (value & bitMask) // bit should be set to 0 + oldValues[tid]++; + 
value = host_atomic_fetch_xor(destMemory + valueInd, bitMask, + MemoryOrder()); + if (!(value & bitMask)) // bit should be set to 1 + oldValues[tid]++; + } + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = 0; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + for (cl_uint i = 0; i < threadCount; i++) + { + if (refValues[i] > 0) + { + log_error("Thread %d found %d mismatches\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return true; + } }; -int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchXor2 test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchXor2 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, 
test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchXor2 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchXor2 test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchXor2 + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, 
num_elements)); + CBasicTestFetchXor2 + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchXor2 + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_xor2_generic(deviceID, context, queue, + num_elements, false); } -int test_svm_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_xor2_generic(deviceID, context, queue, + num_elements, true); } -template -class CBasicTestFetchMin : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchMin + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using 
CBasicTestMemOrderScope::MemoryOrderScopeStr; - CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(DataType().MaxValue()); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " oldValues[tid] = atomic_fetch_min"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid], MemoryOrder()); - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - for(cl_uint i = 0; i < threadCount; i++) - { - startRefValues[i] = genrand_int32(d); - if(sizeof(HostDataType) >= 8) - startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; - } - return true; - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - { - if(startRefValues[ i ] < expected) - expected = startRefValues[ i ]; - } - return true; - } + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(DataType().MaxValue()); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return " oldValues[tid] = atomic_fetch_min" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid], + MemoryOrder()); + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = genrand_int32(d); + if (sizeof(HostDataType) >= 8) + startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; + } + return true; + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + { + if (startRefValues[i] < expected) expected = startRefValues[i]; + } + return true; + } }; -int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchMin test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchMin test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, 
test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchMin test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchMin test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchMin + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + 
CBasicTestFetchMin + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchMin + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_min(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_min(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestFetchMax 
: public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchMax + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(DataType().MinValue()); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " oldValues[tid] = atomic_fetch_max"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid], MemoryOrder()); - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - for(cl_uint i = 0; i < threadCount; i++) - { - startRefValues[i] = genrand_int32(d); - if(sizeof(HostDataType) >= 8) - startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; - } - return true; - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - { - if(startRefValues[ i ] > expected) - expected = startRefValues[ i ]; - } - return true; - } + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(DataType().MinValue()); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); 
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return " oldValues[tid] = atomic_fetch_max" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid], + MemoryOrder()); + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = genrand_int32(d); + if (sizeof(HostDataType) >= 8) + startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; + } + return true; + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + { + if (startRefValues[i] > expected) expected = startRefValues[i]; + } + return true; + } }; -int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchMax test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchMax test_intptr_t(TYPE_ATOMIC_INTPTR_T, 
useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchMax test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchMax test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchMax + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, 
num_elements)); + CBasicTestFetchMax + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchMax + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_max(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_max(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, + true); } -template -class 
CBasicTestFlag : public CBasicTestMemOrderScope -{ - static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000; -public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::OldValueCheck; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScopeStr; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::UseSVM; - using CBasicTestMemOrderScope::LocalMemory; - CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - TExplicitMemoryOrderType MemoryOrderForClear() - { - // Memory ordering for atomic_flag_clear function - // ("shall not be memory_order_acquire nor memory_order_acq_rel") - if(MemoryOrder() == MEMORY_ORDER_ACQUIRE) - return MEMORY_ORDER_RELAXED; - if (MemoryOrder() == MEMORY_ORDER_ACQ_REL) - return MEMORY_ORDER_RELEASE; - return MemoryOrder(); - } - std::string MemoryOrderScopeStrForClear() - { - std::string orderStr; - if (MemoryOrder() != MEMORY_ORDER_EMPTY) - orderStr = std::string(", ") + get_memory_order_type_name(MemoryOrderForClear()); - return orderStr + MemoryScopeStr(); - } - - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, - cl_command_queue queue) - { - // This test assumes support for the memory_scope_device scope in the case - // that LocalMemory() == false. Therefore we should skip this test in that - // configuration on a 3.0 driver since supporting the memory_scope_device - // scope is optionaly. 
- if (get_device_cl_version(deviceID) >= Version{ 3, 0 }) - { - if (!LocalMemory() - && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE)) - { - log_info( - "Skipping atomic_flag test due to use of atomic_scope_device " - "which is optionally not supported on this device\n"); - return 0; // skip test - not applicable - } - } - return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, - context, - queue); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - std::string program = - " uint cnt, stop = 0;\n" - " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n" - " {\n" - " bool set = atomic_flag_test_and_set" + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n"; - if (MemoryOrder() == MEMORY_ORDER_RELAXED - || MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory()) - program += " atomic_work_item_fence(" - + std::string(LocalMemory() - ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " - : "CLK_GLOBAL_MEM_FENCE, ") - + "memory_order_acquire," - + std::string(LocalMemory() - ? "memory_scope_work_group" - : (UseSVM() ? 
"memory_scope_all_svm_devices" - : "memory_scope_device")) - + ");\n"; - - program += - " if (!set)\n" - " {\n"; +template +class CBasicTestFlag + : public CBasicTestMemOrderScope { + static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000; - if (LocalMemory()) - program += " uint csIndex = get_enqueued_local_size(0)*get_group_id(0)+cnt;\n"; - else - program += " uint csIndex = cnt;\n"; - - std::ostringstream csNotVisited; - csNotVisited << CRITICAL_SECTION_NOT_VISITED; - program += - " // verify that thread is the first visitor\n" - " if(oldValues[csIndex] == "+csNotVisited.str()+")\n" - " {\n" - " oldValues[csIndex] = tid; // set the winner id for this critical section\n" - " stop = 1;\n" - " }\n"; - - if (MemoryOrder() == MEMORY_ORDER_ACQUIRE - || MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory()) - program += " atomic_work_item_fence(" - + std::string(LocalMemory() - ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " - : "CLK_GLOBAL_MEM_FENCE, ") - + "memory_order_release," - + std::string(LocalMemory() - ? "memory_scope_work_group" - : (UseSVM() ? 
"memory_scope_all_svm_devices" - : "memory_scope_device")) - + ");\n"; +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScopeStr; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::UseSVM; + using CBasicTestMemOrderScope::LocalMemory; + CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(0); + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + TExplicitMemoryOrderType MemoryOrderForClear() + { + // Memory ordering for atomic_flag_clear function + // ("shall not be memory_order_acquire nor memory_order_acq_rel") + if (MemoryOrder() == MEMORY_ORDER_ACQUIRE) return MEMORY_ORDER_RELAXED; + if (MemoryOrder() == MEMORY_ORDER_ACQ_REL) return MEMORY_ORDER_RELEASE; + return MemoryOrder(); + } + std::string MemoryOrderScopeStrForClear() + { + std::string orderStr; + if (MemoryOrder() != MEMORY_ORDER_EMPTY) + orderStr = std::string(", ") + + get_memory_order_type_name(MemoryOrderForClear()); + return orderStr + MemoryScopeStr(); + } - program += - " atomic_flag_clear" + postfix + "(&destMemory[cnt]" + MemoryOrderScopeStrForClear() + ");\n" - " }\n" - " }\n"; - return program; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - cl_uint cnt, stop = 0; - for (cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n" - { - if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder())) - { - cl_uint csIndex = cnt; - // verify that thread is the first visitor\n" - if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED) + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue 
queue) + { + // This test assumes support for the memory_scope_device scope in the + // case that LocalMemory() == false. Therefore we should skip this test + // in that configuration on a 3.0 driver since supporting the + // memory_scope_device scope is optionaly. + if (get_device_cl_version(deviceID) >= Version{ 3, 0 }) + { + if (!LocalMemory() + && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE)) + { + log_info("Skipping atomic_flag test due to use of " + "atomic_scope_device " + "which is optionally not supported on this device\n"); + return 0; // skip test - not applicable + } + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + std::string program = + " uint cnt, stop = 0;\n" + " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread " + "must find critical section where it is the first visitor\n" + " {\n" + " bool set = atomic_flag_test_and_set" + + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n"; + if (MemoryOrder() == MEMORY_ORDER_RELAXED + || MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory()) + program += " atomic_work_item_fence(" + + std::string( + LocalMemory() + ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " + : "CLK_GLOBAL_MEM_FENCE, ") + + "memory_order_acquire," + + std::string(LocalMemory() + ? "memory_scope_work_group" + : (UseSVM() ? 
"memory_scope_all_svm_devices" + : "memory_scope_device")) + + ");\n"; + + program += " if (!set)\n" + " {\n"; + + if (LocalMemory()) + program += " uint csIndex = " + "get_enqueued_local_size(0)*get_group_id(0)+cnt;\n"; + else + program += " uint csIndex = cnt;\n"; + + std::ostringstream csNotVisited; + csNotVisited << CRITICAL_SECTION_NOT_VISITED; + program += " // verify that thread is the first visitor\n" + " if(oldValues[csIndex] == " + + csNotVisited.str() + + ")\n" + " {\n" + " oldValues[csIndex] = tid; // set the winner id for this " + "critical section\n" + " stop = 1;\n" + " }\n"; + + if (MemoryOrder() == MEMORY_ORDER_ACQUIRE + || MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory()) + program += " atomic_work_item_fence(" + + std::string( + LocalMemory() + ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " + : "CLK_GLOBAL_MEM_FENCE, ") + + "memory_order_release," + + std::string(LocalMemory() + ? "memory_scope_work_group" + : (UseSVM() ? "memory_scope_all_svm_devices" + : "memory_scope_device")) + + ");\n"; + + program += " atomic_flag_clear" + postfix + "(&destMemory[cnt]" + + MemoryOrderScopeStrForClear() + + ");\n" + " }\n" + " }\n"; + return program; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + cl_uint cnt, stop = 0; + for (cnt = 0; !stop && cnt < threadCount; + cnt++) // each thread must find critical section where it is the + // first visitor\n" { - oldValues[csIndex] = tid; // set the winner id for this critical section\n" - stop = 1; + if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder())) + { + cl_uint csIndex = cnt; + // verify that thread is the first visitor\n" + if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED) + { + oldValues[csIndex] = + tid; // set the winner id for this critical section\n" + stop = 1; + } + host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear()); + } } - host_atomic_flag_clear(&destMemory[cnt], 
MemoryOrderForClear()); - } - } - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - return true; - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - for(cl_uint i = 0 ; i < threadCount; i++) - startRefValues[i] = CRITICAL_SECTION_NOT_VISITED; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - /* We are expecting unique values from 0 to threadCount-1 (each critical section must be visited) */ - /* These values must be distributed across refValues array */ - std::vector tidFound(threadCount); - cl_uint i; - - for (i = 0; i < threadCount; i++) - { - cl_uint value = (cl_uint)refValues[i]; - if (value == CRITICAL_SECTION_NOT_VISITED) - { - // Special initial value - log_error("ERROR: Critical section %u not visited\n", i); - correct = false; + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); return true; - } - if (value >= threadCount) - { - log_error("ERROR: Reference value %u outside of valid range! 
(%u)\n", i, value); - correct = false; + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + for (cl_uint i = 0; i < threadCount; i++) + startRefValues[i] = CRITICAL_SECTION_NOT_VISITED; return true; - } - if (tidFound[value]) - { - log_error("ERROR: Value (%u) occurred more thane once\n", value); - correct = false; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + /* We are expecting unique values from 0 to threadCount-1 (each critical + * section must be visited) */ + /* These values must be distributed across refValues array */ + std::vector tidFound(threadCount); + cl_uint i; + + for (i = 0; i < threadCount; i++) + { + cl_uint value = (cl_uint)refValues[i]; + if (value == CRITICAL_SECTION_NOT_VISITED) + { + // Special initial value + log_error("ERROR: Critical section %u not visited\n", i); + correct = false; + return true; + } + if (value >= threadCount) + { + log_error( + "ERROR: Reference value %u outside of valid range! 
(%u)\n", + i, value); + correct = false; + return true; + } + if (tidFound[value]) + { + log_error("ERROR: Value (%u) occurred more thane once\n", + value); + correct = false; + return true; + } + tidFound[value] = true; + } return true; - } - tidFound[value] = true; } - return true; - } }; -int test_atomic_flag_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_flag_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFlag test_flag(TYPE_ATOMIC_FLAG, useSVM); - EXECUTE_TEST(error, test_flag.Execute(deviceID, context, queue, num_elements)); - return error; + int error = 0; + CBasicTestFlag test_flag(TYPE_ATOMIC_FLAG, + useSVM); + EXECUTE_TEST(error, + test_flag.Execute(deviceID, context, queue, num_elements)); + return error; } -int test_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_flag(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_flag_generic(deviceID, context, queue, num_elements, false); + return test_atomic_flag_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_flag(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_flag_generic(deviceID, context, queue, num_elements, true); + return test_atomic_flag_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestFence : public CBasicTestMemOrderScope -{ - struct TestDefinition { - bool op1IsFence; - TExplicitMemoryOrderType op1MemOrder; - bool op2IsFence; - TExplicitMemoryOrderType op2MemOrder; - }; -public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::OldValueCheck; - 
using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScope; - using CBasicTestMemOrderScope::MemoryScopeStr; - using CBasicTestMemOrderScope::DeclaredInProgram; - using CBasicTestMemOrderScope::UsedInFunction; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::CurrentGroupSize; - using CBasicTestMemOrderScope::UseSVM; - using CBasicTestMemOrderScope::LocalMemory; - using CBasicTestMemOrderScope::LocalRefValues; - CBasicTestFence(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - virtual cl_uint NumNonAtomicVariablesPerThread() - { - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) - return 1; - if (LocalMemory()) - { - if (gIsEmbedded) - { - if (CurrentGroupSize() > 1024) - CurrentGroupSize(1024); - return 1; //1KB of local memory required by spec. Clamp group size to 1k and allow 1 variable per thread - } - else - return 32 * 1024 / 8 / CurrentGroupSize() - 1; //32KB of local memory required by spec - } - return 256; - } - virtual std::string SingleTestName() - { - std::string testName; - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) - testName += "seq_cst fence, "; - else - testName += std::string(get_memory_order_type_name(_subCase.op1MemOrder)).substr(sizeof("memory_order")) - + (_subCase.op1IsFence ? " fence" : " atomic") + " synchronizes-with " - + std::string(get_memory_order_type_name(_subCase.op2MemOrder)).substr(sizeof("memory_order")) - + (_subCase.op2IsFence ? 
" fence" : " atomic") + ", "; - testName += CBasicTest::SingleTestName(); - testName += std::string(", ") + std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory")); - return testName; - } - virtual bool SVMDataBufferAllSVMConsistent() - { - // Although memory_scope_all_devices doesn't mention SVM it is just an - // alias for memory_scope_all_svm_devices. So both scopes interact with - // SVM allocations, on devices that support those, just the same. - return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES - || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES; - } - virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - int error = 0; - // execute 3 (maximum) sub cases for each memory order - for (_subCaseId = 0; _subCaseId < 3; _subCaseId++) +template +class CBasicTestFence + : public CBasicTestMemOrderScope { + struct TestDefinition { - EXECUTE_TEST(error, (CBasicTestMemOrderScope::ExecuteForEachParameterSet(deviceID, context, queue))); - } - return error; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - if(DeclaredInProgram() || UsedInFunction()) - return 0; //skip test - not applicable - no overloaded fence functions for different address spaces - if(MemoryOrder() == MEMORY_ORDER_EMPTY || - MemoryScope() == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since opencl20-openclc-rev15 - return 0; //skip test - not applicable - if((UseSVM() || gHost) - && LocalMemory()) - return 0; // skip test - not applicable for SVM and local memory - struct TestDefinition acqTests[] = { - // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder} - { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE }, - { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE }, - { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE } - }; - struct TestDefinition relTests[] = { - { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE }, - { true, 
MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL } - }; - struct TestDefinition arTests[] = { - { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }, - { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE }, - { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL } + bool op1IsFence; + TExplicitMemoryOrderType op1MemOrder; + bool op2IsFence; + TExplicitMemoryOrderType op2MemOrder; }; - switch (MemoryOrder()) - { - case MEMORY_ORDER_ACQUIRE: - if (_subCaseId >= sizeof(acqTests) / sizeof(struct TestDefinition)) - return 0; - _subCase = acqTests[_subCaseId]; - break; - case MEMORY_ORDER_RELEASE: - if (_subCaseId >= sizeof(relTests) / sizeof(struct TestDefinition)) - return 0; - _subCase = relTests[_subCaseId]; - break; - case MEMORY_ORDER_ACQ_REL: - if (_subCaseId >= sizeof(arTests) / sizeof(struct TestDefinition)) - return 0; - _subCase = arTests[_subCaseId]; - break; - case MEMORY_ORDER_SEQ_CST: - if (_subCaseId != 0) // one special case only - return 0; - break; - default: - return 0; - } - LocalRefValues(LocalMemory()); - return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, context, queue); - } - virtual std::string ProgramHeader(cl_uint maxNumDestItems) - { - std::string header; - if(gOldAPI) - { - if(MemoryScope() == MEMORY_SCOPE_EMPTY) - { - header += "#define atomic_work_item_fence(x,y) mem_fence(x)\n"; - } - else - { - header += "#define atomic_work_item_fence(x,y,z) mem_fence(x)\n"; - } - } - return header+CBasicTestMemOrderScope::ProgramHeader(maxNumDestItems); - } - virtual std::string ProgramCore() - { - std::ostringstream naValues; - naValues << NumNonAtomicVariablesPerThread(); - std::string program, fenceType, nonAtomic; - if (LocalMemory()) - { - program = " size_t myId = get_local_id(0), hisId = get_local_size(0)-1-myId;\n"; - fenceType = "CLK_LOCAL_MEM_FENCE"; - nonAtomic = "localValues"; - } - else + +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::OldValueCheck; + using 
CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScope; + using CBasicTestMemOrderScope::MemoryScopeStr; + using CBasicTestMemOrderScope::DeclaredInProgram; + using CBasicTestMemOrderScope::UsedInFunction; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::CurrentGroupSize; + using CBasicTestMemOrderScope::UseSVM; + using CBasicTestMemOrderScope::LocalMemory; + using CBasicTestMemOrderScope::LocalRefValues; + CBasicTestFence(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) { - program = " size_t myId = tid, hisId = threadCount-1-tid;\n"; - fenceType = "CLK_GLOBAL_MEM_FENCE"; - nonAtomic = "oldValues"; - } - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) - { - // All threads are divided into pairs. - // Each thread has its own atomic variable and performs the following actions: - // - increments its own variable - // - performs fence operation to propagate its value and to see value from other thread - // - reads value from other thread's variable - // - repeats the above steps when both values are the same (and less than 1000000) - // - stores the last value read from other thread (in additional variable) - // At the end of execution at least one thread should know the last value from other thread - program += std::string("") + - " " + DataType().RegularTypeName() + " myValue = 0, hisValue; \n" - " do {\n" - " myValue++;\n" - " atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n" - " atomic_work_item_fence(" + fenceType + ", memory_order_seq_cst" + MemoryScopeStr() + "); \n" - " hisValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n" - " } while(myValue == hisValue && myValue < 1000000);\n" - " " + nonAtomic + "[myId] = hisValue; \n"; + StartValue(0); + OldValueCheck(false); } - else + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { - // Each thread modifies 
one of its non-atomic variables, increments value of its atomic variable - // and reads values from another thread in typical synchronizes-with scenario with: - // - non-atomic variable (at index A) modification (value change from 0 to A) - // - release operation (additional fence or within atomic) + atomic variable modification (value A) - // - atomic variable read (value B) + acquire operation (additional fence or within atomic) - // - non-atomic variable (at index B) read (value C) - // Each thread verifies dependency between atomic and non-atomic value read from another thread - // The following condition must be true: B == C - program += std::string("") + - " " + DataType().RegularTypeName() + " myValue = 0, hisAtomicValue, hisValue; \n" - " do {\n" - " myValue++;\n" - " " + nonAtomic + "[myId*" + naValues.str() +"+myValue] = myValue;\n"; - if (_subCase.op1IsFence) - program += std::string("") + - " atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + "); \n" - " atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n"; - else - program += std::string("") + - " atomic_store_explicit(&destMemory[myId], myValue, " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + ");\n"; - if (_subCase.op2IsFence) - program += std::string("") + - " hisAtomicValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n" - " atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + "); \n"; - else - program += std::string("") + - " hisAtomicValue = atomic_load_explicit(&destMemory[hisId], " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + ");\n"; - program += - " hisValue = " + nonAtomic + "[hisId*" + naValues.str() + "+hisAtomicValue]; \n"; - if (LocalMemory()) - program += " hisId = (hisId+1)%get_local_size(0);\n"; - else - program += " 
hisId = (hisId+1)%threadCount;\n"; - program += - " } while(hisAtomicValue == hisValue && myValue < "+naValues.str()+"-1);\n" - " if(hisAtomicValue != hisValue)\n" - " { // fail\n" - " atomic_store(&destMemory[myId], myValue-1);\n"; - if (LocalMemory()) - program += " hisId = (hisId+get_local_size(0)-1)%get_local_size(0);\n"; - else - program += " hisId = (hisId+threadCount-1)%threadCount;\n"; - program += - " if(myValue+1 < " + naValues.str() + ")\n" - " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+1] = hisId;\n" - " if(myValue+2 < " + naValues.str() + ")\n" - " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+2] = hisAtomicValue;\n" - " if(myValue+3 < " + naValues.str() + ")\n" - " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+3] = hisValue;\n"; - if (gDebug) - { - program += - " printf(\"WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\\n\", tid, hisAtomicValue, hisId, hisValue);\n"; - } - program += - " }\n"; - } - return program; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - size_t myId = tid, hisId = threadCount - 1 - tid; - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) - { - HostDataType myValue = 0, hisValue; - // CPU thread typically starts faster - wait for GPU thread - myValue++; - host_atomic_store(&destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST); - while (host_atomic_load(&destMemory[hisId], MEMORY_ORDER_SEQ_CST) == 0); - do { - myValue++; - host_atomic_store(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED); - host_atomic_thread_fence(MemoryOrder()); - hisValue = host_atomic_load(&destMemory[hisId], MEMORY_ORDER_RELAXED); - } while (myValue == hisValue && hisValue < 1000000); - oldValues[tid] = hisValue; + return threadCount; } - else + virtual cl_uint NumNonAtomicVariablesPerThread() { - HostDataType myValue = 0, hisAtomicValue, hisValue; - do { - myValue++; - oldValues[myId*NumNonAtomicVariablesPerThread()+myValue] 
= myValue; - if (_subCase.op1IsFence) + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) return 1; + if (LocalMemory()) { - host_atomic_thread_fence(_subCase.op1MemOrder); - host_atomic_store(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED); + if (gIsEmbedded) + { + if (CurrentGroupSize() > 512) CurrentGroupSize(512); + return 2; // 1KB of local memory required by spec. Clamp group + // size to 512 and allow 2 variables per thread + } + else + return 32 * 1024 / 8 / CurrentGroupSize() + - 1; // 32KB of local memory required by spec } + return 256; + } + virtual std::string SingleTestName() + { + std::string testName; + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + testName += "seq_cst fence, "; else - host_atomic_store(&destMemory[myId], myValue, _subCase.op1MemOrder); - if (_subCase.op2IsFence) + testName += + std::string(get_memory_order_type_name(_subCase.op1MemOrder)) + .substr(sizeof("memory_order")) + + (_subCase.op1IsFence ? " fence" : " atomic") + + " synchronizes-with " + + std::string(get_memory_order_type_name(_subCase.op2MemOrder)) + .substr(sizeof("memory_order")) + + (_subCase.op2IsFence ? " fence" : " atomic") + ", "; + testName += CBasicTest::SingleTestName(); + testName += std::string(", ") + + std::string(get_memory_scope_type_name(MemoryScope())) + .substr(sizeof("memory")); + return testName; + } + virtual bool SVMDataBufferAllSVMConsistent() + { + // Although memory_scope_all_devices doesn't mention SVM it is just an + // alias for memory_scope_all_svm_devices. So both scopes interact with + // SVM allocations, on devices that support those, just the same. 
+ return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES + || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES; + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, + cl_context context, + cl_command_queue queue) + { + int error = 0; + // execute 3 (maximum) sub cases for each memory order + for (_subCaseId = 0; _subCaseId < 3; _subCaseId++) { - hisAtomicValue = host_atomic_load(&destMemory[hisId], MEMORY_ORDER_RELAXED); - host_atomic_thread_fence(_subCase.op2MemOrder); + EXECUTE_TEST( + error, + (CBasicTestMemOrderScope:: + ExecuteForEachParameterSet(deviceID, context, queue))); } - else - hisAtomicValue = host_atomic_load(&destMemory[hisId], _subCase.op2MemOrder); - hisValue = oldValues[hisId*NumNonAtomicVariablesPerThread() + hisAtomicValue]; - hisId = (hisId + 1) % threadCount; - } while(hisAtomicValue == hisValue && myValue < (HostDataType)NumNonAtomicVariablesPerThread()-1); - if(hisAtomicValue != hisValue) - { // fail - host_atomic_store(&destMemory[myId], myValue-1, MEMORY_ORDER_SEQ_CST); - if (gDebug) + return error; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (DeclaredInProgram() || UsedInFunction()) + return 0; // skip test - not applicable - no overloaded fence + // functions for different address spaces + if (MemoryOrder() == MEMORY_ORDER_EMPTY + || MemoryScope() + == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since + // opencl20-openclc-rev15 + return 0; // skip test - not applicable + if ((UseSVM() || gHost) && LocalMemory()) + return 0; // skip test - not applicable for SVM and local memory + struct TestDefinition acqTests[] = { + // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder} + { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE }, + { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE }, + { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE } + }; + struct TestDefinition relTests[] = { + { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE 
}, + { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL } + }; + struct TestDefinition arTests[] = { + { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }, + { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE }, + { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL } + }; + switch (MemoryOrder()) { - hisId = (hisId + threadCount - 1) % threadCount; - printf("WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\n", tid, hisAtomicValue, hisId, hisValue); + case MEMORY_ORDER_ACQUIRE: + if (_subCaseId + >= sizeof(acqTests) / sizeof(struct TestDefinition)) + return 0; + _subCase = acqTests[_subCaseId]; + break; + case MEMORY_ORDER_RELEASE: + if (_subCaseId + >= sizeof(relTests) / sizeof(struct TestDefinition)) + return 0; + _subCase = relTests[_subCaseId]; + break; + case MEMORY_ORDER_ACQ_REL: + if (_subCaseId + >= sizeof(arTests) / sizeof(struct TestDefinition)) + return 0; + _subCase = arTests[_subCaseId]; + break; + case MEMORY_ORDER_SEQ_CST: + if (_subCaseId != 0) // one special case only + return 0; + break; + default: return 0; } - } - } - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - for(cl_uint i = 0 ; i < threadCount*NumNonAtomicVariablesPerThread(); i++) - startRefValues[i] = 0; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - cl_uint workSize = LocalMemory() ? 
CurrentGroupSize() : threadCount; - for(cl_uint workOffset = 0; workOffset < threadCount; workOffset+= workSize) - { - if(workOffset+workSize > threadCount) - // last workgroup (host threads) - workSize = threadCount-workOffset; - for(cl_uint i = 0 ; i < workSize && workOffset+i < threadCount; i++) - { - HostAtomicType myValue = finalValues[workOffset + i]; - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + LocalRefValues(LocalMemory()); + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramHeader(cl_uint maxNumDestItems) + { + std::string header; + if (gOldAPI) { - HostDataType hisValue = refValues[workOffset + i]; - if (myValue == hisValue) - { - // a draw - both threads should reach final value 1000000 - if (myValue != 1000000) + if (MemoryScope() == MEMORY_SCOPE_EMPTY) { - log_error("ERROR: Invalid reference value #%u (%d instead of 1000000)\n", workOffset + i, myValue); - correct = false; - return true; + header += "#define atomic_work_item_fence(x,y) " + " mem_fence(x)\n"; } - } - else - { - //slower thread (in total order of seq_cst operations) must know last value written by faster thread - HostAtomicType hisRealValue = finalValues[workOffset + workSize - 1 - i]; - HostDataType myValueReadByHim = refValues[workOffset + workSize - 1 - i]; - - // who is the winner? 
- thread with lower private counter value - if (myValue == hisRealValue) // forbidden result - fence doesn't work + else { - log_error("ERROR: Atomic counter values #%u and #%u are the same (%u)\n", workOffset + i, workOffset + workSize - 1 - i, myValue); - log_error("ERROR: Both threads have outdated values read from another thread (%u and %u)\n", hisValue, myValueReadByHim); - correct = false; - return true; + header += "#define atomic_work_item_fence(x,y,z) " + " mem_fence(x)\n"; } - if (myValue > hisRealValue) // I'm slower + } + return header + + CBasicTestMemOrderScope:: + ProgramHeader(maxNumDestItems); + } + virtual std::string ProgramCore() + { + std::ostringstream naValues; + naValues << NumNonAtomicVariablesPerThread(); + std::string program, fenceType, nonAtomic; + if (LocalMemory()) + { + program = " size_t myId = get_local_id(0), hisId = " + "get_local_size(0)-1-myId;\n"; + fenceType = "CLK_LOCAL_MEM_FENCE"; + nonAtomic = "localValues"; + } + else + { + program = " size_t myId = tid, hisId = threadCount-1-tid;\n"; + fenceType = "CLK_GLOBAL_MEM_FENCE"; + nonAtomic = "oldValues"; + } + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + { + // All threads are divided into pairs. 
+ // Each thread has its own atomic variable and performs the + // following actions: + // - increments its own variable + // - performs fence operation to propagate its value and to see + // value from other thread + // - reads value from other thread's variable + // - repeats the above steps when both values are the same (and less + // than 1000000) + // - stores the last value read from other thread (in additional + // variable) At the end of execution at least one thread should know + // the last value from other thread + program += std::string("") + " " + DataType().RegularTypeName() + + " myValue = 0, hisValue; \n" + " do {\n" + " myValue++;\n" + " atomic_store_explicit(&destMemory[myId], myValue, " + "memory_order_relaxed" + + MemoryScopeStr() + + ");\n" + " atomic_work_item_fence(" + + fenceType + ", memory_order_seq_cst" + MemoryScopeStr() + + "); \n" + " hisValue = atomic_load_explicit(&destMemory[hisId], " + "memory_order_relaxed" + + MemoryScopeStr() + + ");\n" + " } while(myValue == hisValue && myValue < 1000000);\n" + " " + + nonAtomic + "[myId] = hisValue; \n"; + } + else + { + // Each thread modifies one of its non-atomic variables, increments + // value of its atomic variable and reads values from another thread + // in typical synchronizes-with scenario with: + // - non-atomic variable (at index A) modification (value change + // from 0 to A) + // - release operation (additional fence or within atomic) + atomic + // variable modification (value A) + // - atomic variable read (value B) + acquire operation (additional + // fence or within atomic) + // - non-atomic variable (at index B) read (value C) + // Each thread verifies dependency between atomic and non-atomic + // value read from another thread The following condition must be + // true: B == C + program += std::string("") + " " + DataType().RegularTypeName() + + " myValue = 0, hisAtomicValue, hisValue; \n" + " do {\n" + " myValue++;\n" + " " + + nonAtomic + "[myId*" + naValues.str() + + 
"+myValue] = myValue;\n"; + if (_subCase.op1IsFence) + program += std::string("") + " atomic_work_item_fence(" + + fenceType + ", " + + get_memory_order_type_name(_subCase.op1MemOrder) + + MemoryScopeStr() + + "); \n" + " atomic_store_explicit(&destMemory[myId], myValue, " + "memory_order_relaxed" + + MemoryScopeStr() + ");\n"; + else + program += std::string("") + + " atomic_store_explicit(&destMemory[myId], myValue, " + + get_memory_order_type_name(_subCase.op1MemOrder) + + MemoryScopeStr() + ");\n"; + if (_subCase.op2IsFence) + program += std::string("") + + " hisAtomicValue = " + "atomic_load_explicit(&destMemory[hisId], " + "memory_order_relaxed" + + MemoryScopeStr() + + ");\n" + " atomic_work_item_fence(" + + fenceType + ", " + + get_memory_order_type_name(_subCase.op2MemOrder) + + MemoryScopeStr() + "); \n"; + else + program += std::string("") + + " hisAtomicValue = " + "atomic_load_explicit(&destMemory[hisId], " + + get_memory_order_type_name(_subCase.op2MemOrder) + + MemoryScopeStr() + ");\n"; + program += " hisValue = " + nonAtomic + "[hisId*" + + naValues.str() + "+hisAtomicValue]; \n"; + if (LocalMemory()) + program += " hisId = (hisId+1)%get_local_size(0);\n"; + else + program += " hisId = (hisId+1)%threadCount;\n"; + program += " } while(hisAtomicValue == hisValue && myValue < " + + naValues.str() + + "-1);\n" + " if(hisAtomicValue != hisValue)\n" + " { // fail\n" + " atomic_store(&destMemory[myId], myValue-1);\n"; + if (LocalMemory()) + program += " hisId = " + "(hisId+get_local_size(0)-1)%get_local_size(0);\n"; + else + program += " hisId = (hisId+threadCount-1)%threadCount;\n"; + program += " if(myValue+1 < " + naValues.str() + + ")\n" + " " + + nonAtomic + "[myId*" + naValues.str() + + "+myValue+1] = hisId;\n" + " if(myValue+2 < " + + naValues.str() + + ")\n" + " " + + nonAtomic + "[myId*" + naValues.str() + + "+myValue+2] = hisAtomicValue;\n" + " if(myValue+3 < " + + naValues.str() + + ")\n" + " " + + nonAtomic + "[myId*" + naValues.str() + + 
"+myValue+3] = hisValue;\n"; + if (gDebug) { - if (hisRealValue != hisValue) - { - log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + i, hisValue, hisRealValue); - log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + i, workOffset + workSize - 1 - i); - correct = false; - return true; - } + program += " printf(\"WI %d: atomic value (%d) at index %d " + "is different than non-atomic value (%d)\\n\", tid, " + "hisAtomicValue, hisId, hisValue);\n"; } - else // I'm faster + program += " }\n"; + } + return program; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + size_t myId = tid, hisId = threadCount - 1 - tid; + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + { + HostDataType myValue = 0, hisValue; + // CPU thread typically starts faster - wait for GPU thread + myValue++; + host_atomic_store( + &destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST); + while (host_atomic_load( + &destMemory[hisId], MEMORY_ORDER_SEQ_CST) + == 0) + ; + do { - if (myValueReadByHim != myValue) - { - log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + workSize - 1 - i, myValueReadByHim, myValue); - log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + workSize - 1 - i, workOffset + i); - correct = false; - return true; - } - } - } + myValue++; + host_atomic_store( + &destMemory[myId], myValue, MEMORY_ORDER_RELAXED); + host_atomic_thread_fence(MemoryOrder()); + hisValue = host_atomic_load( + &destMemory[hisId], MEMORY_ORDER_RELAXED); + } while (myValue == hisValue && hisValue < 1000000); + oldValues[tid] = hisValue; } else { - if (myValue != NumNonAtomicVariablesPerThread()-1) - { - log_error("ERROR: Invalid atomic value #%u (%d instead of %d)\n", workOffset + i, myValue, NumNonAtomicVariablesPerThread()-1); - log_error("ERROR: Thread #%u observed invalid 
values in other thread's variables\n", workOffset + i, myValue); - correct = false; - return true; - } + HostDataType myValue = 0, hisAtomicValue, hisValue; + do + { + myValue++; + oldValues[myId * NumNonAtomicVariablesPerThread() + myValue] = + myValue; + if (_subCase.op1IsFence) + { + host_atomic_thread_fence(_subCase.op1MemOrder); + host_atomic_store( + &destMemory[myId], myValue, MEMORY_ORDER_RELAXED); + } + else + host_atomic_store( + &destMemory[myId], myValue, _subCase.op1MemOrder); + if (_subCase.op2IsFence) + { + hisAtomicValue = + host_atomic_load( + &destMemory[hisId], MEMORY_ORDER_RELAXED); + host_atomic_thread_fence(_subCase.op2MemOrder); + } + else + hisAtomicValue = + host_atomic_load( + &destMemory[hisId], _subCase.op2MemOrder); + hisValue = oldValues[hisId * NumNonAtomicVariablesPerThread() + + hisAtomicValue]; + hisId = (hisId + 1) % threadCount; + } while (hisAtomicValue == hisValue + && myValue + < (HostDataType)NumNonAtomicVariablesPerThread() - 1); + if (hisAtomicValue != hisValue) + { // fail + host_atomic_store( + &destMemory[myId], myValue - 1, MEMORY_ORDER_SEQ_CST); + if (gDebug) + { + hisId = (hisId + threadCount - 1) % threadCount; + printf("WI %d: atomic value (%d) at index %d is different " + "than non-atomic value (%d)\n", + tid, hisAtomicValue, hisId, hisValue); + } + } } - } } - return true; - } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + for (cl_uint i = 0; i < threadCount * NumNonAtomicVariablesPerThread(); + i++) + startRefValues[i] = 0; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + cl_uint workSize = LocalMemory() ? 
CurrentGroupSize() : threadCount; + for (cl_uint workOffset = 0; workOffset < threadCount; + workOffset += workSize) + { + if (workOffset + workSize > threadCount) + // last workgroup (host threads) + workSize = threadCount - workOffset; + for (cl_uint i = 0; i < workSize && workOffset + i < threadCount; + i++) + { + HostAtomicType myValue = finalValues[workOffset + i]; + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + { + HostDataType hisValue = refValues[workOffset + i]; + if (myValue == hisValue) + { + // a draw - both threads should reach final value + // 1000000 + if (myValue != 1000000) + { + log_error("ERROR: Invalid reference value #%u (%d " + "instead of 1000000)\n", + workOffset + i, myValue); + correct = false; + return true; + } + } + else + { + // slower thread (in total order of seq_cst operations) + // must know last value written by faster thread + HostAtomicType hisRealValue = + finalValues[workOffset + workSize - 1 - i]; + HostDataType myValueReadByHim = + refValues[workOffset + workSize - 1 - i]; + + // who is the winner? 
- thread with lower private + // counter value + if (myValue == hisRealValue) // forbidden result - fence + // doesn't work + { + log_error("ERROR: Atomic counter values #%u and " + "#%u are the same (%u)\n", + workOffset + i, + workOffset + workSize - 1 - i, myValue); + log_error( + "ERROR: Both threads have outdated values read " + "from another thread (%u and %u)\n", + hisValue, myValueReadByHim); + correct = false; + return true; + } + if (myValue > hisRealValue) // I'm slower + { + if (hisRealValue != hisValue) + { + log_error("ERROR: Invalid reference value #%u " + "(%d instead of %d)\n", + workOffset + i, hisValue, + hisRealValue); + log_error( + "ERROR: Slower thread #%u should know " + "value written by faster thread #%u\n", + workOffset + i, + workOffset + workSize - 1 - i); + correct = false; + return true; + } + } + else // I'm faster + { + if (myValueReadByHim != myValue) + { + log_error("ERROR: Invalid reference value #%u " + "(%d instead of %d)\n", + workOffset + workSize - 1 - i, + myValueReadByHim, myValue); + log_error( + "ERROR: Slower thread #%u should know " + "value written by faster thread #%u\n", + workOffset + workSize - 1 - i, + workOffset + i); + correct = false; + return true; + } + } + } + } + else + { + if (myValue != NumNonAtomicVariablesPerThread() - 1) + { + log_error("ERROR: Invalid atomic value #%u (%d instead " + "of %d)\n", + workOffset + i, myValue, + NumNonAtomicVariablesPerThread() - 1); + log_error("ERROR: Thread #%u observed invalid values " + "in other thread's variables\n", + workOffset + i, myValue); + correct = false; + return true; + } + } + } + } + return true; + } + private: - int _subCaseId; - struct TestDefinition _subCase; + int _subCaseId; + struct TestDefinition _subCase; }; -int test_atomic_fence_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fence_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int 
num_elements, + bool useSVM) { - int error = 0; - CBasicTestFence test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFence test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFence test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFence test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + 
test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_ulong(TYPE_ATOMIC_ULONG, + useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFence test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFence test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fence(cl_device_id deviceID, 
cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fence_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fence_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fence(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fence_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fence_generic(deviceID, context, queue, num_elements, + true); } -- cgit v1.2.3 From fec9d9a238dd38af18c7d606ef0340786917053e Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Wed, 7 Sep 2022 17:28:29 +0100 Subject: [NFC] Fix whitespace issues in run_conformance.py (#1491) Fix whitespace issues and remove superfluous parens in the run_conformance.py script. This addresses 288 out of the 415 issues reported by pylint. Signed-off-by: Stuart Brady --- test_conformance/run_conformance.py | 584 ++++++++++++++++++------------------ 1 file changed, 296 insertions(+), 288 deletions(-) diff --git a/test_conformance/run_conformance.py b/test_conformance/run_conformance.py index ea7f6775..52c91697 100755 --- a/test_conformance/run_conformance.py +++ b/test_conformance/run_conformance.py @@ -8,295 +8,303 @@ #// #******************************************************************/ -import os, re, sys, subprocess, time, commands, tempfile, math, string +import os +import re +import sys +import subprocess +import time +import commands +import tempfile +import math +import string DEBUG = 0 -log_file_name = "opencl_conformance_results_" + time.strftime("%Y-%m-%d_%H-%M", time.localtime())+ ".log" +log_file_name = "opencl_conformance_results_" + time.strftime("%Y-%m-%d_%H-%M", time.localtime()) + ".log" process_pid = 0 # The amount of time between printing a "." 
(if no output from test) or ":" (if output) # to the screen while the tests are running. -seconds_between_status_updates = 60*60*24*7 # effectively never +seconds_between_status_updates = 60 * 60 * 24 * 7 # effectively never + # Help info -def write_help_info() : - print("run_conformance.py test_list [CL_DEVICE_TYPE(s) to test] [partial-test-names, ...] [log=path/to/log/file/]") - print(" test_list - the .csv file containing the test names and commands to run the tests.") - print(" [partial-test-names, ...] - optional partial strings to select a subset of the tests to run.") - print(" [CL_DEVICE_TYPE(s) to test] - list of CL device types to test, default is CL_DEVICE_TYPE_DEFAULT.") - print(" [log=path/to/log/file/] - provide a path for the test log file, default is in the current directory.") - print(" (Note: spaces are not allowed in the log file path.") +def write_help_info(): + print("run_conformance.py test_list [CL_DEVICE_TYPE(s) to test] [partial-test-names, ...] [log=path/to/log/file/]") + print(" test_list - the .csv file containing the test names and commands to run the tests.") + print(" [partial-test-names, ...] 
- optional partial strings to select a subset of the tests to run.") + print(" [CL_DEVICE_TYPE(s) to test] - list of CL device types to test, default is CL_DEVICE_TYPE_DEFAULT.") + print(" [log=path/to/log/file/] - provide a path for the test log file, default is in the current directory.") + print(" (Note: spaces are not allowed in the log file path.") # Get the time formatted nicely -def get_time() : - return time.strftime("%d-%b %H:%M:%S", time.localtime()) +def get_time(): + return time.strftime("%d-%b %H:%M:%S", time.localtime()) + # Write text to the screen and the log file -def write_screen_log(text) : - global log_file - print(text) - log_file.write(text+"\n") +def write_screen_log(text): + global log_file + print(text) + log_file.write(text + "\n") + # Load the tests from a csv formated file of the form name,command def get_tests(filename, devices_to_test): - tests = [] - if (os.path.exists(filename) == False): - print("FAILED: test_list \"" + filename + "\" does not exist.") - print("") - write_help_info() - sys.exit(-1) - file = open(filename, 'r') - for line in file.readlines(): - comment = re.search("^#.*", line) - if (comment): - continue - device_specific_match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*,\s*(.+?)\s*$", line) - if (device_specific_match): - if (device_specific_match.group(1) in devices_to_test): - test_path = string.replace(device_specific_match.group(3), '/', os.sep) - test_name = string.replace(device_specific_match.group(2), '/', os.sep) - tests.append((test_name, test_path)) - else: - print("Skipping " + device_specific_match.group(2) + " because " + device_specific_match.group(1) + " is not in the list of devices to test.") - continue - match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*$", line) - if (match): - test_path = string.replace(match.group(2), '/', os.sep) - test_name = string.replace(match.group(1), '/', os.sep) - tests.append((test_name, test_path)) - return tests + tests = [] + if os.path.exists(filename) == False: + 
print("FAILED: test_list \"" + filename + "\" does not exist.") + print("") + write_help_info() + sys.exit(-1) + file = open(filename, 'r') + for line in file.readlines(): + comment = re.search("^#.*", line) + if comment: + continue + device_specific_match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*,\s*(.+?)\s*$", line) + if device_specific_match: + if device_specific_match.group(1) in devices_to_test: + test_path = string.replace(device_specific_match.group(3), '/', os.sep) + test_name = string.replace(device_specific_match.group(2), '/', os.sep) + tests.append((test_name, test_path)) + else: + print("Skipping " + device_specific_match.group(2) + " because " + device_specific_match.group(1) + " is not in the list of devices to test.") + continue + match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*$", line) + if match: + test_path = string.replace(match.group(2), '/', os.sep) + test_name = string.replace(match.group(1), '/', os.sep) + tests.append((test_name, test_path)) + return tests def run_test_checking_output(current_directory, test_dir, log_file): - global process_pid, seconds_between_status_updates - failures_this_run = 0 - start_time = time.time() - # Create a temporary file for capturing the output from the test - (output_fd, output_name) = tempfile.mkstemp() - if ( not os.path.exists(output_name)) : - write_screen_log("\n ==> ERROR: could not create temporary file %s ." 
% output_name) - os.close(output_fd) - return -1 - # Execute the test - program_to_run = test_dir_without_args = test_dir.split(None, 1)[0] - if ( os.sep == '\\' ) : program_to_run += ".exe" - if (os.path.exists(current_directory + os.sep + program_to_run)) : - os.chdir(os.path.dirname(current_directory+os.sep+test_dir_without_args) ) - try: - if (DEBUG): p = subprocess.Popen("", stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True) - else : p = subprocess.Popen(current_directory + os.sep + test_dir, stderr=output_fd, stdout=output_fd, shell=True) - except OSError: - write_screen_log("\n ==> ERROR: failed to execute test. Failing test. : " + str(OSError)) - os.close(output_fd) - return -1 - else: - write_screen_log("\n ==> ERROR: test file (" + current_directory + os.sep + program_to_run +") does not exist. Failing test.") - os.close(output_fd) - return -1 - # Set the global pid so we can kill it if this is aborted - process_pid = p.pid - # Read one character at a time from the temporary output file while the process is running. - # When we get an end-of-line, look for errors and write the results to the log file. - # This allows us to process the file as it is being produced. 
- # Keep track of the state for reading - # Whether we are done, if we have more to read, and where in the file we last read - done = False - more_to_read = True - pointer = 0 - pointer_at_last_user_update = 0 - output_this_run = False - try: - read_output = open(output_name, 'r') - except IOError: - write_screen_log("\n ==> ERROR: could not open output file from test.") - os.close(output_fd) - return -1 - line = "" - while (not done or more_to_read): - os.fsync(output_fd) - # Determine if we should display some output - elapsed_time = (time.time() - start_time) - if (elapsed_time > seconds_between_status_updates): - start_time = time.time() - # If we've received output from the test since the last update, display a # - if (pointer != pointer_at_last_user_update): - sys.stdout.write(":") - else: - sys.stdout.write(".") - pointer_at_last_user_update = pointer - sys.stdout.flush() - # Check if we're done - p.poll() - if (not done and p.returncode != None): - if (p.returncode < 0): - if (not output_this_run): - print "" - output_this_run = True - write_screen_log(" ==> ERROR: test killed/crashed: " + str(p.returncode)+ ".") - done = True - # Try reading + global process_pid, seconds_between_status_updates + failures_this_run = 0 + start_time = time.time() + # Create a temporary file for capturing the output from the test + (output_fd, output_name) = tempfile.mkstemp() + if not os.path.exists(output_name): + write_screen_log("\n ==> ERROR: could not create temporary file %s ." 
% output_name) + os.close(output_fd) + return -1 + # Execute the test + program_to_run = test_dir_without_args = test_dir.split(None, 1)[0] + if os.sep == '\\': + program_to_run += ".exe" + if os.path.exists(current_directory + os.sep + program_to_run): + os.chdir(os.path.dirname(current_directory + os.sep + test_dir_without_args)) + try: + if DEBUG: p = subprocess.Popen("", stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True) + else: p = subprocess.Popen(current_directory + os.sep + test_dir, stderr=output_fd, stdout=output_fd, shell=True) + except OSError: + write_screen_log("\n ==> ERROR: failed to execute test. Failing test. : " + str(OSError)) + os.close(output_fd) + return -1 + else: + write_screen_log("\n ==> ERROR: test file (" + current_directory + os.sep + program_to_run + ") does not exist. Failing test.") + os.close(output_fd) + return -1 + # Set the global pid so we can kill it if this is aborted + process_pid = p.pid + # Read one character at a time from the temporary output file while the process is running. + # When we get an end-of-line, look for errors and write the results to the log file. + # This allows us to process the file as it is being produced. 
+ # Keep track of the state for reading + # Whether we are done, if we have more to read, and where in the file we last read + done = False + more_to_read = True + pointer = 0 + pointer_at_last_user_update = 0 + output_this_run = False try: - read_output.seek(pointer) - char_read = read_output.read(1) - except IOError: - time.sleep(1) - continue - # If we got a full line then process it - if (char_read == "\n"): - # Look for failures and report them as such - match = re.search(".*(FAILED|ERROR).*", line) - if (match): - if (not output_this_run): - print "" - output_this_run = True - print(" ==> " + line.replace('\n','')) - match = re.search(".*FAILED.*", line) - if (match): - failures_this_run = failures_this_run + 1 - match = re.search(".*(PASSED).*", line) - if (match): - if (not output_this_run): - print "" - output_this_run = True - print(" " + line.replace('\n','')) - # Write it to the log - log_file.write(" " + line +"\n") - log_file.flush() - line = "" - pointer = pointer + 1 - # If we are at the end of the file, then re-open it to get new data - elif (char_read == ""): - more_to_read = False - read_output.close() - time.sleep(1) - try: - os.fsync(output_fd) read_output = open(output_name, 'r') - # See if there is more to read. This happens if the process ends and we have data left. - read_output.seek(pointer) - if (read_output.read(1) != ""): - more_to_read = True - except IOError: - write_screen_log("\n ==> ERROR: could not reopen output file from test.") + except IOError: + write_screen_log("\n ==> ERROR: could not open output file from test.") + os.close(output_fd) return -1 - done = True - else: - line = line + char_read - pointer = pointer + 1 - # Now we are done, so write out any remaining data in the file: - # This should only happen if the process exited with an error. 
- os.fsync(output_fd) - while (read_output.read(1) != ""): - log_file.write(read_output.read(1)) - # Return the total number of failures - if (p.returncode == 0 and failures_this_run > 0): - write_screen_log("\n ==> ERROR: Test returned 0, but number of FAILED lines reported is " + str(failures_this_run) +".") - return failures_this_run - return p.returncode - - -def run_tests(tests) : - global curent_directory - global process_pid - # Run the tests - failures = 0 - previous_test = None - test_number = 1 - for test in tests: - # Print the name of the test we're running and the time - (test_name, test_dir) = test - if (test_dir != previous_test): - print("========== " + test_dir) - log_file.write("========================================================================================\n") - log_file.write("========================================================================================\n") - log_file.write("(" + get_time() + ") Running Tests: " + test_dir +"\n") - log_file.write("========================================================================================\n") - log_file.write("========================================================================================\n") - previous_test = test_dir - print("("+get_time()+") BEGIN " + test_name.ljust(40) +": "), - log_file.write(" ----------------------------------------------------------------------------------------\n") - log_file.write(" (" + get_time() + ") Running Sub Test: " + test_name + "\n") - log_file.write(" ----------------------------------------------------------------------------------------\n") - log_file.flush() - sys.stdout.flush() - - # Run the test - result = 0 - start_time = time.time() - try: - process_pid = 0 - result = run_test_checking_output(current_directory, test_dir, log_file) - except KeyboardInterrupt: - # Catch an interrupt from the user - write_screen_log("\nFAILED: Execution interrupted. 
Killing test process, but not aborting full test run.") - os.kill(process_pid, 9) - answer = raw_input("Abort all tests? (y/n)") - if (answer.find("y") != -1): - write_screen_log("\nUser chose to abort all tests.") - log_file.close() - sys.exit(-1) - else: - write_screen_log("\nUser chose to continue with other tests. Reporting this test as failed.") - result = 1 - run_time = (time.time() - start_time) - - # Move print the finish status - if (result == 0): - print("("+get_time()+") PASSED " + test_name.ljust(40) +": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) +")"), - else: - print("("+get_time()+") FAILED " + test_name.ljust(40) +": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) +")"), - - test_number = test_number + 1 - log_file.write(" ----------------------------------------------------------------------------------------\n") - log_file.flush() - - print("") - if (result != 0): - log_file.write(" *******************************************************************************************\n") - log_file.write(" * ("+get_time()+") Test " + test_name + " ==> FAILED: " + str(result)+"\n") - log_file.write(" *******************************************************************************************\n") - failures = failures + 1 - else: - log_file.write(" ("+get_time()+") Test " + test_name +" passed in " + str(run_time) + "s\n") - - log_file.write(" ----------------------------------------------------------------------------------------\n") - log_file.write("\n") - return failures - - - + line = "" + while not done or more_to_read: + os.fsync(output_fd) + # Determine if we should display some output + elapsed_time = (time.time() - start_time) + if elapsed_time > seconds_between_status_updates: + start_time = time.time() + # If we've received output from the test since the last update, display a # + if pointer != pointer_at_last_user_update: + 
sys.stdout.write(":") + else: + sys.stdout.write(".") + pointer_at_last_user_update = pointer + sys.stdout.flush() + # Check if we're done + p.poll() + if not done and p.returncode != None: + if p.returncode < 0: + if not output_this_run: + print "" + output_this_run = True + write_screen_log(" ==> ERROR: test killed/crashed: " + str(p.returncode) + ".") + done = True + # Try reading + try: + read_output.seek(pointer) + char_read = read_output.read(1) + except IOError: + time.sleep(1) + continue + # If we got a full line then process it + if char_read == "\n": + # Look for failures and report them as such + match = re.search(".*(FAILED|ERROR).*", line) + if match: + if not output_this_run: + print "" + output_this_run = True + print(" ==> " + line.replace('\n', '')) + match = re.search(".*FAILED.*", line) + if match: + failures_this_run = failures_this_run + 1 + match = re.search(".*(PASSED).*", line) + if match: + if not output_this_run: + print "" + output_this_run = True + print(" " + line.replace('\n', '')) + # Write it to the log + log_file.write(" " + line + "\n") + log_file.flush() + line = "" + pointer = pointer + 1 + # If we are at the end of the file, then re-open it to get new data + elif char_read == "": + more_to_read = False + read_output.close() + time.sleep(1) + try: + os.fsync(output_fd) + read_output = open(output_name, 'r') + # See if there is more to read. This happens if the process ends and we have data left. + read_output.seek(pointer) + if read_output.read(1) != "": + more_to_read = True + except IOError: + write_screen_log("\n ==> ERROR: could not reopen output file from test.") + return -1 + done = True + else: + line = line + char_read + pointer = pointer + 1 + # Now we are done, so write out any remaining data in the file: + # This should only happen if the process exited with an error. 
+ os.fsync(output_fd) + while read_output.read(1) != "": + log_file.write(read_output.read(1)) + # Return the total number of failures + if (p.returncode == 0 and failures_this_run > 0): + write_screen_log("\n ==> ERROR: Test returned 0, but number of FAILED lines reported is " + str(failures_this_run) + ".") + return failures_this_run + return p.returncode + + +def run_tests(tests): + global curent_directory + global process_pid + # Run the tests + failures = 0 + previous_test = None + test_number = 1 + for test in tests: + # Print the name of the test we're running and the time + (test_name, test_dir) = test + if test_dir != previous_test: + print("========== " + test_dir) + log_file.write("========================================================================================\n") + log_file.write("========================================================================================\n") + log_file.write("(" + get_time() + ") Running Tests: " + test_dir + "\n") + log_file.write("========================================================================================\n") + log_file.write("========================================================================================\n") + previous_test = test_dir + print("(" + get_time() + ") BEGIN " + test_name.ljust(40) + ": "), + log_file.write(" ----------------------------------------------------------------------------------------\n") + log_file.write(" (" + get_time() + ") Running Sub Test: " + test_name + "\n") + log_file.write(" ----------------------------------------------------------------------------------------\n") + log_file.flush() + sys.stdout.flush() + + # Run the test + result = 0 + start_time = time.time() + try: + process_pid = 0 + result = run_test_checking_output(current_directory, test_dir, log_file) + except KeyboardInterrupt: + # Catch an interrupt from the user + write_screen_log("\nFAILED: Execution interrupted. 
Killing test process, but not aborting full test run.") + os.kill(process_pid, 9) + answer = raw_input("Abort all tests? (y/n)") + if answer.find("y") != -1: + write_screen_log("\nUser chose to abort all tests.") + log_file.close() + sys.exit(-1) + else: + write_screen_log("\nUser chose to continue with other tests. Reporting this test as failed.") + result = 1 + run_time = (time.time() - start_time) + + # Move print the finish status + if result == 0: + print("(" + get_time() + ") PASSED " + test_name.ljust(40) + ": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) + ")"), + else: + print("(" + get_time() + ") FAILED " + test_name.ljust(40) + ": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) + ")"), + + test_number = test_number + 1 + log_file.write(" ----------------------------------------------------------------------------------------\n") + log_file.flush() + + print("") + if result != 0: + log_file.write(" *******************************************************************************************\n") + log_file.write(" * (" + get_time() + ") Test " + test_name + " ==> FAILED: " + str(result) + "\n") + log_file.write(" *******************************************************************************************\n") + failures = failures + 1 + else: + log_file.write(" (" + get_time() + ") Test " + test_name + " passed in " + str(run_time) + "s\n") + + log_file.write(" ----------------------------------------------------------------------------------------\n") + log_file.write("\n") + return failures # ######################## # Begin OpenCL conformance run script # ######################## -if (len(sys.argv) < 2): - write_help_info() - sys.exit(-1) - +if len(sys.argv) < 2: + write_help_info() + sys.exit(-1) current_directory = os.getcwd() # Open the log file for arg in sys.argv: - match = re.search("log=(\S+)", arg) - if (match): - log_file_name = 
match.group(1).rstrip('/') + os.sep + log_file_name + match = re.search("log=(\S+)", arg) + if match: + log_file_name = match.group(1).rstrip('/') + os.sep + log_file_name try: - log_file = open(log_file_name, "w") + log_file = open(log_file_name, "w") except IOError: - print "Could not open log file " + log_file_name + print "Could not open log file " + log_file_name # Determine which devices to test device_types = ["CL_DEVICE_TYPE_DEFAULT", "CL_DEVICE_TYPE_CPU", "CL_DEVICE_TYPE_GPU", "CL_DEVICE_TYPE_ACCELERATOR", "CL_DEVICE_TYPE_ALL"] devices_to_test = [] for device in device_types: - if device in sys.argv[2:]: - devices_to_test.append(device) -if (len(devices_to_test) == 0): - devices_to_test = ["CL_DEVICE_TYPE_DEFAULT"] + if device in sys.argv[2:]: + devices_to_test.append(device) +if len(devices_to_test) == 0: + devices_to_test = ["CL_DEVICE_TYPE_DEFAULT"] write_screen_log("Testing on: " + str(devices_to_test)) # Get the tests @@ -306,52 +314,52 @@ tests = get_tests(sys.argv[1], devices_to_test) tests_to_use = [] num_of_patterns_to_match = 0 for arg in sys.argv[2:]: - if arg in device_types: - continue - if re.search("log=(\S+)", arg): - continue - num_of_patterns_to_match = num_of_patterns_to_match + 1 - found_it = False - for test in tests: - (test_name, test_dir) = test - if (test_name.find(arg) != -1 or test_dir.find(arg) != -1): - found_it = True - if (test not in tests_to_use): - tests_to_use.append(test) - if (found_it == False): - print("Failed to find a test matching " + arg) -if (len(tests_to_use) == 0): - if (num_of_patterns_to_match > 0): - print("FAILED: Failed to find any tests matching the given command-line options.") - print("") - write_help_info() - sys.exit(-1) + if arg in device_types: + continue + if re.search("log=(\S+)", arg): + continue + num_of_patterns_to_match = num_of_patterns_to_match + 1 + found_it = False + for test in tests: + (test_name, test_dir) = test + if (test_name.find(arg) != -1 or test_dir.find(arg) != -1): + found_it = 
True + if test not in tests_to_use: + tests_to_use.append(test) + if found_it == False: + print("Failed to find a test matching " + arg) +if len(tests_to_use) == 0: + if num_of_patterns_to_match > 0: + print("FAILED: Failed to find any tests matching the given command-line options.") + print("") + write_help_info() + sys.exit(-1) else: - tests = tests_to_use[:] + tests = tests_to_use[:] write_screen_log("Test execution arguments: " + str(sys.argv)) -write_screen_log("Logging to file " + log_file_name +".") +write_screen_log("Logging to file " + log_file_name + ".") write_screen_log("Loaded tests from " + sys.argv[1] + ", total of " + str(len(tests)) + " tests selected to run:") for (test_name, test_command) in tests: - write_screen_log(test_name.ljust(50) + " (" + test_command +")") + write_screen_log(test_name.ljust(50) + " (" + test_command + ")") # Run the tests total_failures = 0 for device_to_test in devices_to_test: - os.environ['CL_DEVICE_TYPE'] = device_to_test - write_screen_log("========================================================================================") - write_screen_log("========================================================================================") - write_screen_log(("Setting CL_DEVICE_TYPE to " + device_to_test).center(90)) - write_screen_log("========================================================================================") - write_screen_log("========================================================================================") - failures = run_tests(tests) - write_screen_log("========================================================================================") - if (failures == 0): - write_screen_log(">> TEST on " + device_to_test + " PASSED") - else: - write_screen_log(">> TEST on " + device_to_test + " FAILED (" + str(failures) + " FAILURES)") - write_screen_log("========================================================================================") - total_failures = total_failures + failures - 
-write_screen_log("("+get_time()+") Testing complete. " + str(total_failures) + " failures for " + str(len(tests)) + " tests.") + os.environ['CL_DEVICE_TYPE'] = device_to_test + write_screen_log("========================================================================================") + write_screen_log("========================================================================================") + write_screen_log(("Setting CL_DEVICE_TYPE to " + device_to_test).center(90)) + write_screen_log("========================================================================================") + write_screen_log("========================================================================================") + failures = run_tests(tests) + write_screen_log("========================================================================================") + if failures == 0: + write_screen_log(">> TEST on " + device_to_test + " PASSED") + else: + write_screen_log(">> TEST on " + device_to_test + " FAILED (" + str(failures) + " FAILURES)") + write_screen_log("========================================================================================") + total_failures = total_failures + failures + +write_screen_log("(" + get_time() + ") Testing complete. " + str(total_failures) + " failures for " + str(len(tests)) + " tests.") log_file.close() -- cgit v1.2.3 From 6554c4901825381ee0d4d8ba199a66afff941a1a Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 8 Sep 2022 12:54:36 +0100 Subject: [NFCI] Remove unused variables and enable -Wunused-variable (#1483) Remove unused variables throughout the code base and enable the `-Wunused-variable` warning flag globally to prevent new unused variable issues being introduced in the future. This is mostly a non-functional change, with one exception: - In `test_conformance/api/test_kernel_arg_info.cpp`, an error check of the clGetDeviceInfo return value was added. 
Signed-off-by: Sven van Haastregt --- CMakeLists.txt | 1 + test_common/gl/helpers.cpp | 1 - test_conformance/SVM/test_byte_granularity.cpp | 1 - test_conformance/SVM/test_migrate.cpp | 3 --- test_conformance/api/test_api_min_max.cpp | 3 --- test_conformance/api/test_kernel_arg_info.cpp | 5 +---- test_conformance/api/test_mem_object_info.cpp | 2 -- test_conformance/api/test_null_buffer_arg.cpp | 1 - test_conformance/api/test_queries.cpp | 20 -------------------- test_conformance/api/test_sub_group_dispatch.cpp | 4 +--- test_conformance/basic/test_fpmath_float.cpp | 2 -- test_conformance/basic/test_hiloeo.cpp | 2 -- test_conformance/basic/test_hostptr.cpp | 2 -- test_conformance/basic/test_preprocessors.cpp | 2 +- test_conformance/basic/test_progvar.cpp | 2 -- test_conformance/basic/test_queue_priority.cpp | 6 ------ test_conformance/basic/test_readimage3d.cpp | 2 +- test_conformance/buffers/test_buffer_migrate.cpp | 2 +- test_conformance/buffers/test_image_migrate.cpp | 1 - .../test_compiler_defines_for_extensions.cpp | 2 -- test_conformance/computeinfo/main.cpp | 6 ------ test_conformance/conversions/fplib.cpp | 4 ---- test_conformance/conversions/test_conversions.cpp | 2 -- test_conformance/events/test_callbacks.cpp | 1 - test_conformance/events/test_events.cpp | 4 ---- test_conformance/gl/common.h | 7 ++++++- test_conformance/gl/test_image_methods.cpp | 2 -- test_conformance/gl/test_images_write_common.cpp | 4 ---- test_conformance/half/Test_vStoreHalf.cpp | 2 +- .../images/clReadWriteImage/test_read_1D.cpp | 1 - .../images/clReadWriteImage/test_read_1D_array.cpp | 1 - .../images/clReadWriteImage/test_read_2D.cpp | 1 - .../images/clReadWriteImage/test_read_2D_array.cpp | 3 +-- .../images/clReadWriteImage/test_read_3D.cpp | 1 - .../images/kernel_read_write/CMakeLists.txt | 10 ++++++++++ test_conformance/math_brute_force/reference_math.cpp | 4 ++-- test_conformance/pipes/test_pipe_limits.cpp | 4 ++-- test_conformance/pipes/test_pipe_read_write.cpp | 1 - 
test_conformance/printf/test_printf.cpp | 4 +--- test_conformance/printf/util_printf.cpp | 2 -- test_conformance/select/test_select.cpp | 2 -- test_conformance/spir/run_services.cpp | 1 - test_conformance/spirv_new/main.cpp | 1 - .../test_cl_khr_spirv_no_integer_wrap_decoration.cpp | 1 - test_conformance/spirv_new/test_op_fmath.cpp | 3 --- test_conformance/spirv_new/test_op_function.cpp | 1 - test_conformance/spirv_new/test_op_negate.cpp | 1 - test_conformance/spirv_new/test_op_opaque.cpp | 1 - .../spirv_new/test_op_vector_times_scalar.cpp | 2 -- 49 files changed, 29 insertions(+), 112 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fe56d0fa..b7c86ba1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,6 +89,7 @@ endmacro(add_cxx_flag_if_supported) if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") add_cxx_flag_if_supported(-Wmisleading-indentation) + add_cxx_flag_if_supported(-Wunused-variable) add_cxx_flag_if_supported(-Wno-narrowing) add_cxx_flag_if_supported(-Wno-format) add_cxx_flag_if_supported(-Werror) diff --git a/test_common/gl/helpers.cpp b/test_common/gl/helpers.cpp index def78d75..b9f95a94 100644 --- a/test_common/gl/helpers.cpp +++ b/test_common/gl/helpers.cpp @@ -1381,7 +1381,6 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, //calculating colors double color_delta = 1.0 / (total_layers * samples); - double color = color_delta; if (attachment != GL_DEPTH_ATTACHMENT && attachment != GL_DEPTH_STENCIL_ATTACHMENT) { glDisable(GL_DEPTH_TEST); diff --git a/test_conformance/SVM/test_byte_granularity.cpp b/test_conformance/SVM/test_byte_granularity.cpp index 403528b9..6dbb3649 100644 --- a/test_conformance/SVM/test_byte_granularity.cpp +++ b/test_conformance/SVM/test_byte_granularity.cpp @@ -58,7 +58,6 @@ int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_qu cl_uint num_devices = 0; cl_int err = CL_SUCCESS; - cl_int rval = CL_SUCCESS; err = 
create_cl_objects(deviceID, &byte_manipulation_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER); if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. diff --git a/test_conformance/SVM/test_migrate.cpp b/test_conformance/SVM/test_migrate.cpp index 2a1ce051..f624bcd9 100644 --- a/test_conformance/SVM/test_migrate.cpp +++ b/test_conformance/SVM/test_migrate.cpp @@ -78,9 +78,6 @@ int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue cl_uint amem[GLOBAL_SIZE]; cl_uint bmem[GLOBAL_SIZE]; cl_uint cmem[GLOBAL_SIZE]; - cl_uint ramem[GLOBAL_SIZE]; - cl_uint rbmem[GLOBAL_SIZE]; - cl_uint rcmem[GLOBAL_SIZE]; cl_event evs[20]; const size_t global_size = GLOBAL_SIZE; diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp index 9e08b16d..086008d7 100644 --- a/test_conformance/api/test_api_min_max.cpp +++ b/test_conformance/api/test_api_min_max.cpp @@ -665,8 +665,6 @@ int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_image_format image_format_desc; cl_ulong maxAllocSize; cl_uint minRequiredDimension; - size_t length; - PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) @@ -746,7 +744,6 @@ int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_image_format image_format_desc; cl_ulong maxAllocSize; cl_uint minRequiredDimension; - size_t length; PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp index 28825f10..d0681dfd 100644 --- a/test_conformance/api/test_kernel_arg_info.cpp +++ b/test_conformance/api/test_kernel_arg_info.cpp @@ -22,11 +22,8 @@ #define MINIMUM_OPENCL_PIPE_VERSION Version(2, 0) -static constexpr size_t CL_VERSION_LENGTH = 128; static constexpr size_t KERNEL_ARGUMENT_LENGTH = 128; static constexpr char KERNEL_ARGUMENT_NAME[] = "argument"; -static constexpr 
size_t KERNEL_ARGUMENT_NAME_LENGTH = - sizeof(KERNEL_ARGUMENT_NAME) + 1; static constexpr int SINGLE_KERNEL_ARG_NUMBER = 0; static constexpr int MAX_NUMBER_OF_KERNEL_ARGS = 128; @@ -183,7 +180,6 @@ static std::string generate_kernel(const std::vector& all_args, ret += "kernel void get_kernel_arg_info(\n"; for (int i = 0; i < all_args.size(); ++i) { - const KernelArgInfo& arg = all_args[i]; ret += generate_argument(all_args[i]); if (i == all_args.size() - 1) { @@ -542,6 +538,7 @@ size_t get_param_size(const std::string& arg_type, cl_device_id deviceID, cl_int err = clGetDeviceInfo(deviceID, CL_DEVICE_ADDRESS_BITS, sizeof(device_address_bits), &device_address_bits, NULL); + test_error_ret(err, "clGetDeviceInfo", 0); return (device_address_bits / 8); } diff --git a/test_conformance/api/test_mem_object_info.cpp b/test_conformance/api/test_mem_object_info.cpp index 2afe0437..8dc8f6cf 100644 --- a/test_conformance/api/test_mem_object_info.cpp +++ b/test_conformance/api/test_mem_object_info.cpp @@ -363,8 +363,6 @@ int test_get_imageObject_info( cl_mem * image, cl_mem_flags objectFlags, cl_imag cl_mem_flags flags; cl_uint mapCount; cl_uint refCount; - size_t rowPitchMultiplier; - size_t slicePitchMultiplier; cl_context otherCtx; size_t offset; size_t sz; diff --git a/test_conformance/api/test_null_buffer_arg.cpp b/test_conformance/api/test_null_buffer_arg.cpp index d412d4ea..75bdd479 100644 --- a/test_conformance/api/test_null_buffer_arg.cpp +++ b/test_conformance/api/test_null_buffer_arg.cpp @@ -149,7 +149,6 @@ int test_null_buffer_arg(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { unsigned int test_success = 0; - unsigned int i; unsigned int buffer_size; cl_int status; cl_program program; diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp index 30b5706f..a7703a76 100644 --- a/test_conformance/api/test_queries.cpp +++ b/test_conformance/api/test_queries.cpp @@ -526,26 +526,6 @@ void CL_CALLBACK 
mem_obj_destructor_callback( cl_mem, void *data ) free( data ); } -// All possible combinations of valid cl_mem_flags. -static cl_mem_flags all_flags[16] = { - 0, - CL_MEM_READ_WRITE, - CL_MEM_READ_ONLY, - CL_MEM_WRITE_ONLY, - CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, - CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, - CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, - CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, - CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, - CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, - CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, - CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, - CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, - CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, -}; - #define TEST_DEVICE_PARAM( device, paramName, val, name, type, cast ) \ error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \ test_error( error, "Unable to get device " name ); \ diff --git a/test_conformance/api/test_sub_group_dispatch.cpp b/test_conformance/api/test_sub_group_dispatch.cpp index 01d0ffa3..61d9a524 100644 --- a/test_conformance/api/test_sub_group_dispatch.cpp +++ b/test_conformance/api/test_sub_group_dispatch.cpp @@ -56,11 +56,9 @@ cl_int get_sub_group_num(cl_command_queue queue, cl_kernel kernel, clMemWrapper& int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { - static const size_t gsize0 = 80; - int i, error; + int error; size_t realSize; size_t kernel_max_subgroup_size, kernel_subgroup_count; - size_t global[] = {1,1,1}; size_t max_local; cl_platform_id platform; diff --git a/test_conformance/basic/test_fpmath_float.cpp b/test_conformance/basic/test_fpmath_float.cpp index 6e5deb4b..60d509b0 100644 --- a/test_conformance/basic/test_fpmath_float.cpp +++ b/test_conformance/basic/test_fpmath_float.cpp @@ -49,8 +49,6 @@ static const char 
*fpmul_kernel_code = "}\n"; -static const float MAX_ERR = 1e-5f; - static int verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n) { diff --git a/test_conformance/basic/test_hiloeo.cpp b/test_conformance/basic/test_hiloeo.cpp index 4cdf2ac7..3470ad00 100644 --- a/test_conformance/basic/test_hiloeo.cpp +++ b/test_conformance/basic/test_hiloeo.cpp @@ -43,8 +43,6 @@ static const unsigned int out_vector_idx[] = { 0, 0, 1, 1, 3, 4}; // input type name is strcat(gentype, vector_size_names[i]); // and output type name is // strcat(gentype, vector_size_names[out_vector_idx[i]]); -static const int size_to_idx[] = {-1,0,1,2,3,-1,-1,-1,4, - -1,-1,-1,-1,-1,-1,-1,5}; static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"}; static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 }; diff --git a/test_conformance/basic/test_hostptr.cpp b/test_conformance/basic/test_hostptr.cpp index 65af5c3c..dee78675 100644 --- a/test_conformance/basic/test_hostptr.cpp +++ b/test_conformance/basic/test_hostptr.cpp @@ -32,8 +32,6 @@ const char *hostptr_kernel_code = " dst[tid] = srcA[tid] + srcB[tid];\n" "}\n"; -static const float MAX_ERR = 1e-5f; - static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n) { cl_float r; diff --git a/test_conformance/basic/test_preprocessors.cpp b/test_conformance/basic/test_preprocessors.cpp index 2038d150..e67487eb 100644 --- a/test_conformance/basic/test_preprocessors.cpp +++ b/test_conformance/basic/test_preprocessors.cpp @@ -97,10 +97,10 @@ int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, c char programSource[4096]; char curFileName[512]; char *programPtr = programSource; - int i = 0; snprintf(curFileName, 512, "%s", __FILE__); #ifdef _WIN32 // Replace "\" with "\\" + int i = 0; while(curFileName[i] != '\0') { if (curFileName[i] == '\\') { int j = i + 1; diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp index 
62c0a6be..c0ad870a 100644 --- a/test_conformance/basic/test_progvar.cpp +++ b/test_conformance/basic/test_progvar.cpp @@ -1642,8 +1642,6 @@ int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_ "supported on this device\n"); return TEST_SKIPPED_ITSELF; } - size_t max_size = 0; - size_t pref_size = 0; cl_int err = CL_SUCCESS; diff --git a/test_conformance/basic/test_queue_priority.cpp b/test_conformance/basic/test_queue_priority.cpp index 57ce5041..ff6283cd 100644 --- a/test_conformance/basic/test_queue_priority.cpp +++ b/test_conformance/basic/test_queue_priority.cpp @@ -48,13 +48,9 @@ static const char *fpmul_kernel_code = " dst[tid] = srcA[tid] * srcB[tid];\n" "}\n"; - -static const float MAX_ERR = 1e-5f; - static int verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n, int fileNum) { - float r; int i; float * reference_ptr = (float *)malloc(n * sizeof(float)); @@ -82,7 +78,6 @@ verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n, int fileNum) static int verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n, int fileNum) { - float r; int i; float * reference_ptr = (float *)malloc(n * sizeof(float)); @@ -110,7 +105,6 @@ verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n, int fileNum) static int verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n, int fileNum) { - float r; int i; float * reference_ptr = (float *)malloc(n * sizeof(float)); diff --git a/test_conformance/basic/test_readimage3d.cpp b/test_conformance/basic/test_readimage3d.cpp index 1337c9fb..5fd7d109 100644 --- a/test_conformance/basic/test_readimage3d.cpp +++ b/test_conformance/basic/test_readimage3d.cpp @@ -142,7 +142,7 @@ int test_readimage3d(cl_device_id device, cl_context context, cl_command_queue q int img_width = 64; int img_height = 64; int img_depth = 64; - int i, err; + int err; size_t origin[3] = {0, 0, 0}; size_t region[3] = {img_width, img_height, img_depth}; size_t length = img_width * img_height * 
img_depth * 4 * sizeof(float); diff --git a/test_conformance/buffers/test_buffer_migrate.cpp b/test_conformance/buffers/test_buffer_migrate.cpp index f3098366..6cdc271b 100644 --- a/test_conformance/buffers/test_buffer_migrate.cpp +++ b/test_conformance/buffers/test_buffer_migrate.cpp @@ -80,7 +80,7 @@ static cl_int migrateMemObject(enum migrations migrate, cl_command_queue *queues static cl_int restoreBuffer(cl_command_queue *queues, cl_mem *buffers, cl_uint num_devices, cl_mem_migration_flags *flags, cl_uint *buffer) { - cl_uint i, j; + cl_uint i; cl_int err; // If the buffer was previously migrated with undefined content, reload the content. diff --git a/test_conformance/buffers/test_image_migrate.cpp b/test_conformance/buffers/test_image_migrate.cpp index dbdca9cc..6c8acdce 100644 --- a/test_conformance/buffers/test_image_migrate.cpp +++ b/test_conformance/buffers/test_image_migrate.cpp @@ -128,7 +128,6 @@ int test_image_migrate(cl_device_id deviceID, cl_context context, cl_command_que cl_mem_migration_flags *flagsA, *flagsB, *flagsC; cl_device_partition_property property[] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, 0, 0}; cl_mem *imageA, *imageB, *imageC; - cl_mem_flags flags; cl_image_format format; cl_sampler sampler = NULL; cl_program program = NULL; diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 91441416..94657d61 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -361,8 +361,6 @@ int test_compiler_defines_for_extensions(cl_device_id device, cl_context context clProgramWrapper program; clKernelWrapper kernel; - Version version = get_device_cl_version(device); - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test"); test_error(error, "create_single_kernel_helper failed"); diff --git 
a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp index 03bdb2c1..382cd6a3 100644 --- a/test_conformance/computeinfo/main.cpp +++ b/test_conformance/computeinfo/main.cpp @@ -908,12 +908,6 @@ void dumpConfigInfo(config_info* info) { cl_name_version new_version_item = info->config.cl_name_version_array[f]; - cl_version new_version_major = - CL_VERSION_MAJOR_KHR(new_version_item.version); - cl_version new_version_minor = - CL_VERSION_MINOR_KHR(new_version_item.version); - cl_version new_version_patch = - CL_VERSION_PATCH_KHR(new_version_item.version); log_info("\t\t\"%s\" %d.%d.%d\n", new_version_item.name, CL_VERSION_MAJOR_KHR(new_version_item.version), CL_VERSION_MINOR_KHR(new_version_item.version), diff --git a/test_conformance/conversions/fplib.cpp b/test_conformance/conversions/fplib.cpp index e739b9ae..3b19b56d 100644 --- a/test_conformance/conversions/fplib.cpp +++ b/test_conformance/conversions/fplib.cpp @@ -79,7 +79,6 @@ float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd) uint32_t mantissa; if (mantShift >= 0){ uint64_t temp = (uint64_t)data >> mantShift; - uint64_t mask = (1 << mantShift) - 1; if ((temp << mantShift) != data) inExact = 1; mantissa = (uint32_t)temp; @@ -124,7 +123,6 @@ float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd) uint32_t mantissa; if (mantShift >= 0){ uint64_t temp = (uint64_t)data >> mantShift; - uint64_t mask = (1 << mantShift) - 1; if (temp << mantShift != data) inExact = 1; mantissa = (uint32_t)temp; @@ -183,7 +181,6 @@ float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd) uint32_t mantissa; if (mantShift >= 0){ uint64_t temp = data >> mantShift; - uint64_t mask = (1 << mantShift) - 1; if (temp << mantShift != data) inExact = 1; mantissa = (uint32_t)temp; @@ -209,7 +206,6 @@ float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd) uint32_t mantissa; if (mantShift >= 0){ uint64_t temp = (uint64_t)data >> mantShift; - uint64_t mask = (1 << mantShift) - 1; if (temp 
<< mantShift != data) inExact = 1; mantissa = (uint32_t)temp; diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index d489e28a..788af99b 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -1666,8 +1666,6 @@ static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat, &programSource, testName, flags); if (error) { - char buffer[2048] = ""; - vlog_error("Failed to build kernel/program.\n", error); clReleaseProgram(program); return NULL; diff --git a/test_conformance/events/test_callbacks.cpp b/test_conformance/events/test_callbacks.cpp index 6025afb7..47e898b9 100644 --- a/test_conformance/events/test_callbacks.cpp +++ b/test_conformance/events/test_callbacks.cpp @@ -79,7 +79,6 @@ int test_callback_event_single( cl_device_id device, cl_context context, cl_comm /* use struct as call back para */ CALL_BACK_USER_DATA user_data[EVENT_CALLBACK_TYPE_TOTAL]; - int index [EVENT_CALLBACK_TYPE_TOTAL]={ 0,1,2}; for( int i=0;i< EVENT_CALLBACK_TYPE_TOTAL; i++) { user_data[i].enevt_type=event_callback_types[i]; diff --git a/test_conformance/events/test_events.cpp b/test_conformance/events/test_events.cpp index 26693f99..c0efe864 100644 --- a/test_conformance/events/test_events.cpp +++ b/test_conformance/events/test_events.cpp @@ -604,8 +604,6 @@ int test_event_enqueue_marker( cl_device_id deviceID, cl_context context, cl_com #ifdef CL_VERSION_1_2 int test_event_enqueue_marker_with_event_list( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { - - cl_int status; SETUP_EVENT( context, queue ); cl_event event_list[3]={ NULL, NULL, NULL}; @@ -649,8 +647,6 @@ int test_event_enqueue_marker_with_event_list( cl_device_id deviceID, cl_context int test_event_enqueue_barrier_with_event_list( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { - - cl_int status; 
SETUP_EVENT( context, queue ); cl_event event_list[3]={ NULL, NULL, NULL}; diff --git a/test_conformance/gl/common.h b/test_conformance/gl/common.h index 36221da1..aaa6a5e7 100644 --- a/test_conformance/gl/common.h +++ b/test_conformance/gl/common.h @@ -32,7 +32,11 @@ struct format { }; // These are the typically tested formats. - +// TODO: These variables should be made const; until then, suppress unused +// variable warnings as not every translation unit including this header uses +// all variables. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" static struct format common_formats[] = { #ifdef __APPLE__ { GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, kUChar }, @@ -60,6 +64,7 @@ static struct format depth_formats[] = { { GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, kFloat }, }; #endif +#pragma GCC diagnostic pop int test_images_write_common(cl_device_id device, cl_context context, cl_command_queue queue, struct format* formats, size_t nformats, diff --git a/test_conformance/gl/test_image_methods.cpp b/test_conformance/gl/test_image_methods.cpp index 07f5b65e..7d055fb2 100644 --- a/test_conformance/gl/test_image_methods.cpp +++ b/test_conformance/gl/test_image_methods.cpp @@ -337,7 +337,6 @@ int test_image_methods_depth( cl_device_id device, cl_context context, cl_comman return 0; } - size_t pixelSize; int result = 0; GLenum depth_targets[] = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; size_t ntargets = sizeof(depth_targets) / sizeof(depth_targets[0]); @@ -378,7 +377,6 @@ int test_image_methods_multisample( cl_device_id device, cl_context context, cl_ return 0; } - size_t pixelSize; int result = 0; GLenum targets[] = {GL_TEXTURE_2D_MULTISAMPLE, GL_TEXTURE_2D_MULTISAMPLE_ARRAY}; size_t ntargets = sizeof(targets) / sizeof(targets[0]); diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp index 9bbb257b..15bad520 100644 --- 
a/test_conformance/gl/test_images_write_common.cpp +++ b/test_conformance/gl/test_images_write_common.cpp @@ -427,7 +427,6 @@ static int test_image_write( cl_context context, cl_command_queue queue, int supportsHalf(cl_context context, bool* supports_half) { int error; - size_t size; cl_uint numDev; error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL); @@ -446,7 +445,6 @@ int supportsHalf(cl_context context, bool* supports_half) int supportsMsaa(cl_context context, bool* supports_msaa) { int error; - size_t size; cl_uint numDev; error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL); @@ -465,7 +463,6 @@ int supportsMsaa(cl_context context, bool* supports_msaa) int supportsDepth(cl_context context, bool* supports_depth) { int error; - size_t size; cl_uint numDev; error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL); @@ -486,7 +483,6 @@ static int test_image_format_write( cl_context context, cl_command_queue queue, GLenum internalFormat, GLenum glType, ExplicitType type, MTdata d ) { int error; - int samples = 8; // If we're testing a half float format, then we need to determine the // rounding mode of this machine. Punt if we fail to do so. 
diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp index b1491025..591470f0 100644 --- a/test_conformance/half/Test_vStoreHalf.cpp +++ b/test_conformance/half/Test_vStoreHalf.cpp @@ -81,7 +81,7 @@ ReferenceF(cl_uint jid, cl_uint tid, void *userInfo) cl_ushort *r = cri->r + off; f2h f = cri->f; cl_ulong i = cri->i + off; - cl_uint j, rr; + cl_uint j; if (off + count > lim) count = lim - off; diff --git a/test_conformance/images/clReadWriteImage/test_read_1D.cpp b/test_conformance/images/clReadWriteImage/test_read_1D.cpp index 2a42a70e..42933c0f 100644 --- a/test_conformance/images/clReadWriteImage/test_read_1D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_1D.cpp @@ -81,7 +81,6 @@ int test_read_image_1D(cl_context context, cl_command_queue queue, for( size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) { - float lod_float = (float) lod; origin[1] = lod; size_t width_lod, row_pitch_lod; diff --git a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp index 5d5c2883..efd2a795 100644 --- a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp @@ -82,7 +82,6 @@ int test_read_image_1D_array(cl_context context, cl_command_queue queue, for( size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) { - float lod_float = (float) lod; size_t width_lod, row_pitch_lod, slice_pitch_lod; if( gTestMipmaps ) origin[2] = lod; diff --git a/test_conformance/images/clReadWriteImage/test_read_2D.cpp b/test_conformance/images/clReadWriteImage/test_read_2D.cpp index fb2e7948..b7f8553b 100644 --- a/test_conformance/images/clReadWriteImage/test_read_2D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_2D.cpp @@ -81,7 +81,6 @@ int test_read_image_2D(cl_context context, 
cl_command_queue queue, for( size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) { - float lod_float = (float) lod; origin[2] = lod; size_t width_lod, height_lod, row_pitch_lod; diff --git a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp index d0113bb7..5889ad6a 100644 --- a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp @@ -83,9 +83,8 @@ int test_read_image_2D_array(cl_context context, cl_command_queue queue, for(size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) { - float lod_float = (float) lod; origin[3] = lod; - size_t width_lod, height_lod, depth_lod, row_pitch_lod, slice_pitch_lod; + size_t width_lod, height_lod, row_pitch_lod, slice_pitch_lod; width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; height_lod = (imageInfo->height >> lod) ? 
(imageInfo->height >> lod) : 1; diff --git a/test_conformance/images/clReadWriteImage/test_read_3D.cpp b/test_conformance/images/clReadWriteImage/test_read_3D.cpp index 2dcd2433..6f73f423 100644 --- a/test_conformance/images/clReadWriteImage/test_read_3D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_3D.cpp @@ -83,7 +83,6 @@ int test_read_image_3D(cl_context context, cl_command_queue queue, for(size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) { - float lod_float = (float) lod; origin[3] = lod; size_t width_lod, height_lod, depth_lod, row_pitch_lod, slice_pitch_lod; diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt index 595f024a..54449875 100644 --- a/test_conformance/images/kernel_read_write/CMakeLists.txt +++ b/test_conformance/images/kernel_read_write/CMakeLists.txt @@ -17,5 +17,15 @@ set(${MODULE_NAME}_SOURCES ../common.cpp ) +# Make unused variables not fatal in this module; see +# https://github.com/KhronosGroup/OpenCL-CTS/issues/1484 +if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") + SET_SOURCE_FILES_PROPERTIES( + ${${MODULE_NAME}_SOURCES} + PROPERTIES + COMPILE_FLAGS "-Wno-error=unused-variable" + ) +endif() + include(../../CMakeCommon.txt) diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index a0a3d65d..f91ecb22 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -5357,10 +5357,10 @@ long double reference_acosl(long double x) 0x3243F6A8885A308DULL, 0x313198A2E0370734ULL }; // first 126 bits of pi // http://www.super-computing.org/pi-hexa_current.html - long double head, tail, temp; + long double head, tail; #if __LDBL_MANT_DIG__ >= 64 // long double has 64-bits of precision or greater - temp = (long double)pi_bits[0] * 0x1.0p64L; + 
long double temp = (long double)pi_bits[0] * 0x1.0p64L; head = temp + (long double)pi_bits[1]; temp -= head; // rounding err rounding pi_bits[1] into head tail = (long double)pi_bits[1] + temp; diff --git a/test_conformance/pipes/test_pipe_limits.cpp b/test_conformance/pipes/test_pipe_limits.cpp index 169ab80c..7e979251 100644 --- a/test_conformance/pipes/test_pipe_limits.cpp +++ b/test_conformance/pipes/test_pipe_limits.cpp @@ -163,7 +163,7 @@ int test_pipe_max_args(cl_device_id deviceID, cl_context context, cl_command_que cl_int err; cl_int size; int num_pipe_elements = 1024; - int i, j; + int i; int max_pipe_args; std::stringstream source; clEventWrapper producer_sync_event = NULL; @@ -648,4 +648,4 @@ int test_pipe_max_active_reservations(cl_device_id deviceID, cl_context context, } return 0; -} \ No newline at end of file +} diff --git a/test_conformance/pipes/test_pipe_read_write.cpp b/test_conformance/pipes/test_pipe_read_write.cpp index dd0d1216..a502e03e 100644 --- a/test_conformance/pipes/test_pipe_read_write.cpp +++ b/test_conformance/pipes/test_pipe_read_write.cpp @@ -626,7 +626,6 @@ int test_pipe_readwrite_struct_generic( cl_device_id deviceID, cl_context contex size_t size = sizeof(TestStruct); size_t global_work_size[3]; cl_int err; - int total_errors = 0; int i; MTdataHolder d(gRandomSeed); clEventWrapper producer_sync_event = NULL; diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp index 12ff6535..a32ee4ea 100644 --- a/test_conformance/printf/test_printf.cpp +++ b/test_conformance/printf/test_printf.cpp @@ -232,10 +232,8 @@ int waitForEvent(cl_event* event) //----------------------------------------- static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context context,const unsigned int testId,const unsigned int testNum,bool isLongSupport,bool is64bAddrSpace) { - int err,i; + int err; cl_program program; - cl_device_id devID; - char buildLog[ 1024 * 128 ]; char testname[256] = {0}; char 
addrSpaceArgument[256] = {0}; char addrSpacePAddArgument[256] = {0}; diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp index 3546c5f5..d45e1d43 100644 --- a/test_conformance/printf/util_printf.cpp +++ b/test_conformance/printf/util_printf.cpp @@ -842,8 +842,6 @@ static void hexRefBuilder(printDataGenParameters& params, char* refResult, const */ void generateRef(const cl_device_id device) { - int fd = -1; - char _refBuffer[ANALYSIS_BUFFER_SIZE]; const cl_device_fp_config fpConfig = get_default_rounding_mode(device); const RoundingMode hostRound = get_round(); RoundingMode deviceRound; diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index e659206e..27ee5ffd 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -173,8 +173,6 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont char extension[128] = ""; int err = 0; - int i; // generic, re-usable loop variable - const char *source[] = { extension, "__kernel void ", testname, diff --git a/test_conformance/spir/run_services.cpp b/test_conformance/spir/run_services.cpp index 3162e16f..6e06d53c 100644 --- a/test_conformance/spir/run_services.cpp +++ b/test_conformance/spir/run_services.cpp @@ -213,7 +213,6 @@ cl_kernel create_kernel_helper( cl_program program, const std::string& kernel_na { int error = CL_SUCCESS; cl_kernel kernel = NULL; - cl_device_id device = get_program_device(program); /* And create a kernel from it */ kernel = clCreateKernel( program, kernel_name.c_str(), &error ); if( kernel == NULL || error != CL_SUCCESS) diff --git a/test_conformance/spirv_new/main.cpp b/test_conformance/spirv_new/main.cpp index 5a8664b6..41566837 100644 --- a/test_conformance/spirv_new/main.cpp +++ b/test_conformance/spirv_new/main.cpp @@ -203,7 +203,6 @@ int get_program_with_il(clProgramWrapper &prog, const cl_device_id deviceID, test_status InitCL(cl_device_id 
id) { test_status spirv_status; - bool force = true; spirv_status = check_spirv_compilation_readiness(id); if (spirv_status != TEST_PASS) { diff --git a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp index 6a4982eb..0728ea03 100644 --- a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp +++ b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp @@ -109,7 +109,6 @@ int test_ext_cl_khr_spirv_no_integer_wrap_decoration(cl_device_id deviceID, kernelStr = kernelStream.str(); } - size_t kernelLen = kernelStr.size(); const char *kernelBuf = kernelStr.c_str(); for (int i = 0; i < num; i++) { diff --git a/test_conformance/spirv_new/test_op_fmath.cpp b/test_conformance/spirv_new/test_op_fmath.cpp index bec0667c..61e2864d 100644 --- a/test_conformance/spirv_new/test_op_fmath.cpp +++ b/test_conformance/spirv_new/test_op_fmath.cpp @@ -79,11 +79,8 @@ int test_fmath(cl_device_id deviceID, kernelStr = kernelStream.str(); } - size_t kernelLen = kernelStr.size(); const char *kernelBuf = kernelStr.c_str(); - const char *options = fast_math ? 
"-cl-fast-relaxed-math" : NULL; - std::vector h_ref(num); { diff --git a/test_conformance/spirv_new/test_op_function.cpp b/test_conformance/spirv_new/test_op_function.cpp index caa3e0d3..16183e80 100644 --- a/test_conformance/spirv_new/test_op_function.cpp +++ b/test_conformance/spirv_new/test_op_function.cpp @@ -33,7 +33,6 @@ int test_function(cl_device_id deviceID, err = clEnqueueWriteBuffer(queue, in, CL_TRUE, 0, bytes, &h_in[0], 0, NULL, NULL); SPIRV_CHECK_ERROR(err, "Failed to copy to in buffer"); - cl_uint bits = sizeof(void *) * 8; std::string spvStr = std::string("op_function") + "_" + std::string(funcType); const char *spvName = spvStr.c_str(); diff --git a/test_conformance/spirv_new/test_op_negate.cpp b/test_conformance/spirv_new/test_op_negate.cpp index 1891c9bb..e3dc1f34 100644 --- a/test_conformance/spirv_new/test_op_negate.cpp +++ b/test_conformance/spirv_new/test_op_negate.cpp @@ -43,7 +43,6 @@ int test_negation(cl_device_id deviceID, err = clEnqueueWriteBuffer(queue, in, CL_TRUE, 0, bytes, &h_in[0], 0, NULL, NULL); SPIRV_CHECK_ERROR(err, "Failed to copy to in buffer"); - cl_uint bits = sizeof(void *) * 8; std::string spvStr = std::string(funcName) + "_" + std::string(Tname); const char *spvName = spvStr.c_str(); diff --git a/test_conformance/spirv_new/test_op_opaque.cpp b/test_conformance/spirv_new/test_op_opaque.cpp index 067d9e4e..e6216061 100644 --- a/test_conformance/spirv_new/test_op_opaque.cpp +++ b/test_conformance/spirv_new/test_op_opaque.cpp @@ -17,7 +17,6 @@ or Khronos Conformance Test Source License Agreement as executed between Khronos TEST_SPIRV_FUNC(op_type_opaque_simple) { const char *name = "opaque"; - int num = (int)(1 << 10); cl_int err = CL_SUCCESS; std::vector buffer_vec = readSPIRV(name); diff --git a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp index 0a604bcf..0859668c 100644 --- a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp +++ 
b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp @@ -75,7 +75,6 @@ int test_vector_times_scalar(cl_device_id deviceID, kernelStr = kernelStream.str(); } - size_t kernelLen = kernelStr.size(); const char *kernelBuf = kernelStr.c_str(); std::vector h_ref(num); @@ -107,7 +106,6 @@ int test_vector_times_scalar(cl_device_id deviceID, SPIRV_CHECK_ERROR(err, "Failed to read from ref"); } - cl_uint bits = sizeof(void *) * 8; std::string ref = "vector_times_scalar_"; ref += Tname; const char *spvName = ref.c_str(); -- cgit v1.2.3 From 89c8d87963fb5cdafd2632d3892b10626a73ad2d Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 9 Sep 2022 17:58:31 +0100 Subject: [NFC] Fix unused variable warning in Release builds (#1494) The condition inside the assert is dropped in Release builds, so `num_printed` becomes unused. Signed-off-by: Sven van Haastregt --- test_conformance/basic/test_progvar.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp index c0ad870a..9c872be5 100644 --- a/test_conformance/basic/test_progvar.cpp +++ b/test_conformance/basic/test_progvar.cpp @@ -1256,6 +1256,7 @@ static int l_capacity( cl_device_id device, cl_context context, cl_command_queue char prog_src[MAX_STR]; int num_printed = snprintf(prog_src,sizeof(prog_src),prog_src_template,max_size, max_size); assert( num_printed < MAX_STR ); // or increase MAX_STR + (void)num_printed; StringTable ksrc; ksrc.add( prog_src ); -- cgit v1.2.3 From 00f21739e5f474bf55d8912756121aabb1d3045e Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Mon, 12 Sep 2022 11:49:13 +0100 Subject: Minor cleanups for run_conformance.py (#1492) Use the print function from futures for Python 3 compatibility, remove an unreachable statement, remove unused imports, and add a missing sys.exit call when opening the log file fails. 
Signed-off-by: Stuart Brady --- test_conformance/run_conformance.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test_conformance/run_conformance.py b/test_conformance/run_conformance.py index 52c91697..bb8f86ff 100755 --- a/test_conformance/run_conformance.py +++ b/test_conformance/run_conformance.py @@ -8,14 +8,14 @@ #// #******************************************************************/ +from __future__ import print_function + import os import re import sys import subprocess import time -import commands import tempfile -import math import string DEBUG = 0 @@ -144,7 +144,7 @@ def run_test_checking_output(current_directory, test_dir, log_file): if not done and p.returncode != None: if p.returncode < 0: if not output_this_run: - print "" + print("") output_this_run = True write_screen_log(" ==> ERROR: test killed/crashed: " + str(p.returncode) + ".") done = True @@ -161,7 +161,7 @@ def run_test_checking_output(current_directory, test_dir, log_file): match = re.search(".*(FAILED|ERROR).*", line) if match: if not output_this_run: - print "" + print("") output_this_run = True print(" ==> " + line.replace('\n', '')) match = re.search(".*FAILED.*", line) @@ -170,7 +170,7 @@ def run_test_checking_output(current_directory, test_dir, log_file): match = re.search(".*(PASSED).*", line) if match: if not output_this_run: - print "" + print("") output_this_run = True print(" " + line.replace('\n', '')) # Write it to the log @@ -193,7 +193,6 @@ def run_test_checking_output(current_directory, test_dir, log_file): except IOError: write_screen_log("\n ==> ERROR: could not reopen output file from test.") return -1 - done = True else: line = line + char_read pointer = pointer + 1 @@ -227,7 +226,7 @@ def run_tests(tests): log_file.write("========================================================================================\n") log_file.write("========================================================================================\n") 
previous_test = test_dir - print("(" + get_time() + ") BEGIN " + test_name.ljust(40) + ": "), + print("(" + get_time() + ") BEGIN " + test_name.ljust(40) + ": ", end='') log_file.write(" ----------------------------------------------------------------------------------------\n") log_file.write(" (" + get_time() + ") Running Sub Test: " + test_name + "\n") log_file.write(" ----------------------------------------------------------------------------------------\n") @@ -256,9 +255,9 @@ def run_tests(tests): # Move print the finish status if result == 0: - print("(" + get_time() + ") PASSED " + test_name.ljust(40) + ": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) + ")"), + print("(" + get_time() + ") PASSED " + test_name.ljust(40) + ": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) + ")", end='') else: - print("(" + get_time() + ") FAILED " + test_name.ljust(40) + ": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) + ")"), + print("(" + get_time() + ") FAILED " + test_name.ljust(40) + ": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) + ")", end='') test_number = test_number + 1 log_file.write(" ----------------------------------------------------------------------------------------\n") @@ -295,7 +294,8 @@ for arg in sys.argv: try: log_file = open(log_file_name, "w") except IOError: - print "Could not open log file " + log_file_name + print("Could not open log file " + log_file_name) + sys.exit(-1) # Determine which devices to test device_types = ["CL_DEVICE_TYPE_DEFAULT", "CL_DEVICE_TYPE_CPU", "CL_DEVICE_TYPE_GPU", "CL_DEVICE_TYPE_ACCELERATOR", "CL_DEVICE_TYPE_ALL"] -- cgit v1.2.3 From d928ac059c2fb175974af0b1abdf888f5f7db2cb Mon Sep 17 00:00:00 2001 From: niranjanjoshi121 <43807392+niranjanjoshi121@users.noreply.github.com> Date: Mon, 12 Sep 2022 17:12:06 
+0530 Subject: Use correct size for memory allocation in SVM test (#1496) Memory is allocated for cl_int, but mapped as size_t. Use size_t instead of cl_int during allocation and mapping for consistency. --- test_conformance/SVM/test_shared_address_space_fine_grain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/SVM/test_shared_address_space_fine_grain.cpp b/test_conformance/SVM/test_shared_address_space_fine_grain.cpp index a98a880c..3350972e 100644 --- a/test_conformance/SVM/test_shared_address_space_fine_grain.cpp +++ b/test_conformance/SVM/test_shared_address_space_fine_grain.cpp @@ -47,7 +47,7 @@ int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context c test_error2(error, pNodes, "malloc failed"); // this allocation holds an index into the nodes buffer, it is used for node allocation - size_t* pAllocator = (size_t*) align_malloc(sizeof(cl_int), 128); + size_t *pAllocator = (size_t *)align_malloc(sizeof(size_t), 128); test_error2(error, pAllocator, "malloc failed"); // this allocation holds the count of correct nodes, which is computed by the verify kernel. 
-- cgit v1.2.3 From 1d74c85ff3ba210e8d14fa81feff237dcb52529a Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 13 Sep 2022 13:42:32 +0100 Subject: [NFC] Reformat code in events test (#1497) Signed-off-by: Stuart Brady --- test_conformance/events/action_classes.cpp | 529 +++++++++------ test_conformance/events/action_classes.h | 430 ++++++------ test_conformance/events/main.cpp | 62 +- test_conformance/events/procs.h | 127 +++- test_conformance/events/testBase.h | 5 +- test_conformance/events/test_callbacks.cpp | 371 ++++++----- .../events/test_event_dependencies.cpp | 542 ++++++++++----- test_conformance/events/test_events.cpp | 730 ++++++++++++--------- test_conformance/events/test_userevents.cpp | 426 +++++++----- .../events/test_userevents_multithreaded.cpp | 38 +- test_conformance/events/test_waitlists.cpp | 267 ++++---- 11 files changed, 2079 insertions(+), 1448 deletions(-) diff --git a/test_conformance/events/action_classes.cpp b/test_conformance/events/action_classes.cpp index d70d76bd..a84be6b6 100644 --- a/test_conformance/events/action_classes.cpp +++ b/test_conformance/events/action_classes.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -19,7 +19,8 @@ const cl_uint BufferSizeReductionFactor = 20; -cl_int Action::IGetPreferredImageSize2D( cl_device_id device, size_t &outWidth, size_t &outHeight ) +cl_int Action::IGetPreferredImageSize2D(cl_device_id device, size_t &outWidth, + size_t &outHeight) { cl_ulong maxAllocSize; size_t maxWidth, maxHeight; @@ -27,23 +28,27 @@ cl_int Action::IGetPreferredImageSize2D( cl_device_id device, size_t &outWidt // Get the largest possible buffer we could allocate - error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); - error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); - error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); - test_error( error, "Unable to get device config" ); + error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, + sizeof(maxWidth), &maxWidth, NULL); + error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, + sizeof(maxHeight), &maxHeight, NULL); + test_error(error, "Unable to get device config"); // Create something of a decent size - if( maxWidth * maxHeight * 4 > maxAllocSize / BufferSizeReductionFactor ) + if (maxWidth * maxHeight * 4 > maxAllocSize / BufferSizeReductionFactor) { - float rootSize = sqrtf( (float)( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) ); + float rootSize = + sqrtf((float)(maxAllocSize / (BufferSizeReductionFactor * 4))); - if( (size_t)rootSize > maxWidth ) + if ((size_t)rootSize > maxWidth) outWidth = maxWidth; else outWidth = (size_t)rootSize; - outHeight = (size_t)( ( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) / outWidth ); - if( outHeight > maxHeight ) - outHeight = maxHeight; + outHeight = (size_t)((maxAllocSize / (BufferSizeReductionFactor * 4)) + / outWidth); + if (outHeight > 
maxHeight) outHeight = maxHeight; } else { @@ -51,19 +56,18 @@ cl_int Action::IGetPreferredImageSize2D( cl_device_id device, size_t &outWidt outHeight = maxHeight; } - outWidth /=2; - outHeight /=2; + outWidth /= 2; + outHeight /= 2; - if (outWidth > 2048) - outWidth = 2048; - if (outHeight > 2048) - outHeight = 2048; + if (outWidth > 2048) outWidth = 2048; + if (outHeight > 2048) outHeight = 2048; log_info("\tImage size: %d x %d (%gMB)\n", (int)outWidth, (int)outHeight, - (double)((int)outWidth*(int)outHeight*4)/(1024.0*1024.0)); + (double)((int)outWidth * (int)outHeight * 4) / (1024.0 * 1024.0)); return CL_SUCCESS; } -cl_int Action::IGetPreferredImageSize3D( cl_device_id device, size_t &outWidth, size_t &outHeight, size_t &outDepth ) +cl_int Action::IGetPreferredImageSize3D(cl_device_id device, size_t &outWidth, + size_t &outHeight, size_t &outDepth) { cl_ulong maxAllocSize; size_t maxWidth, maxHeight, maxDepth; @@ -71,28 +75,34 @@ cl_int Action::IGetPreferredImageSize3D( cl_device_id device, size_t &outWidt // Get the largest possible buffer we could allocate - error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); - error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); - error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); - error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL ); - test_error( error, "Unable to get device config" ); + error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, + sizeof(maxWidth), &maxWidth, NULL); + error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, + sizeof(maxHeight), &maxHeight, NULL); + error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, + sizeof(maxDepth), &maxDepth, NULL); + test_error(error, 
"Unable to get device config"); // Create something of a decent size - if( (cl_ulong)maxWidth * maxHeight * maxDepth > maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) + if ((cl_ulong)maxWidth * maxHeight * maxDepth + > maxAllocSize / (BufferSizeReductionFactor * 4)) { - float rootSize = cbrtf( (float)( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) ); + float rootSize = + cbrtf((float)(maxAllocSize / (BufferSizeReductionFactor * 4))); - if( (size_t)rootSize > maxWidth ) + if ((size_t)rootSize > maxWidth) outWidth = maxWidth; else outWidth = (size_t)rootSize; - if( (size_t)rootSize > maxHeight ) + if ((size_t)rootSize > maxHeight) outHeight = maxHeight; else outHeight = (size_t)rootSize; - outDepth = (size_t)( ( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) / ( outWidth * outHeight ) ); - if( outDepth > maxDepth ) - outDepth = maxDepth; + outDepth = (size_t)((maxAllocSize / (BufferSizeReductionFactor * 4)) + / (outWidth * outHeight)); + if (outDepth > maxDepth) outDepth = maxDepth; } else { @@ -101,25 +111,25 @@ cl_int Action::IGetPreferredImageSize3D( cl_device_id device, size_t &outWidt outDepth = maxDepth; } - outWidth /=2; - outHeight /=2; - outDepth /=2; + outWidth /= 2; + outHeight /= 2; + outDepth /= 2; - if (outWidth > 512) - outWidth = 512; - if (outHeight > 512) - outHeight = 512; - if (outDepth > 512) - outDepth = 512; - log_info("\tImage size: %d x %d x %d (%gMB)\n", (int)outWidth, (int)outHeight, (int)outDepth, - (double)((int)outWidth*(int)outHeight*(int)outDepth*4)/(1024.0*1024.0)); + if (outWidth > 512) outWidth = 512; + if (outHeight > 512) outHeight = 512; + if (outDepth > 512) outDepth = 512; + log_info("\tImage size: %d x %d x %d (%gMB)\n", (int)outWidth, + (int)outHeight, (int)outDepth, + (double)((int)outWidth * (int)outHeight * (int)outDepth * 4) + / (1024.0 * 1024.0)); return CL_SUCCESS; } #pragma mark -------------------- Execution Sub-Classes ------------------------- -cl_int NDRangeKernelAction::Setup( cl_device_id device, 
cl_context context, cl_command_queue queue ) +cl_int NDRangeKernelAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { const char *long_kernel[] = { "__kernel void sample_test(__global float *src, __global int *dst)\n" @@ -132,101 +142,116 @@ cl_int NDRangeKernelAction::Setup( cl_device_id device, cl_context context, cl_c " dst[tid] = (int)src[tid] * 3;\n" " }\n" "\n" - "}\n" }; + "}\n" + }; size_t threads[1] = { 1000 }; int error; - if( create_single_kernel_helper( context, &mProgram, &mKernel, 1, long_kernel, "sample_test" ) ) + if (create_single_kernel_helper(context, &mProgram, &mKernel, 1, + long_kernel, "sample_test")) { return -1; } - error = get_max_common_work_group_size( context, mKernel, threads[0], &mLocalThreads[0] ); - test_error( error, "Unable to get work group size to use" ); + error = get_max_common_work_group_size(context, mKernel, threads[0], + &mLocalThreads[0]); + test_error(error, "Unable to get work group size to use"); mStreams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * 1000, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); mStreams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * 1000, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); /* Set the arguments */ - error = clSetKernelArg( mKernel, 0, sizeof( mStreams[0] ), &mStreams[0] ); - test_error( error, "Unable to set kernel arguments" ); - error = clSetKernelArg( mKernel, 1, sizeof( mStreams[1] ), &mStreams[1] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(mKernel, 0, sizeof(mStreams[0]), &mStreams[0]); + test_error(error, "Unable to set kernel arguments"); + error = clSetKernelArg(mKernel, 1, sizeof(mStreams[1]), &mStreams[1]); + test_error(error, "Unable to set kernel arguments"); return CL_SUCCESS; } -cl_int NDRangeKernelAction::Execute( 
cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int NDRangeKernelAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { size_t threads[1] = { 1000 }; - cl_int error = clEnqueueNDRangeKernel( queue, mKernel, 1, NULL, threads, mLocalThreads, numWaits, waits, outEvent ); - test_error( error, "Unable to execute kernel" ); + cl_int error = + clEnqueueNDRangeKernel(queue, mKernel, 1, NULL, threads, mLocalThreads, + numWaits, waits, outEvent); + test_error(error, "Unable to execute kernel"); return CL_SUCCESS; } #pragma mark -------------------- Buffer Sub-Classes ------------------------- -cl_int BufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue, bool allocate ) +cl_int BufferAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue, bool allocate) { cl_int error; cl_ulong maxAllocSize; // Get the largest possible buffer we could allocate - error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); - // Don't create a buffer quite that big, just so we have some space left over for other work - mSize = (size_t)( maxAllocSize / BufferSizeReductionFactor ); + // Don't create a buffer quite that big, just so we have some space left + // over for other work + mSize = (size_t)(maxAllocSize / BufferSizeReductionFactor); // Cap at 128M so tests complete in a reasonable amount of time. 
- if (mSize > 128 << 20) - mSize = 128 << 20; + if (mSize > 128 << 20) mSize = 128 << 20; - mSize /=2; + mSize /= 2; - log_info("\tBuffer size: %gMB\n", (double)mSize/(1024.0*1024.0)); + log_info("\tBuffer size: %gMB\n", (double)mSize / (1024.0 * 1024.0)); - mBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mSize, NULL, &error ); - test_error( error, "Unable to create buffer to test against" ); + mBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + mSize, NULL, &error); + test_error(error, "Unable to create buffer to test against"); - mOutBuffer = malloc( mSize ); - if( mOutBuffer == NULL ) + mOutBuffer = malloc(mSize); + if (mOutBuffer == NULL) { - log_error( "ERROR: Unable to allocate temp buffer (out of memory)\n" ); + log_error("ERROR: Unable to allocate temp buffer (out of memory)\n"); return CL_OUT_OF_RESOURCES; } return CL_SUCCESS; } -cl_int ReadBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int ReadBufferAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { - return BufferAction::Setup( device, context, queue, true ); + return BufferAction::Setup(device, context, queue, true); } -cl_int ReadBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int ReadBufferAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - cl_int error = clEnqueueReadBuffer( queue, mBuffer, CL_FALSE, 0, mSize, mOutBuffer, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue buffer read" ); + cl_int error = clEnqueueReadBuffer(queue, mBuffer, CL_FALSE, 0, mSize, + mOutBuffer, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue buffer read"); return CL_SUCCESS; } -cl_int WriteBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int WriteBufferAction::Setup(cl_device_id device, cl_context context, + 
cl_command_queue queue) { - return BufferAction::Setup( device, context, queue, true ); + return BufferAction::Setup(device, context, queue, true); } -cl_int WriteBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int WriteBufferAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - cl_int error = clEnqueueWriteBuffer( queue, mBuffer, CL_FALSE, 0, mSize, mOutBuffer, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue buffer write" ); + cl_int error = clEnqueueWriteBuffer(queue, mBuffer, CL_FALSE, 0, mSize, + mOutBuffer, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue buffer write"); return CL_SUCCESS; } @@ -234,40 +259,46 @@ cl_int WriteBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_ MapBufferAction::~MapBufferAction() { if (mQueue) - clEnqueueUnmapMemObject( mQueue, mBuffer, mMappedPtr, 0, NULL, NULL ); + clEnqueueUnmapMemObject(mQueue, mBuffer, mMappedPtr, 0, NULL, NULL); } -cl_int MapBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int MapBufferAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { - return BufferAction::Setup( device, context, queue, false ); + return BufferAction::Setup(device, context, queue, false); } -cl_int MapBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int MapBufferAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { cl_int error; mQueue = queue; - mMappedPtr = clEnqueueMapBuffer( queue, mBuffer, CL_FALSE, CL_MAP_READ, 0, mSize, numWaits, waits, outEvent, &error ); - test_error( error, "Unable to enqueue buffer map" ); + mMappedPtr = clEnqueueMapBuffer(queue, mBuffer, CL_FALSE, CL_MAP_READ, 0, + mSize, numWaits, waits, outEvent, &error); + test_error(error, "Unable to enqueue buffer map"); return 
CL_SUCCESS; } -cl_int UnmapBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int UnmapBufferAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { - cl_int error = BufferAction::Setup( device, context, queue, false ); - if( error != CL_SUCCESS ) - return error; + cl_int error = BufferAction::Setup(device, context, queue, false); + if (error != CL_SUCCESS) return error; - mMappedPtr = clEnqueueMapBuffer( queue, mBuffer, CL_TRUE, CL_MAP_READ, 0, mSize, 0, NULL, NULL, &error ); - test_error( error, "Unable to enqueue buffer map" ); + mMappedPtr = clEnqueueMapBuffer(queue, mBuffer, CL_TRUE, CL_MAP_READ, 0, + mSize, 0, NULL, NULL, &error); + test_error(error, "Unable to enqueue buffer map"); return CL_SUCCESS; } -cl_int UnmapBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int UnmapBufferAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - cl_int error = clEnqueueUnmapMemObject( queue, mBuffer, mMappedPtr, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue buffer unmap" ); + cl_int error = clEnqueueUnmapMemObject(queue, mBuffer, mMappedPtr, numWaits, + waits, outEvent); + test_error(error, "Unable to enqueue buffer unmap"); return CL_SUCCESS; } @@ -275,349 +306,410 @@ cl_int UnmapBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_ #pragma mark -------------------- Read/Write Image Classes ------------------------- -cl_int ReadImage2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int ReadImage2DAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) ) + if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight))) return error; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mImage = create_image_2d( 
context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); + mImage = create_image_2d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); - test_error( error, "Unable to create image to test against" ); + test_error(error, "Unable to create image to test against"); - mOutput = malloc( mWidth * mHeight * 4 ); - if( mOutput == NULL ) + mOutput = malloc(mWidth * mHeight * 4); + if (mOutput == NULL) { - log_error( "ERROR: Unable to allocate buffer: out of memory\n" ); + log_error("ERROR: Unable to allocate buffer: out of memory\n"); return CL_OUT_OF_RESOURCES; } return CL_SUCCESS; } -cl_int ReadImage2DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int ReadImage2DAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 }; - cl_int error = clEnqueueReadImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue image read" ); + cl_int error = clEnqueueReadImage(queue, mImage, CL_FALSE, origin, region, + 0, 0, mOutput, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue image read"); return CL_SUCCESS; } -cl_int ReadImage3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int ReadImage3DAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) ) + if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth))) return error; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mImage = 
create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mOutput = malloc( mWidth * mHeight * mDepth * 4 ); - if( mOutput == NULL ) + mOutput = malloc(mWidth * mHeight * mDepth * 4); + if (mOutput == NULL) { - log_error( "ERROR: Unable to allocate buffer: out of memory\n" ); + log_error("ERROR: Unable to allocate buffer: out of memory\n"); return CL_OUT_OF_RESOURCES; } return CL_SUCCESS; } -cl_int ReadImage3DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int ReadImage3DAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth }; - cl_int error = clEnqueueReadImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue image read" ); + cl_int error = clEnqueueReadImage(queue, mImage, CL_FALSE, origin, region, + 0, 0, mOutput, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue image read"); return CL_SUCCESS; } -cl_int WriteImage2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int WriteImage2DAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) ) + if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight))) return error; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); + test_error(error, "Unable to create image 
to test against"); - mOutput = malloc( mWidth * mHeight * 4 ); - if( mOutput == NULL ) + mOutput = malloc(mWidth * mHeight * 4); + if (mOutput == NULL) { - log_error( "ERROR: Unable to allocate buffer: out of memory\n" ); + log_error("ERROR: Unable to allocate buffer: out of memory\n"); return CL_OUT_OF_RESOURCES; } return CL_SUCCESS; } -cl_int WriteImage2DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int WriteImage2DAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 }; - cl_int error = clEnqueueWriteImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue image write" ); + cl_int error = + clEnqueueWriteImage(queue, mImage, CL_FALSE, origin, region, 0, 0, + mOutput, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue image write"); return CL_SUCCESS; } -cl_int WriteImage3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int WriteImage3DAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) ) + if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth))) return error; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mOutput = malloc( mWidth * mHeight * mDepth * 4 ); - if( mOutput == NULL ) + mOutput = malloc(mWidth * 
mHeight * mDepth * 4); + if (mOutput == NULL) { - log_error( "ERROR: Unable to allocate buffer: out of memory\n" ); + log_error("ERROR: Unable to allocate buffer: out of memory\n"); return CL_OUT_OF_RESOURCES; } return CL_SUCCESS; } -cl_int WriteImage3DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int WriteImage3DAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth }; - cl_int error = clEnqueueWriteImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue image write" ); + cl_int error = + clEnqueueWriteImage(queue, mImage, CL_FALSE, origin, region, 0, 0, + mOutput, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue image write"); return CL_SUCCESS; } #pragma mark -------------------- Copy Image Classes ------------------------- -cl_int CopyImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int CopyImageAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth }; - cl_int error = clEnqueueCopyImage( queue, mSrcImage, mDstImage, origin, origin, region, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue image copy" ); + cl_int error = + clEnqueueCopyImage(queue, mSrcImage, mDstImage, origin, origin, region, + numWaits, waits, outEvent); + test_error(error, "Unable to enqueue image copy"); return CL_SUCCESS; } -cl_int CopyImage2Dto2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int CopyImage2Dto2DAction::Setup(cl_device_id device, 
cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) ) + if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight))) return error; mWidth /= 2; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mSrcImage = create_image_2d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mSrcImage = create_image_2d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mDstImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mDstImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); mDepth = 1; return CL_SUCCESS; } -cl_int CopyImage2Dto3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int CopyImage2Dto3DAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) ) + if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth))) return error; mDepth /= 2; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mSrcImage = create_image_2d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mSrcImage = create_image_2d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mDstImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mDstImage = create_image_3d(context, 
CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); mDepth = 1; return CL_SUCCESS; } -cl_int CopyImage3Dto2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int CopyImage3Dto2DAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) ) + if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth))) return error; mDepth /= 2; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mSrcImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mSrcImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mDstImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mDstImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); mDepth = 1; return CL_SUCCESS; } -cl_int CopyImage3Dto3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int CopyImage3Dto3DAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) ) + if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth))) return error; mDepth /= 2; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mSrcImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + 
mSrcImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mDstImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mDstImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); return CL_SUCCESS; } #pragma mark -------------------- Copy Image/Buffer Classes ------------------------- -cl_int Copy2DImageToBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int Copy2DImageToBufferAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) ) + if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight))) return error; mWidth /= 2; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mSrcImage = create_image_2d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mSrcImage = create_image_2d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mDstBuffer = clCreateBuffer( context, CL_MEM_WRITE_ONLY, mWidth * mHeight * 4, NULL, &error ); - test_error( error, "Unable to create buffer to test against" ); + mDstBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + mWidth * mHeight * 4, NULL, &error); + test_error(error, "Unable to create buffer to test against"); return CL_SUCCESS; } -cl_int Copy2DImageToBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int Copy2DImageToBufferAction::Execute(cl_command_queue queue, + cl_uint numWaits, cl_event *waits, + 
cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 }; - cl_int error = clEnqueueCopyImageToBuffer( queue, mSrcImage, mDstBuffer, origin, region, 0, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue image to buffer copy" ); + cl_int error = + clEnqueueCopyImageToBuffer(queue, mSrcImage, mDstBuffer, origin, region, + 0, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue image to buffer copy"); return CL_SUCCESS; } -cl_int Copy3DImageToBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int Copy3DImageToBufferAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) ) + if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth))) return error; mDepth /= 2; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mSrcImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mSrcImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mDstBuffer = clCreateBuffer( context, CL_MEM_WRITE_ONLY, mWidth * mHeight * mDepth * 4, NULL, &error ); - test_error( error, "Unable to create buffer to test against" ); + mDstBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + mWidth * mHeight * mDepth * 4, NULL, &error); + test_error(error, "Unable to create buffer to test against"); return CL_SUCCESS; } -cl_int Copy3DImageToBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int Copy3DImageToBufferAction::Execute(cl_command_queue queue, + cl_uint numWaits, cl_event *waits, + cl_event *outEvent) 
{ - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth }; - cl_int error = clEnqueueCopyImageToBuffer( queue, mSrcImage, mDstBuffer, origin, region, 0, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue image to buffer copy" ); + cl_int error = + clEnqueueCopyImageToBuffer(queue, mSrcImage, mDstBuffer, origin, region, + 0, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue image to buffer copy"); return CL_SUCCESS; } -cl_int CopyBufferTo2DImageAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int CopyBufferTo2DImageAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) ) + if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight))) return error; mWidth /= 2; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mSrcBuffer = clCreateBuffer( context, CL_MEM_READ_ONLY, mWidth * mHeight * 4, NULL, &error ); - test_error( error, "Unable to create buffer to test against" ); + mSrcBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, mWidth * mHeight * 4, + NULL, &error); + test_error(error, "Unable to create buffer to test against"); - mDstImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mDstImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); return CL_SUCCESS; } -cl_int CopyBufferTo2DImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int CopyBufferTo2DImageAction::Execute(cl_command_queue queue, + cl_uint numWaits, cl_event *waits, + cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, 
mHeight, 1 }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 }; - cl_int error = clEnqueueCopyBufferToImage( queue, mSrcBuffer, mDstImage, 0, origin, region, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue buffer to image copy" ); + cl_int error = + clEnqueueCopyBufferToImage(queue, mSrcBuffer, mDstImage, 0, origin, + region, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue buffer to image copy"); return CL_SUCCESS; } -cl_int CopyBufferTo3DImageAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int CopyBufferTo3DImageAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) ) + if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth))) return error; mDepth /= 2; - mSrcBuffer = clCreateBuffer( context, CL_MEM_READ_ONLY, mWidth * mHeight * mDepth * 4, NULL, &error ); - test_error( error, "Unable to create buffer to test against" ); + mSrcBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, + mWidth * mHeight * mDepth * 4, NULL, &error); + test_error(error, "Unable to create buffer to test against"); cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mDstImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mDstImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); return CL_SUCCESS; } -cl_int CopyBufferTo3DImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int CopyBufferTo3DImageAction::Execute(cl_command_queue queue, + cl_uint numWaits, cl_event *waits, + cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth }; + 
size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth }; - cl_int error = clEnqueueCopyBufferToImage( queue, mSrcBuffer, mDstImage, 0, origin, region, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue buffer to image copy" ); + cl_int error = + clEnqueueCopyBufferToImage(queue, mSrcBuffer, mDstImage, 0, origin, + region, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue buffer to image copy"); return CL_SUCCESS; } @@ -627,34 +719,39 @@ cl_int CopyBufferTo3DImageAction::Execute( cl_command_queue queue, cl_uint numWa MapImageAction::~MapImageAction() { if (mQueue) - clEnqueueUnmapMemObject( mQueue, mImage, mMappedPtr, 0, NULL, NULL ); + clEnqueueUnmapMemObject(mQueue, mImage, mMappedPtr, 0, NULL, NULL); } -cl_int MapImageAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int MapImageAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) ) + if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight))) return error; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mImage = create_image_2d( context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mImage = create_image_2d(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + &format, mWidth, mHeight, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); return CL_SUCCESS; } -cl_int MapImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int MapImageAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { cl_int error; - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 }; size_t outPitch; mQueue = queue; - 
mMappedPtr = clEnqueueMapImage( queue, mImage, CL_FALSE, CL_MAP_READ, origin, region, &outPitch, NULL, numWaits, waits, outEvent, &error ); - test_error( error, "Unable to enqueue image map" ); + mMappedPtr = + clEnqueueMapImage(queue, mImage, CL_FALSE, CL_MAP_READ, origin, region, + &outPitch, NULL, numWaits, waits, outEvent, &error); + test_error(error, "Unable to enqueue image map"); return CL_SUCCESS; } diff --git a/test_conformance/events/action_classes.h b/test_conformance/events/action_classes.h index 069ed346..e528f11a 100644 --- a/test_conformance/events/action_classes.h +++ b/test_conformance/events/action_classes.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -23,303 +23,319 @@ // it would potentially be possible for an implementation to make actions // wait on one another based on their shared I/O, not because of their // wait lists! 
-class Action -{ - public: - Action() {} - virtual ~Action() {} - - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ) = 0; - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) = 0; - - virtual const char * GetName( void ) const = 0; - - protected: - - cl_int IGetPreferredImageSize2D( cl_device_id device, size_t &outWidth, size_t &outHeight ); - cl_int IGetPreferredImageSize3D( cl_device_id device, size_t &outWidth, size_t &outHeight, size_t &outDepth ); +class Action { +public: + Action() {} + virtual ~Action() {} + + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue) = 0; + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) = 0; + + virtual const char *GetName(void) const = 0; + +protected: + cl_int IGetPreferredImageSize2D(cl_device_id device, size_t &outWidth, + size_t &outHeight); + cl_int IGetPreferredImageSize3D(cl_device_id device, size_t &outWidth, + size_t &outHeight, size_t &outDepth); }; // Simple NDRangeKernel execution that takes a noticable amount of time -class NDRangeKernelAction : public Action -{ - public: - NDRangeKernelAction() {} - virtual ~NDRangeKernelAction() {} - - size_t mLocalThreads[ 1 ]; - clMemWrapper mStreams[ 2 ]; - clProgramWrapper mProgram; - clKernelWrapper mKernel; - - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); - - virtual const char * GetName( void ) const { return "NDRangeKernel"; } +class NDRangeKernelAction : public Action { +public: + NDRangeKernelAction() {} + virtual ~NDRangeKernelAction() {} + + size_t mLocalThreads[1]; + clMemWrapper mStreams[2]; + clProgramWrapper mProgram; + clKernelWrapper mKernel; + + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); 
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); + + virtual const char *GetName(void) const { return "NDRangeKernel"; } }; // Base action for buffer actions -class BufferAction : public Action -{ - public: - clMemWrapper mBuffer; - size_t mSize; - void *mOutBuffer; +class BufferAction : public Action { +public: + clMemWrapper mBuffer; + size_t mSize; + void *mOutBuffer; - BufferAction() { mOutBuffer = NULL; } - virtual ~BufferAction() { free( mOutBuffer ); } + BufferAction() { mOutBuffer = NULL; } + virtual ~BufferAction() { free(mOutBuffer); } - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue, bool allocate ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue, bool allocate); }; -class ReadBufferAction : public BufferAction -{ - public: - ReadBufferAction() {} - virtual ~ReadBufferAction() {} +class ReadBufferAction : public BufferAction { +public: + ReadBufferAction() {} + virtual ~ReadBufferAction() {} - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "ReadBuffer"; } + virtual const char *GetName(void) const { return "ReadBuffer"; } }; -class WriteBufferAction : public BufferAction -{ - public: - WriteBufferAction() {} - virtual ~WriteBufferAction() {} +class WriteBufferAction : public BufferAction { +public: + WriteBufferAction() {} + virtual ~WriteBufferAction() {} - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event 
*waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "WriteBuffer"; } + virtual const char *GetName(void) const { return "WriteBuffer"; } }; -class MapBufferAction : public BufferAction -{ - public: - MapBufferAction() : mQueue(0) {} +class MapBufferAction : public BufferAction { +public: + MapBufferAction(): mQueue(0) {} - cl_command_queue mQueue; - void *mMappedPtr; + cl_command_queue mQueue; + void *mMappedPtr; - virtual ~MapBufferAction(); - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual ~MapBufferAction(); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "MapBuffer"; } + virtual const char *GetName(void) const { return "MapBuffer"; } }; -class UnmapBufferAction : public BufferAction -{ - public: - UnmapBufferAction() {} - virtual ~UnmapBufferAction() {} +class UnmapBufferAction : public BufferAction { +public: + UnmapBufferAction() {} + virtual ~UnmapBufferAction() {} - void *mMappedPtr; + void *mMappedPtr; - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "UnmapBuffer"; } + virtual const 
char *GetName(void) const { return "UnmapBuffer"; } }; -class ReadImage2DAction : public Action -{ - public: - ReadImage2DAction() { mOutput = NULL; } - virtual ~ReadImage2DAction() { free( mOutput ); } +class ReadImage2DAction : public Action { +public: + ReadImage2DAction() { mOutput = NULL; } + virtual ~ReadImage2DAction() { free(mOutput); } - clMemWrapper mImage; - size_t mWidth, mHeight; - void *mOutput; + clMemWrapper mImage; + size_t mWidth, mHeight; + void *mOutput; - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "ReadImage2D"; } + virtual const char *GetName(void) const { return "ReadImage2D"; } }; -class ReadImage3DAction : public Action -{ - public: - ReadImage3DAction() { mOutput = NULL; } - virtual ~ReadImage3DAction() { free( mOutput ); } +class ReadImage3DAction : public Action { +public: + ReadImage3DAction() { mOutput = NULL; } + virtual ~ReadImage3DAction() { free(mOutput); } - clMemWrapper mImage; - size_t mWidth, mHeight, mDepth; - void *mOutput; + clMemWrapper mImage; + size_t mWidth, mHeight, mDepth; + void *mOutput; - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "ReadImage3D"; } + virtual const char *GetName(void) const { return "ReadImage3D"; } }; 
-class WriteImage2DAction : public Action -{ - public: - clMemWrapper mImage; - size_t mWidth, mHeight; - void *mOutput; +class WriteImage2DAction : public Action { +public: + clMemWrapper mImage; + size_t mWidth, mHeight; + void *mOutput; - WriteImage2DAction() { mOutput = NULL; } - virtual ~WriteImage2DAction() { free( mOutput ); } + WriteImage2DAction() { mOutput = NULL; } + virtual ~WriteImage2DAction() { free(mOutput); } - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "WriteImage2D"; } + virtual const char *GetName(void) const { return "WriteImage2D"; } }; -class WriteImage3DAction : public Action -{ - public: - clMemWrapper mImage; - size_t mWidth, mHeight, mDepth; - void *mOutput; +class WriteImage3DAction : public Action { +public: + clMemWrapper mImage; + size_t mWidth, mHeight, mDepth; + void *mOutput; - WriteImage3DAction() { mOutput = NULL; } - virtual ~WriteImage3DAction() { free( mOutput ); } + WriteImage3DAction() { mOutput = NULL; } + virtual ~WriteImage3DAction() { free(mOutput); } - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "WriteImage3D"; } + virtual const char *GetName(void) const { return "WriteImage3D"; } }; -class CopyImageAction : public Action -{ - 
public: - CopyImageAction() {} - virtual ~CopyImageAction() {} +class CopyImageAction : public Action { +public: + CopyImageAction() {} + virtual ~CopyImageAction() {} - clMemWrapper mSrcImage, mDstImage; - size_t mWidth, mHeight, mDepth; + clMemWrapper mSrcImage, mDstImage; + size_t mWidth, mHeight, mDepth; - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); }; -class CopyImage2Dto2DAction : public CopyImageAction -{ - public: - CopyImage2Dto2DAction() {} - virtual ~CopyImage2Dto2DAction() {} +class CopyImage2Dto2DAction : public CopyImageAction { +public: + CopyImage2Dto2DAction() {} + virtual ~CopyImage2Dto2DAction() {} - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); - virtual const char * GetName( void ) const { return "CopyImage2Dto2D"; } + virtual const char *GetName(void) const { return "CopyImage2Dto2D"; } }; -class CopyImage2Dto3DAction : public CopyImageAction -{ - public: - CopyImage2Dto3DAction() {} - virtual ~CopyImage2Dto3DAction() {} +class CopyImage2Dto3DAction : public CopyImageAction { +public: + CopyImage2Dto3DAction() {} + virtual ~CopyImage2Dto3DAction() {} - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); - virtual const char * GetName( void ) const { return "CopyImage2Dto3D"; } + virtual const char *GetName(void) const { return "CopyImage2Dto3D"; } }; -class CopyImage3Dto2DAction : public CopyImageAction -{ - public: - CopyImage3Dto2DAction() {} - virtual ~CopyImage3Dto2DAction() {} +class CopyImage3Dto2DAction : public CopyImageAction { +public: + CopyImage3Dto2DAction() {} + virtual ~CopyImage3Dto2DAction() {} - virtual 
cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); - virtual const char * GetName( void ) const { return "CopyImage3Dto2D"; } + virtual const char *GetName(void) const { return "CopyImage3Dto2D"; } }; -class CopyImage3Dto3DAction : public CopyImageAction -{ - public: - CopyImage3Dto3DAction() {} - virtual ~CopyImage3Dto3DAction() {} +class CopyImage3Dto3DAction : public CopyImageAction { +public: + CopyImage3Dto3DAction() {} + virtual ~CopyImage3Dto3DAction() {} - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); - virtual const char * GetName( void ) const { return "CopyImage3Dto3D"; } + virtual const char *GetName(void) const { return "CopyImage3Dto3D"; } }; -class Copy2DImageToBufferAction : public Action -{ - public: - Copy2DImageToBufferAction() {} - virtual ~Copy2DImageToBufferAction() {} +class Copy2DImageToBufferAction : public Action { +public: + Copy2DImageToBufferAction() {} + virtual ~Copy2DImageToBufferAction() {} - clMemWrapper mSrcImage, mDstBuffer; - size_t mWidth, mHeight; + clMemWrapper mSrcImage, mDstBuffer; + size_t mWidth, mHeight; - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "Copy2DImageToBuffer"; } + virtual const char *GetName(void) const { return "Copy2DImageToBuffer"; } }; -class Copy3DImageToBufferAction : public Action -{ - public: - Copy3DImageToBufferAction() {} - virtual 
~Copy3DImageToBufferAction() {} +class Copy3DImageToBufferAction : public Action { +public: + Copy3DImageToBufferAction() {} + virtual ~Copy3DImageToBufferAction() {} - clMemWrapper mSrcImage, mDstBuffer; - size_t mWidth, mHeight, mDepth; + clMemWrapper mSrcImage, mDstBuffer; + size_t mWidth, mHeight, mDepth; - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "Copy3DImageToBuffer"; } + virtual const char *GetName(void) const { return "Copy3DImageToBuffer"; } }; -class CopyBufferTo2DImageAction : public Action -{ - public: - CopyBufferTo2DImageAction() {} - virtual ~CopyBufferTo2DImageAction() {} +class CopyBufferTo2DImageAction : public Action { +public: + CopyBufferTo2DImageAction() {} + virtual ~CopyBufferTo2DImageAction() {} - clMemWrapper mSrcBuffer, mDstImage; - size_t mWidth, mHeight; + clMemWrapper mSrcBuffer, mDstImage; + size_t mWidth, mHeight; - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "CopyBufferTo2D"; } + virtual const char *GetName(void) const { return "CopyBufferTo2D"; } }; -class CopyBufferTo3DImageAction : public Action -{ - public: - CopyBufferTo3DImageAction() {} - virtual ~CopyBufferTo3DImageAction() {} +class CopyBufferTo3DImageAction : public Action { 
+public: + CopyBufferTo3DImageAction() {} + virtual ~CopyBufferTo3DImageAction() {} - clMemWrapper mSrcBuffer, mDstImage; - size_t mWidth, mHeight, mDepth; + clMemWrapper mSrcBuffer, mDstImage; + size_t mWidth, mHeight, mDepth; - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "CopyBufferTo3D"; } + virtual const char *GetName(void) const { return "CopyBufferTo3D"; } }; -class MapImageAction : public Action -{ - public: - MapImageAction() : mQueue(0) {} +class MapImageAction : public Action { +public: + MapImageAction(): mQueue(0) {} - clMemWrapper mImage; - size_t mWidth, mHeight; - void *mMappedPtr; - cl_command_queue mQueue; + clMemWrapper mImage; + size_t mWidth, mHeight; + void *mMappedPtr; + cl_command_queue mQueue; - virtual ~MapImageAction(); - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual ~MapImageAction(); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "MapImage"; } + virtual const char *GetName(void) const { return "MapImage"; } }; diff --git a/test_conformance/events/main.cpp b/test_conformance/events/main.cpp index 777d2d36..74682f99 100644 --- a/test_conformance/events/main.cpp +++ b/test_conformance/events/main.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. 
-// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -24,44 +24,44 @@ #endif test_definition test_list[] = { - ADD_TEST( event_get_execute_status ), - ADD_TEST( event_get_write_array_status ), - ADD_TEST( event_get_read_array_status ), - ADD_TEST( event_get_info ), - ADD_TEST( event_wait_for_execute ), - ADD_TEST( event_wait_for_array ), - ADD_TEST( event_flush ), - ADD_TEST( event_finish_execute ), - ADD_TEST( event_finish_array ), - ADD_TEST( event_release_before_done ), - ADD_TEST( event_enqueue_marker ), + ADD_TEST(event_get_execute_status), + ADD_TEST(event_get_write_array_status), + ADD_TEST(event_get_read_array_status), + ADD_TEST(event_get_info), + ADD_TEST(event_wait_for_execute), + ADD_TEST(event_wait_for_array), + ADD_TEST(event_flush), + ADD_TEST(event_finish_execute), + ADD_TEST(event_finish_array), + ADD_TEST(event_release_before_done), + ADD_TEST(event_enqueue_marker), #ifdef CL_VERSION_1_2 - ADD_TEST( event_enqueue_marker_with_event_list ), - ADD_TEST( event_enqueue_barrier_with_event_list ), + ADD_TEST(event_enqueue_marker_with_event_list), + ADD_TEST(event_enqueue_barrier_with_event_list), #endif - ADD_TEST( out_of_order_event_waitlist_single_queue ), - ADD_TEST( out_of_order_event_waitlist_multi_queue ), - ADD_TEST( out_of_order_event_waitlist_multi_queue_multi_device ), - ADD_TEST( out_of_order_event_enqueue_wait_for_events_single_queue ), - ADD_TEST( out_of_order_event_enqueue_wait_for_events_multi_queue ), - ADD_TEST( out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device ), - ADD_TEST( out_of_order_event_enqueue_marker_single_queue ), - ADD_TEST( out_of_order_event_enqueue_marker_multi_queue ), - ADD_TEST( out_of_order_event_enqueue_marker_multi_queue_multi_device ), - ADD_TEST( out_of_order_event_enqueue_barrier_single_queue ), + ADD_TEST(out_of_order_event_waitlist_single_queue), + 
ADD_TEST(out_of_order_event_waitlist_multi_queue), + ADD_TEST(out_of_order_event_waitlist_multi_queue_multi_device), + ADD_TEST(out_of_order_event_enqueue_wait_for_events_single_queue), + ADD_TEST(out_of_order_event_enqueue_wait_for_events_multi_queue), + ADD_TEST( + out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device), + ADD_TEST(out_of_order_event_enqueue_marker_single_queue), + ADD_TEST(out_of_order_event_enqueue_marker_multi_queue), + ADD_TEST(out_of_order_event_enqueue_marker_multi_queue_multi_device), + ADD_TEST(out_of_order_event_enqueue_barrier_single_queue), - ADD_TEST( waitlists ), - ADD_TEST( userevents ), - ADD_TEST( callbacks ), - ADD_TEST( callbacks_simultaneous ), - ADD_TEST( userevents_multithreaded ), + ADD_TEST(waitlists), + ADD_TEST(userevents), + ADD_TEST(callbacks), + ADD_TEST(callbacks_simultaneous), + ADD_TEST(userevents_multithreaded), }; -const int test_num = ARRAY_SIZE( test_list ); +const int test_num = ARRAY_SIZE(test_list); int main(int argc, const char *argv[]) { return runTestHarness(argc, argv, test_num, test_list, false, 0); } - diff --git a/test_conformance/events/procs.h b/test_conformance/events/procs.h index f077c247..97309db3 100644 --- a/test_conformance/events/procs.h +++ b/test_conformance/events/procs.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -18,44 +18,101 @@ #include "harness/typeWrappers.h" #include "harness/clImageHelper.h" -extern float random_float(float low, float high); -extern float calculate_ulperror(float a, float b); - - -extern int test_event_get_execute_status(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_get_write_array_status(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_get_read_array_status(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_get_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_wait_for_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_wait_for_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_flush(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_finish_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_finish_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_release_before_done(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_enqueue_marker(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -#ifdef CL_VERSION_1_2 -extern int test_event_enqueue_marker_with_event_list(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_enqueue_barrier_with_event_list(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -#endif +extern float random_float(float low, float high); +extern float 
calculate_ulperror(float a, float b); -extern int test_out_of_order_event_waitlist_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_out_of_order_event_waitlist_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_out_of_order_event_waitlist_multi_queue_multi_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_out_of_order_event_enqueue_wait_for_events_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_out_of_order_event_enqueue_wait_for_events_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_event_get_execute_status(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_event_get_write_array_status(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_event_get_read_array_status(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_event_get_info(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_event_wait_for_execute(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_event_wait_for_array(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_event_flush(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_event_finish_execute(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int 
num_elements); +extern int test_event_finish_array(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_event_release_before_done(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_event_enqueue_marker(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +#ifdef CL_VERSION_1_2 +extern int test_event_enqueue_marker_with_event_list(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_event_enqueue_barrier_with_event_list(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +#endif -extern int test_out_of_order_event_enqueue_barrier_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_out_of_order_event_waitlist_single_queue(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_out_of_order_event_waitlist_multi_queue(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_out_of_order_event_waitlist_multi_queue_multi_device( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); -extern int test_out_of_order_event_enqueue_marker_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_out_of_order_event_enqueue_marker_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_out_of_order_event_enqueue_marker_multi_queue_multi_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_out_of_order_event_enqueue_wait_for_events_single_queue( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int 
test_out_of_order_event_enqueue_wait_for_events_multi_queue( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int +test_out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); -extern int test_waitlists( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); -extern int test_userevents( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); -extern int test_callbacks( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); -extern int test_callbacks_simultaneous( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); -extern int test_userevents_multithreaded( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_out_of_order_event_enqueue_barrier_single_queue( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int test_out_of_order_event_enqueue_marker_single_queue( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int test_out_of_order_event_enqueue_marker_multi_queue( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int test_out_of_order_event_enqueue_marker_multi_queue_multi_device( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int test_waitlists(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_userevents(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_callbacks(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_callbacks_simultaneous(cl_device_id deviceID, + cl_context context, + 
cl_command_queue queue, + int num_elements); +extern int test_userevents_multithreaded(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); diff --git a/test_conformance/events/testBase.h b/test_conformance/events/testBase.h index 5b49bfd7..63086d7e 100644 --- a/test_conformance/events/testBase.h +++ b/test_conformance/events/testBase.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -26,6 +26,3 @@ #include "procs.h" #endif // _testBase_h - - - diff --git a/test_conformance/events/test_callbacks.cpp b/test_conformance/events/test_callbacks.cpp index 47e898b9..911298a5 100644 --- a/test_conformance/events/test_callbacks.cpp +++ b/test_conformance/events/test_callbacks.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -18,28 +18,34 @@ #include "harness/conversions.h" #include "harness/ThreadPool.h" -#if !defined (_MSC_VER) +#if !defined(_MSC_VER) #include #endif // !_MSC_VER -extern const char *IGetStatusString( cl_int status ); +extern const char *IGetStatusString(cl_int status); #define PRINT_OPS 0 -// Yes, this is somewhat nasty, in that we're relying on the CPU (the real CPU, not the OpenCL device) -// to be atomic w.r.t. boolean values. Although if it isn't, we'll just miss the check on this bool -// until the next time around, so it's not that big of a deal. Ideally, we'd be using a semaphore with -// a trywait on it, but then that introduces the fun issue of what to do on Win32, etc. This way is -// far more portable, and worst case of failure is a slightly longer test run. 
+// Yes, this is somewhat nasty, in that we're relying on the CPU (the real CPU, +// not the OpenCL device) to be atomic w.r.t. boolean values. Although if it +// isn't, we'll just miss the check on this bool until the next time around, so +// it's not that big of a deal. Ideally, we'd be using a semaphore with a +// trywait on it, but then that introduces the fun issue of what to do on Win32, +// etc. This way is far more portable, and worst case of failure is a slightly +// longer test run. static bool sCallbackTriggered = false; #define EVENT_CALLBACK_TYPE_TOTAL 3 -static bool sCallbackTriggered_flag[ EVENT_CALLBACK_TYPE_TOTAL ] ={ false,false, false }; -cl_int event_callback_types[EVENT_CALLBACK_TYPE_TOTAL] ={ CL_SUBMITTED, CL_RUNNING, CL_COMPLETE}; +static bool sCallbackTriggered_flag[EVENT_CALLBACK_TYPE_TOTAL] = { false, false, + false }; +cl_int event_callback_types[EVENT_CALLBACK_TYPE_TOTAL] = { CL_SUBMITTED, + CL_RUNNING, + CL_COMPLETE }; // Our callback function -/*void CL_CALLBACK single_event_callback_function( cl_event event, cl_int commandStatus, void * userData ) +/*void CL_CALLBACK single_event_callback_function( cl_event event, cl_int +commandStatus, void * userData ) { int i=*static_cast(userData); log_info( "\tEvent callback %d triggered\n", i); @@ -47,67 +53,79 @@ cl_int event_callback_types[EVENT_CALLBACK_TYPE_TOTAL] ={ CL_SUBMITTED, CL_RUNNI }*/ /* use struct as call back para */ -typedef struct { cl_int enevt_type; int index; } CALL_BACK_USER_DATA; +typedef struct +{ + cl_int enevt_type; + int index; +} CALL_BACK_USER_DATA; -void CL_CALLBACK single_event_callback_function_flags( cl_event event, cl_int commandStatus, void * userData ) +void CL_CALLBACK single_event_callback_function_flags(cl_event event, + cl_int commandStatus, + void *userData) { - // int i=*static_cast(userData); - CALL_BACK_USER_DATA *pdata= static_cast(userData); + // int i=*static_cast(userData); + CALL_BACK_USER_DATA *pdata = static_cast(userData); - log_info( "\tEvent 
callback %d of type %d triggered\n", pdata->index, pdata->enevt_type); - sCallbackTriggered_flag [pdata->index ] = true; + log_info("\tEvent callback %d of type %d triggered\n", pdata->index, + pdata->enevt_type); + sCallbackTriggered_flag[pdata->index] = true; } -int test_callback_event_single( cl_device_id device, cl_context context, cl_command_queue queue, Action *actionToTest ) +int test_callback_event_single(cl_device_id device, cl_context context, + cl_command_queue queue, Action *actionToTest) { - // Note: we don't use the waiting feature here. We just want to verify that we get a callback called - // when the given event finishes + // Note: we don't use the waiting feature here. We just want to verify that + // we get a callback called when the given event finishes - cl_int error = actionToTest->Setup( device, context, queue ); - test_error( error, "Unable to set up test action" ); + cl_int error = actionToTest->Setup(device, context, queue); + test_error(error, "Unable to set up test action"); // Set up a user event, which we use as a gate for the second event - clEventWrapper gateEvent = clCreateUserEvent( context, &error ); - test_error( error, "Unable to set up user gate event" ); + clEventWrapper gateEvent = clCreateUserEvent(context, &error); + test_error(error, "Unable to set up user gate event"); // Set up the execution of the action with its actual event clEventWrapper actualEvent; - error = actionToTest->Execute( queue, 1, &gateEvent, &actualEvent ); - test_error( error, "Unable to set up action execution" ); + error = actionToTest->Execute(queue, 1, &gateEvent, &actualEvent); + test_error(error, "Unable to set up action execution"); // Set up the callback on the actual event - /* use struct as call back para */ - CALL_BACK_USER_DATA user_data[EVENT_CALLBACK_TYPE_TOTAL]; - for( int i=0;i< EVENT_CALLBACK_TYPE_TOTAL; i++) - { - user_data[i].enevt_type=event_callback_types[i]; - user_data[i].index =i; - error = clSetEventCallback( actualEvent, 
event_callback_types[i], single_event_callback_function_flags, user_data+i ); - - } + /* use struct as call back para */ + CALL_BACK_USER_DATA user_data[EVENT_CALLBACK_TYPE_TOTAL]; + for (int i = 0; i < EVENT_CALLBACK_TYPE_TOTAL; i++) + { + user_data[i].enevt_type = event_callback_types[i]; + user_data[i].index = i; + error = clSetEventCallback(actualEvent, event_callback_types[i], + single_event_callback_function_flags, + user_data + i); + } // Now release the user event, which will allow our actual action to run - error = clSetUserEventStatus( gateEvent, CL_COMPLETE ); - test_error( error, "Unable to trigger gate event" ); + error = clSetUserEventStatus(gateEvent, CL_COMPLETE); + test_error(error, "Unable to trigger gate event"); - // Now we wait for completion. Note that we can actually wait on the event itself, at least at first - error = clWaitForEvents( 1, &actualEvent ); - test_error( error, "Unable to wait for actual test event" ); + // Now we wait for completion. Note that we can actually wait on the event + // itself, at least at first + error = clWaitForEvents(1, &actualEvent); + test_error(error, "Unable to wait for actual test event"); - // Note: we can check our callback now, and it MIGHT have been triggered, but that's not guaranteed - if( sCallbackTriggered ) + // Note: we can check our callback now, and it MIGHT have been triggered, + // but that's not guaranteed + if (sCallbackTriggered) { // We're all good, so return success return 0; } - // The callback has not yet been called, but that doesn't mean it won't be. So wait for it - log_info( "\tWaiting for callback..." ); - fflush( stdout ); - for( int i = 0; i < 10 * 10; i++ ) + // The callback has not yet been called, but that doesn't mean it won't be. 
+ // So wait for it + log_info("\tWaiting for callback..."); + fflush(stdout); + for (int i = 0; i < 10 * 10; i++) { - usleep( 100000 ); // 1/10th second + usleep(100000); // 1/10th second int cc = 0; for (int k = 0; k < EVENT_CALLBACK_TYPE_TOTAL; k++) @@ -116,206 +134,222 @@ int test_callback_event_single( cl_device_id device, cl_context context, cl_comm cc++; } - if (cc== EVENT_CALLBACK_TYPE_TOTAL ) + if (cc == EVENT_CALLBACK_TYPE_TOTAL) { - log_info( "\n" ); + log_info("\n"); return 0; } - log_info( "." ); - fflush( stdout ); + log_info("."); + fflush(stdout); } // If we got here, we never got the callback - log_error( "\nCallback not called within 10 seconds! (assuming failure)\n" ); + log_error("\nCallback not called within 10 seconds! (assuming failure)\n"); return -1; } -#define TEST_ACTION( name ) \ -{ \ - name##Action action; \ - log_info( "-- Testing " #name "...\n" ); \ - if( ( error = test_callback_event_single( deviceID, context, queue, &action ) ) != CL_SUCCESS ) \ - retVal++; \ - clFinish( queue ); \ -} +#define TEST_ACTION(name) \ + { \ + name##Action action; \ + log_info("-- Testing " #name "...\n"); \ + if ((error = test_callback_event_single(deviceID, context, queue, \ + &action)) \ + != CL_SUCCESS) \ + retVal++; \ + clFinish(queue); \ + } -int test_callbacks( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +int test_callbacks(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int error; int retVal = 0; - log_info( "\n" ); + log_info("\n"); - TEST_ACTION( NDRangeKernel ) + TEST_ACTION(NDRangeKernel) - TEST_ACTION( ReadBuffer ) - TEST_ACTION( WriteBuffer ) - TEST_ACTION( MapBuffer ) - TEST_ACTION( UnmapBuffer ) + TEST_ACTION(ReadBuffer) + TEST_ACTION(WriteBuffer) + TEST_ACTION(MapBuffer) + TEST_ACTION(UnmapBuffer) - if( checkForImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED ) + if (checkForImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED) { - log_info( 
"\nNote: device does not support images. Skipping remainder of callback tests...\n" ); + log_info("\nNote: device does not support images. Skipping remainder " + "of callback tests...\n"); } else { - TEST_ACTION( ReadImage2D ) - TEST_ACTION( WriteImage2D ) - TEST_ACTION( CopyImage2Dto2D ) - TEST_ACTION( Copy2DImageToBuffer ) - TEST_ACTION( CopyBufferTo2DImage ) - TEST_ACTION( MapImage ) - - if( checkFor3DImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED ) - log_info( "\nNote: device does not support 3D images. Skipping remainder of waitlist tests...\n" ); + TEST_ACTION(ReadImage2D) + TEST_ACTION(WriteImage2D) + TEST_ACTION(CopyImage2Dto2D) + TEST_ACTION(Copy2DImageToBuffer) + TEST_ACTION(CopyBufferTo2DImage) + TEST_ACTION(MapImage) + + if (checkFor3DImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + log_info("\nNote: device does not support 3D images. Skipping " + "remainder of waitlist tests...\n"); else { - TEST_ACTION( ReadImage3D ) - TEST_ACTION( WriteImage3D ) - TEST_ACTION( CopyImage2Dto3D ) - TEST_ACTION( CopyImage3Dto2D ) - TEST_ACTION( CopyImage3Dto3D ) - TEST_ACTION( Copy3DImageToBuffer ) - TEST_ACTION( CopyBufferTo3DImage ) + TEST_ACTION(ReadImage3D) + TEST_ACTION(WriteImage3D) + TEST_ACTION(CopyImage2Dto3D) + TEST_ACTION(CopyImage3Dto2D) + TEST_ACTION(CopyImage3Dto3D) + TEST_ACTION(Copy3DImageToBuffer) + TEST_ACTION(CopyBufferTo3DImage) } } return retVal; } -#define SIMUTANEOUS_ACTION_TOTAL 18 -static bool sSimultaneousFlags[ 54 ];// for 18 actions with 3 callback status +#define SIMUTANEOUS_ACTION_TOTAL 18 +static bool sSimultaneousFlags[54]; // for 18 actions with 3 callback status static volatile int sSimultaneousCount; -Action * actions[ 19 ] = { 0 }; +Action *actions[19] = { 0 }; // Callback for the simultaneous tests -void CL_CALLBACK simultaneous_event_callback_function( cl_event event, cl_int commandStatus, void * userData ) +void CL_CALLBACK simultaneous_event_callback_function(cl_event event, + cl_int commandStatus, + void 
*userData) { int eventIndex = (int)(size_t)userData; - int actionIndex = eventIndex/EVENT_CALLBACK_TYPE_TOTAL; - int statusIndex = eventIndex%EVENT_CALLBACK_TYPE_TOTAL; - log_info( "\tEvent callback triggered for action %s callback type %s \n", actions[actionIndex]->GetName(), IGetStatusString(statusIndex) ); - sSimultaneousFlags[ actionIndex ] = true; - ThreadPool_AtomicAdd(&sSimultaneousCount,1); + int actionIndex = eventIndex / EVENT_CALLBACK_TYPE_TOTAL; + int statusIndex = eventIndex % EVENT_CALLBACK_TYPE_TOTAL; + log_info("\tEvent callback triggered for action %s callback type %s \n", + actions[actionIndex]->GetName(), IGetStatusString(statusIndex)); + sSimultaneousFlags[actionIndex] = true; + ThreadPool_AtomicAdd(&sSimultaneousCount, 1); } -int test_callbacks_simultaneous( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +int test_callbacks_simultaneous(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int error; - // Unlike the singles test, in this one, we run a bunch of events all at once, to verify that - // the callbacks do get called once-and-only-once for each event, even if the run out of order or - // are dependent on each other + // Unlike the singles test, in this one, we run a bunch of events all at + // once, to verify that the callbacks do get called once-and-only-once for + // each event, even if the run out of order or are dependent on each other // First, the list of actions to run int actionCount = 0, index = 0; - actions[ index++ ] = new NDRangeKernelAction(); - actions[ index++ ] = new ReadBufferAction(); - actions[ index++ ] = new WriteBufferAction(); - actions[ index++ ] = new MapBufferAction(); - actions[ index++ ] = new UnmapBufferAction(); + actions[index++] = new NDRangeKernelAction(); + actions[index++] = new ReadBufferAction(); + actions[index++] = new WriteBufferAction(); + actions[index++] = new MapBufferAction(); + actions[index++] = new 
UnmapBufferAction(); - if( checkForImageSupport( deviceID ) != CL_IMAGE_FORMAT_NOT_SUPPORTED ) + if (checkForImageSupport(deviceID) != CL_IMAGE_FORMAT_NOT_SUPPORTED) { - actions[ index++ ] = new ReadImage2DAction(); - actions[ index++ ] = new WriteImage2DAction(); - actions[ index++ ] = new CopyImage2Dto2DAction(); - actions[ index++ ] = new Copy2DImageToBufferAction(); - actions[ index++ ] = new CopyBufferTo2DImageAction(); - actions[ index++ ] = new MapImageAction(); - - if( checkFor3DImageSupport( deviceID ) != CL_IMAGE_FORMAT_NOT_SUPPORTED ) + actions[index++] = new ReadImage2DAction(); + actions[index++] = new WriteImage2DAction(); + actions[index++] = new CopyImage2Dto2DAction(); + actions[index++] = new Copy2DImageToBufferAction(); + actions[index++] = new CopyBufferTo2DImageAction(); + actions[index++] = new MapImageAction(); + + if (checkFor3DImageSupport(deviceID) != CL_IMAGE_FORMAT_NOT_SUPPORTED) { - actions[ index++ ] = new ReadImage3DAction(); - actions[ index++ ] = new WriteImage3DAction(); - actions[ index++ ] = new CopyImage2Dto3DAction(); - actions[ index++ ] = new CopyImage3Dto2DAction(); - actions[ index++ ] = new CopyImage3Dto3DAction(); - actions[ index++ ] = new Copy3DImageToBufferAction(); - actions[ index++ ] = new CopyBufferTo3DImageAction(); + actions[index++] = new ReadImage3DAction(); + actions[index++] = new WriteImage3DAction(); + actions[index++] = new CopyImage2Dto3DAction(); + actions[index++] = new CopyImage3Dto2DAction(); + actions[index++] = new CopyImage3Dto3DAction(); + actions[index++] = new Copy3DImageToBufferAction(); + actions[index++] = new CopyBufferTo3DImageAction(); } } actionCount = index; - actions[ index++ ] = NULL; + actions[index++] = NULL; // Now set them all up - log_info( "\tSetting up test events...\n" ); - for( index = 0; actions[ index ] != NULL; index++ ) + log_info("\tSetting up test events...\n"); + for (index = 0; actions[index] != NULL; index++) { - error = actions[ index ]->Setup( deviceID, context, 
queue ); - test_error( error, "Unable to set up test action" ); - sSimultaneousFlags[ index ] = false; + error = actions[index]->Setup(deviceID, context, queue); + test_error(error, "Unable to set up test action"); + sSimultaneousFlags[index] = false; } sSimultaneousCount = 0; // Set up the user event to start them all - clEventWrapper gateEvent = clCreateUserEvent( context, &error ); - test_error( error, "Unable to set up user gate event" ); + clEventWrapper gateEvent = clCreateUserEvent(context, &error); + test_error(error, "Unable to set up user gate event"); // Start executing, all tied to the gate event - //clEventWrapper actionEvents[ 18 ];// current actionCount is 18 - clEventWrapper *actionEvents= new clEventWrapper[actionCount]; + // clEventWrapper actionEvents[ 18 ];// current actionCount is 18 + clEventWrapper *actionEvents = new clEventWrapper[actionCount]; if (actionEvents == NULL) { log_error(" memory error in test_callbacks_simultaneous \n"); for (size_t i = 0; i < (sizeof(actions) / sizeof(actions[0])); ++i) if (actions[i]) delete actions[i]; - return -1; + return -1; } - RandomSeed seed( gRandomSeed ); - for( index = 0; actions[ index ] != NULL; index++ ) + RandomSeed seed(gRandomSeed); + for (index = 0; actions[index] != NULL; index++) { // Randomly choose to wait on the gate, or wait on the previous event - cl_event * eventPtr = &gateEvent; - if( ( index > 0 ) && ( random_in_range( 0, 255, seed ) & 1 ) ) - eventPtr = &actionEvents[ index - 1 ]; + cl_event *eventPtr = &gateEvent; + if ((index > 0) && (random_in_range(0, 255, seed) & 1)) + eventPtr = &actionEvents[index - 1]; - error = actions[ index ]->Execute( queue, 1, eventPtr, &actionEvents[ index ] ); - test_error( error, "Unable to execute test action" ); + error = + actions[index]->Execute(queue, 1, eventPtr, &actionEvents[index]); + test_error(error, "Unable to execute test action"); - for( int k=0; k< EVENT_CALLBACK_TYPE_TOTAL; k++) - { - error = clSetEventCallback( actionEvents[index], 
event_callback_types[k], simultaneous_event_callback_function, (void *)(size_t)(index*EVENT_CALLBACK_TYPE_TOTAL+k ) ); - test_error( error, "Unable to set event callback function" ); - - } + for (int k = 0; k < EVENT_CALLBACK_TYPE_TOTAL; k++) + { + error = clSetEventCallback( + actionEvents[index], event_callback_types[k], + simultaneous_event_callback_function, + (void *)(size_t)(index * EVENT_CALLBACK_TYPE_TOTAL + k)); + test_error(error, "Unable to set event callback function"); + } } - int total_callbacks= actionCount * EVENT_CALLBACK_TYPE_TOTAL; + int total_callbacks = actionCount * EVENT_CALLBACK_TYPE_TOTAL; // Now release the user event, which will allow our actual action to run - error = clSetUserEventStatus( gateEvent, CL_COMPLETE ); - test_error( error, "Unable to trigger gate event" ); + error = clSetUserEventStatus(gateEvent, CL_COMPLETE); + test_error(error, "Unable to trigger gate event"); // Wait on the actual action events now - log_info( "\tWaiting for test completions...\n" ); - error = clWaitForEvents( actionCount, &actionEvents[ 0 ] ); - test_error( error, "Unable to wait for actual test events" ); - - // Note: we can check our callback now, and it MIGHT have been triggered, but that's not guaranteed - int last_count = 0; - if( ((last_count = sSimultaneousCount)) == total_callbacks) + log_info("\tWaiting for test completions...\n"); + error = clWaitForEvents(actionCount, &actionEvents[0]); + test_error(error, "Unable to wait for actual test events"); + + // Note: we can check our callback now, and it MIGHT have been triggered, + // but that's not guaranteed + int last_count = 0; + if (((last_count = sSimultaneousCount)) == total_callbacks) { // We're all good, so return success - log_info( "\t%d of %d callbacks received\n", sSimultaneousCount, total_callbacks ); + log_info("\t%d of %d callbacks received\n", sSimultaneousCount, + total_callbacks); - if (actionEvents) delete [] actionEvents; - for (size_t 
i=0;i<(sizeof(actions)/sizeof(actions[0]));++i) - if (actions[i]) delete actions[i]; + if (actionEvents) delete[] actionEvents; + for (size_t i = 0; i < (sizeof(actions) / sizeof(actions[0])); ++i) + if (actions[i]) delete actions[i]; return 0; } // We haven't gotten (all) of the callbacks, so wait for them - log_info( "\tWe've only received %d of the %d callbacks we expected; waiting for more...\n", last_count, total_callbacks ); + log_info("\tWe've only received %d of the %d callbacks we expected; " + "waiting for more...\n", + last_count, total_callbacks); - for( int i = 0; i < 10 * 10; i++ ) + for (int i = 0; i < 10 * 10; i++) { - usleep( 100000 ); // 1/10th second - if( ((last_count = sSimultaneousCount)) == total_callbacks ) + usleep(100000); // 1/10th second + if (((last_count = sSimultaneousCount)) == total_callbacks) { // All of the callbacks were executed if (actionEvents) delete[] actionEvents; @@ -326,16 +360,15 @@ int test_callbacks_simultaneous( cl_device_id deviceID, cl_context context, cl_c } // If we got here, some of the callbacks did not occur in time - log_error( "\nError: We only ever received %d of our %d callbacks!\n", last_count, total_callbacks ); - log_error( "Events that did not receive callbacks:\n" ); - for( index = 0; actions[ index ] != NULL; index++ ) + log_error("\nError: We only ever received %d of our %d callbacks!\n", + last_count, total_callbacks); + log_error("Events that did not receive callbacks:\n"); + for (index = 0; actions[index] != NULL; index++) { - if( !sSimultaneousFlags[ index ] ) - log_error( "\t%s\n", actions[ index ]->GetName() ); + if (!sSimultaneousFlags[index]) + log_error("\t%s\n", actions[index]->GetName()); } - if (actionEvents) delete [] actionEvents; + if (actionEvents) delete[] actionEvents; return -1; - } - diff --git a/test_conformance/events/test_event_dependencies.cpp b/test_conformance/events/test_event_dependencies.cpp index 41136548..45b260a6 100644 --- 
a/test_conformance/events/test_event_dependencies.cpp +++ b/test_conformance/events/test_event_dependencies.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -39,61 +39,79 @@ const char *write_kernels[] = { /* Tests event dependencies by running two kernels that use the same buffer. If two_queues is set they are run in separate queues. - If test_enqueue_wait_for_events is set then clEnqueueWaitForEvent is called between them. - If test_barrier is set then clEnqueueBarrier is called between them (only for single queue). - If neither are set, nothing is done to prevent them from executing in the wrong order. This can be used for verification. + If test_enqueue_wait_for_events is set then clEnqueueWaitForEvent is called + between them. If test_barrier is set then clEnqueueBarrier is called between + them (only for single queue). If neither are set, nothing is done to prevent + them from executing in the wrong order. This can be used for verification. 
*/ -int test_event_enqueue_wait_for_events_run_test( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int two_queues, int two_devices, - int test_enqueue_wait_for_events, int test_barrier, int use_waitlist, int use_marker) +int test_event_enqueue_wait_for_events_run_test( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements, int two_queues, int two_devices, + int test_enqueue_wait_for_events, int test_barrier, int use_waitlist, + int use_marker) { cl_int error = CL_SUCCESS; - size_t threads[3] = {TEST_SIZE,0,0}; + size_t threads[3] = { TEST_SIZE, 0, 0 }; int i, loop_count, event_count, expected_value, failed; int expected_if_only_queue[2]; int max_count = TEST_SIZE; cl_platform_id platform; - cl_command_queue queues[2]; // Not a wrapper so we don't autorelease if they are the same - clCommandQueueWrapper queueWrappers[2]; // If they are different, we use the wrapper so it will auto release + cl_command_queue + queues[2]; // Not a wrapper so we don't autorelease if they are the same + clCommandQueueWrapper queueWrappers[2]; // If they are different, we use the + // wrapper so it will auto release clContextWrapper context_to_use; clMemWrapper data; clProgramWrapper program; clKernelWrapper kernel1[TEST_COUNT], kernel2[TEST_COUNT]; - clEventWrapper event[TEST_COUNT*4+2]; // If we usemarkers we get 2 more events per iteration + clEventWrapper event[TEST_COUNT * 4 + 2]; // If we usemarkers we get 2 more + // events per iteration if (test_enqueue_wait_for_events) - log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier function.\n"); + log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier " + "function.\n"); if (test_barrier) - log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier function.\n"); + log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier " + "function.\n"); if (use_waitlist) - log_info("\tTesting with waitlist-based depenednecies between 
kernels.\n"); + log_info( + "\tTesting with waitlist-based dependencies between kernels.\n"); if (use_marker) log_info("\tTesting with clEnqueueMarker as a barrier function.\n"); - if (test_barrier && (two_queues || two_devices)) { - log_error("\tTest requested with clEnqueueBarrier across two queues. This is not a valid combination.\n"); + if (test_barrier && (two_queues || two_devices)) + { + log_error("\tTest requested with clEnqueueBarrier across two queues. " + "This is not a valid combination.\n"); return -1; } error = clGetPlatformIDs(1, &platform, NULL); test_error(error, "clGetPlatformIDs failed."); - // If we are to use two devices, then get them and create a context with both. + // If we are to use two devices, then get them and create a context with + // both. cl_device_id *two_device_ids; - if (two_devices) { - two_device_ids = (cl_device_id*)malloc(sizeof(cl_device_id)*2); + if (two_devices) + { + two_device_ids = (cl_device_id *)malloc(sizeof(cl_device_id) * 2); cl_uint number_returned; - error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, two_device_ids, &number_returned); - test_error( error, "clGetDeviceIDs for CL_DEVICE_TYPE_ALL failed."); - if (number_returned != 2) { + error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, two_device_ids, + &number_returned); + test_error(error, "clGetDeviceIDs for CL_DEVICE_TYPE_ALL failed."); + if (number_returned != 2) + { log_info("Failed to obtain two devices. 
Test can not run.\n"); free(two_device_ids); return 0; } - for (i=0; i<2; i++) { + for (i = 0; i < 2; i++) + { cl_device_type type; - error = clGetDeviceInfo(two_device_ids[i], CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL); - test_error( error, "clGetDeviceInfo failed."); + error = clGetDeviceInfo(two_device_ids[i], CL_DEVICE_TYPE, + sizeof(cl_device_type), &type, NULL); + test_error(error, "clGetDeviceInfo failed."); if (type & CL_DEVICE_TYPE_CPU) log_info("\tDevice %d is CL_DEVICE_TYPE_CPU.\n", i); if (type & CL_DEVICE_TYPE_GPU) @@ -104,12 +122,16 @@ int test_event_enqueue_wait_for_events_run_test( cl_device_id deviceID, cl_conte log_info("\tDevice %d is CL_DEVICE_TYPE_DEFAULT.\n", i); } - context_to_use = clCreateContext(NULL, 2, two_device_ids, notify_callback, NULL, &error); + context_to_use = clCreateContext(NULL, 2, two_device_ids, + notify_callback, NULL, &error); test_error(error, "clCreateContext failed for two devices."); log_info("\tTesting with two devices.\n"); - } else { - context_to_use = clCreateContext(NULL, 1, &deviceID, NULL, NULL, &error); + } + else + { + context_to_use = + clCreateContext(NULL, 1, &deviceID, NULL, NULL, &error); test_error(error, "clCreateContext failed for one device."); log_info("\tTesting with one device.\n"); @@ -117,41 +139,55 @@ int test_event_enqueue_wait_for_events_run_test( cl_device_id deviceID, cl_conte // If we are using two queues then create them cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; - if (two_queues) { + if (two_queues) + { // Get a second queue if (two_devices) { - if( !checkDeviceForQueueSupport( two_device_ids[ 0 ], props ) || - !checkDeviceForQueueSupport( two_device_ids[ 1 ], props ) ) + if (!checkDeviceForQueueSupport(two_device_ids[0], props) + || !checkDeviceForQueueSupport(two_device_ids[1], props)) { - log_info( "WARNING: One or more device for multi-device test does not support out-of-order exec mode; skipping test.\n" ); + log_info( + "WARNING: One or 
more device for multi-device test does " + "not support out-of-order exec mode; skipping test.\n"); return -1942; } - queueWrappers[0] = clCreateCommandQueue(context_to_use, two_device_ids[0], props, &error); - test_error(error, "clCreateCommandQueue for first queue on first device failed."); - queueWrappers[1] = clCreateCommandQueue(context_to_use, two_device_ids[1], props, &error); - test_error(error, "clCreateCommandQueue for second queue on second device failed."); - + queueWrappers[0] = clCreateCommandQueue( + context_to_use, two_device_ids[0], props, &error); + test_error( + error, + "clCreateCommandQueue for first queue on first device failed."); + queueWrappers[1] = clCreateCommandQueue( + context_to_use, two_device_ids[1], props, &error); + test_error(error, + "clCreateCommandQueue for second queue on second device " + "failed."); } else { - // Single device has already been checked for out-of-order exec support - queueWrappers[0] = clCreateCommandQueue(context_to_use, deviceID, props, &error); + // Single device has already been checked for out-of-order exec + // support + queueWrappers[0] = + clCreateCommandQueue(context_to_use, deviceID, props, &error); test_error(error, "clCreateCommandQueue for first queue failed."); - queueWrappers[1] = clCreateCommandQueue(context_to_use, deviceID, props, &error); + queueWrappers[1] = + clCreateCommandQueue(context_to_use, deviceID, props, &error); test_error(error, "clCreateCommandQueue for second queue failed."); } - // Ugly hack to make sure we only have the wrapper auto-release if they are different queues + // Ugly hack to make sure we only have the wrapper auto-release if they + // are different queues queues[0] = queueWrappers[0]; queues[1] = queueWrappers[1]; log_info("\tTesting with two queues.\n"); } else { - // (Note: single device has already been checked for out-of-order exec support) - // Otherwise create one queue and have the second one be the same - queueWrappers[0] = 
clCreateCommandQueue(context_to_use, deviceID, props, &error); + // (Note: single device has already been checked for out-of-order exec + // support) Otherwise create one queue and have the second one be the + // same + queueWrappers[0] = + clCreateCommandQueue(context_to_use, deviceID, props, &error); test_error(error, "clCreateCommandQueue for first queue failed."); queues[0] = queueWrappers[0]; queues[1] = (cl_command_queue)queues[0]; @@ -160,236 +196,346 @@ int test_event_enqueue_wait_for_events_run_test( cl_device_id deviceID, cl_conte // Setup - create a buffer and the two kernels - data = clCreateBuffer(context_to_use, CL_MEM_READ_WRITE, TEST_SIZE*sizeof(cl_int), NULL, &error); - test_error( error, "clCreateBuffer failed"); + data = clCreateBuffer(context_to_use, CL_MEM_READ_WRITE, + TEST_SIZE * sizeof(cl_int), NULL, &error); + test_error(error, "clCreateBuffer failed"); // Initialize the values to zero - cl_int *values = (cl_int*)malloc(TEST_SIZE*sizeof(cl_int)); - for (i=0; i<(int)TEST_SIZE; i++) - values[i] = 0; - error = clEnqueueWriteBuffer(queues[0], data, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), values, 0, NULL, NULL); - test_error( error, "clEnqueueWriteBuffer failed"); + cl_int *values = (cl_int *)malloc(TEST_SIZE * sizeof(cl_int)); + for (i = 0; i < (int)TEST_SIZE; i++) values[i] = 0; + error = + clEnqueueWriteBuffer(queues[0], data, CL_TRUE, 0, + TEST_SIZE * sizeof(cl_int), values, 0, NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); expected_value = 0; // Build the kernels - if (create_single_kernel_helper( context_to_use, &program, &kernel1[0], 1, write_kernels, "write_up" )) + if (create_single_kernel_helper(context_to_use, &program, &kernel1[0], 1, + write_kernels, "write_up")) return -1; error = clSetKernelArg(kernel1[0], 0, sizeof(data), &data); error |= clSetKernelArg(kernel1[0], 1, sizeof(max_count), &max_count); - test_error( error, "clSetKernelArg 1 failed"); + test_error(error, "clSetKernelArg 1 failed"); - for (i=1; i", 
(int)status ); + sprintf(tempString, "", (int)status); return tempString; } } /* Note: tests clGetEventStatus and clReleaseEvent (implicitly) */ -int test_event_get_execute_status( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_get_execute_status(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int status; - SETUP_EVENT( context, queue ); + SETUP_EVENT(context, queue); /* Now wait for it to be done */ - error = clWaitForEvents( 1, &event ); - test_error( error, "Unable to wait for event" ); - - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus to wait for event completion failed" ); - if( status != CL_COMPLETE ) + error = clWaitForEvents(1, &event); + test_error(error, "Unable to wait for event"); + + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, + "Calling clGetEventStatus to wait for event completion failed"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after event complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } @@ -113,57 +128,75 @@ int test_event_get_execute_status( cl_device_id deviceID, cl_context context, cl return 0; } -int test_event_get_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_get_info(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - SETUP_EVENT( context, queue ); + SETUP_EVENT(context, queue); /* Verify parameters of clGetEventInfo not already tested by other tests */ cl_command_queue otherQueue; size_t size; - error = clGetEventInfo( event, CL_EVENT_COMMAND_QUEUE, 
sizeof( otherQueue ), &otherQueue, &size ); - test_error( error, "Unable to get event info!" ); - // We can not check if this is the right queue because this is an opaque object. - if( size != sizeof( queue ) ) + error = clGetEventInfo(event, CL_EVENT_COMMAND_QUEUE, sizeof(otherQueue), + &otherQueue, &size); + test_error(error, "Unable to get event info!"); + // We can not check if this is the right queue because this is an opaque + // object. + if (size != sizeof(queue)) { - log_error( "ERROR: Returned command queue size does not validate (expected %d, got %d)\n", (int)sizeof( queue ), (int)size ); + log_error("ERROR: Returned command queue size does not validate " + "(expected %d, got %d)\n", + (int)sizeof(queue), (int)size); return -1; } cl_command_type type; - error = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof( type ), &type, &size ); - test_error( error, "Unable to get event info!" ); - if( type != CL_COMMAND_NDRANGE_KERNEL ) + error = clGetEventInfo(event, CL_EVENT_COMMAND_TYPE, sizeof(type), &type, + &size); + test_error(error, "Unable to get event info!"); + if (type != CL_COMMAND_NDRANGE_KERNEL) { - log_error( "ERROR: Returned command type does not validate (expected %d, got %d)\n", (int)CL_COMMAND_NDRANGE_KERNEL, (int)type ); + log_error("ERROR: Returned command type does not validate (expected " + "%d, got %d)\n", + (int)CL_COMMAND_NDRANGE_KERNEL, (int)type); return -1; } - if( size != sizeof( type ) ) + if (size != sizeof(type)) { - log_error( "ERROR: Returned command type size does not validate (expected %d, got %d)\n", (int)sizeof( type ), (int)size ); + log_error("ERROR: Returned command type size does not validate " + "(expected %d, got %d)\n", + (int)sizeof(type), (int)size); return -1; } cl_uint count; - error = clGetEventInfo( event, CL_EVENT_REFERENCE_COUNT, sizeof( count ), &count, &size ); - test_error( error, "Unable to get event info for CL_EVENT_REFERENCE_COUNT!" 
); - if( size != sizeof( count ) ) + error = clGetEventInfo(event, CL_EVENT_REFERENCE_COUNT, sizeof(count), + &count, &size); + test_error(error, "Unable to get event info for CL_EVENT_REFERENCE_COUNT!"); + if (size != sizeof(count)) { - log_error( "ERROR: Returned command type size does not validate (expected %d, got %d)\n", (int)sizeof( type ), (int)size ); + log_error("ERROR: Returned command type size does not validate " + "(expected %d, got %d)\n", + (int)sizeof(type), (int)size); return -1; } cl_context testCtx; - error = clGetEventInfo( event, CL_EVENT_CONTEXT, sizeof( testCtx ), &testCtx, &size ); - test_error( error, "Unable to get event context info!" ); - if( size != sizeof( context ) ) + error = clGetEventInfo(event, CL_EVENT_CONTEXT, sizeof(testCtx), &testCtx, + &size); + test_error(error, "Unable to get event context info!"); + if (size != sizeof(context)) { - log_error( "ERROR: Returned context size does not validate (expected %d, got %d)\n", (int)sizeof( context ), (int)size ); + log_error("ERROR: Returned context size does not validate (expected " + "%d, got %d)\n", + (int)sizeof(context), (int)size); return -1; } - if( testCtx != context ) + if (testCtx != context) { - log_error( "ERROR: Returned context does not match (expected %p, got %p)\n", (void *)context, (void *)testCtx ); + log_error( + "ERROR: Returned context does not match (expected %p, got %p)\n", + (void *)context, (void *)testCtx); return -1; } @@ -171,10 +204,11 @@ int test_event_get_info( cl_device_id deviceID, cl_context context, cl_command_q return 0; } -int test_event_get_write_array_status( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_get_write_array_status(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_mem stream; - cl_float testArray[ 1024 * 32 ]; + cl_float testArray[1024 * 32]; cl_event event; int error; cl_int status; @@ -182,34 +216,41 @@ int 
test_event_get_write_array_status( cl_device_id deviceID, cl_context context stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * 1024 * 32, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); - error = clEnqueueWriteBuffer(queue, stream, CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)testArray, 0, NULL, &event); - test_error( error, "Unable to set testing kernel data" ); + error = clEnqueueWriteBuffer(queue, stream, CL_FALSE, 0, + sizeof(cl_float) * 1024 * 32, + (void *)testArray, 0, NULL, &event); + test_error(error, "Unable to set testing kernel data"); /* Now wait for it to be done */ - error = clWaitForEvents( 1, &event ); - test_error( error, "Unable to wait for event" ); - - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus to wait for event completion failed" ); - if( status != CL_COMPLETE ) + error = clWaitForEvents(1, &event); + test_error(error, "Unable to wait for event"); + + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, + "Calling clGetEventStatus to wait for event completion failed"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array write complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after array write complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - clReleaseMemObject( stream ); - clReleaseEvent( event ); + clReleaseMemObject(stream); + clReleaseEvent(event); return 0; } -int test_event_get_read_array_status( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_get_read_array_status(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_mem 
stream; - cl_float testArray[ 1024 * 32 ]; + cl_float testArray[1024 * 32]; cl_event event; int error; cl_int status; @@ -217,58 +258,72 @@ int test_event_get_read_array_status( cl_device_id deviceID, cl_context context, stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * 1024 * 32, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); - error = clEnqueueReadBuffer(queue, stream, CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)testArray, 0, NULL, &event); - test_error( error, "Unable to get testing kernel data" ); + error = clEnqueueReadBuffer(queue, stream, CL_FALSE, 0, + sizeof(cl_float) * 1024 * 32, (void *)testArray, + 0, NULL, &event); + test_error(error, "Unable to get testing kernel data"); /* It should still be running... */ - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" ); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); - if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED + && status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array read (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "during array read (%d:%s)\n", + status, IGetStatusString(status)); return -1; } /* Now wait for it to be done */ - error = clWaitForEvents( 1, &event ); - test_error( error, "Unable to wait for event" ); - - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus to wait for event completion failed" ); - if( status != CL_COMPLETE ) 
+ error = clWaitForEvents(1, &event); + test_error(error, "Unable to wait for event"); + + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, + "Calling clGetEventStatus to wait for event completion failed"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array read complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after array read complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - clReleaseMemObject( stream ); - clReleaseEvent( event ); + clReleaseMemObject(stream); + clReleaseEvent(event); return 0; } /* clGetEventStatus not implemented yet */ -int test_event_wait_for_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_wait_for_execute(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int status; - SETUP_EVENT( context, queue ); + SETUP_EVENT(context, queue); /* Now we wait for it to be done, then test the status again */ - error = clWaitForEvents( 1, &event ); - test_error( error, "Unable to wait for execute event" ); + error = clWaitForEvents(1, &event); + test_error(error, "Unable to wait for execute event"); /* Make sure it worked */ - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" 
); - if( status != CL_COMPLETE ) + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after event complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } @@ -276,11 +331,12 @@ int test_event_wait_for_execute( cl_device_id deviceID, cl_context context, cl_c return 0; } -int test_event_wait_for_array( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_wait_for_array(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_mem streams[2]; - cl_float readArray[ 1024 * 32 ]; - cl_float writeArray[ 1024 * 32 ]; + cl_float readArray[1024 * 32]; + cl_float writeArray[1024 * 32]; cl_event events[2]; int error; cl_int status; @@ -288,128 +344,155 @@ int test_event_wait_for_array( cl_device_id deviceID, cl_context context, cl_com streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * 1024 * 32, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * 1024 * 32, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); - error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)readArray, 0, NULL, &events[0]); - test_error( error, "Unable to read testing kernel data" ); + error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, + sizeof(cl_float) * 1024 * 32, (void *)readArray, + 0, NULL, &events[0]); + test_error(error, "Unable to read testing kernel data"); - error = 
clEnqueueWriteBuffer(queue, streams[1], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)writeArray, 0, NULL, &events[1]); - test_error( error, "Unable to write testing kernel data" ); + error = clEnqueueWriteBuffer(queue, streams[1], CL_FALSE, 0, + sizeof(cl_float) * 1024 * 32, + (void *)writeArray, 0, NULL, &events[1]); + test_error(error, "Unable to write testing kernel data"); /* Both should still be running */ - error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" ); - if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED + && status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array read (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "during array read (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" 
); - if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED + && status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array write (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "during array write (%d:%s)\n", + status, IGetStatusString(status)); return -1; } /* Now try waiting for both */ - error = clWaitForEvents( 2, events ); - test_error( error, "Unable to wait for array events" ); + error = clWaitForEvents(2, events); + test_error(error, "Unable to wait for array events"); /* Double check status on both */ - error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" ); - if( status != CL_COMPLETE ) + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array read complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after array read complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" 
); - if( status != CL_COMPLETE ) + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array write complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after array write complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - clReleaseMemObject( streams[0] ); - clReleaseMemObject( streams[1] ); - clReleaseEvent( events[0] ); - clReleaseEvent( events[1] ); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseEvent(events[0]); + clReleaseEvent(events[1]); return 0; } -int test_event_flush( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_flush(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int loopCount = 0; cl_int status; - SETUP_EVENT( context, queue ); + SETUP_EVENT(context, queue); - /* Now flush. Note that we can't guarantee this actually lets the op finish, but we can guarantee it's no longer queued */ - error = clFlush( queue ); - test_error( error, "Unable to flush events" ); + /* Now flush. Note that we can't guarantee this actually lets the op finish, + * but we can guarantee it's no longer queued */ + error = clFlush(queue); + test_error(error, "Unable to flush events"); /* Make sure it worked */ - while (1) { - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, - sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" ); + while (1) + { + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); - if( status != CL_QUEUED ) - break; + if (status != CL_QUEUED) break; -#if ! 
defined( _WIN32 ) +#if !defined(_WIN32) sleep(1); // give it some time here. #else // _WIN32 - Sleep(1000); + Sleep(1000); #endif ++loopCount; - } - -/* -CL_QUEUED (command has been enqueued in the command-queue), -CL_SUBMITTED (enqueued command has been submitted by the host to the device associated with the command-queue), -CL_RUNNING (device is currently executing this command), -CL_COMPLETE (the command has completed), or -Error code given by a negative integer value. (command was abnormally terminated – this may be caused by a bad memory access etc.). -*/ - if(status != CL_COMPLETE && status != CL_SUBMITTED && - status != CL_RUNNING && status != CL_COMPLETE) - { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event flush (%d:%s)\n", status, IGetStatusString( status ) ); + } + + /* + CL_QUEUED (command has been enqueued in the command-queue), + CL_SUBMITTED (enqueued command has been submitted by the host to the device + associated with the command-queue), CL_RUNNING (device is currently + executing this command), CL_COMPLETE (the command has completed), or Error + code given by a negative integer value. (command was abnormally terminated – + this may be caused by a bad memory access etc.). 
+ */ + if (status != CL_COMPLETE && status != CL_SUBMITTED && status != CL_RUNNING + && status != CL_COMPLETE) + { + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after event flush (%d:%s)\n", + status, IGetStatusString(status)); return -1; } /* Now wait */ - error = clFinish( queue ); - test_error( error, "Unable to finish events" ); + error = clFinish(queue); + test_error(error, "Unable to finish events"); FINISH_EVENT(queue); return 0; } -int test_event_finish_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_finish_execute(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int status; - SETUP_EVENT( context, queue ); + SETUP_EVENT(context, queue); /* Now flush and finish all ops */ - error = clFinish( queue ); - test_error( error, "Unable to finish all events" ); + error = clFinish(queue); + test_error(error, "Unable to finish all events"); /* Make sure it worked */ - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" 
); - if( status != CL_COMPLETE ) + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after event complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } @@ -417,11 +500,12 @@ int test_event_finish_execute( cl_device_id deviceID, cl_context context, cl_com return 0; } -int test_event_finish_array( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_finish_array(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_mem streams[2]; - cl_float readArray[ 1024 * 32 ]; - cl_float writeArray[ 1024 * 32 ]; + cl_float readArray[1024 * 32]; + cl_float writeArray[1024 * 32]; cl_event events[2]; int error; cl_int status; @@ -429,59 +513,77 @@ int test_event_finish_array( cl_device_id deviceID, cl_context context, cl_comma streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * 1024 * 32, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * 1024 * 32, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); - error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)readArray, 0, NULL, &events[0]); - test_error( error, "Unable to read testing kernel data" ); + error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, + sizeof(cl_float) * 1024 * 32, (void *)readArray, + 0, NULL, &events[0]); + test_error(error, "Unable to read testing kernel data"); - error = 
clEnqueueWriteBuffer(queue, streams[1], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)writeArray, 0, NULL, &events[1]); - test_error( error, "Unable to write testing kernel data" ); + error = clEnqueueWriteBuffer(queue, streams[1], CL_FALSE, 0, + sizeof(cl_float) * 1024 * 32, + (void *)writeArray, 0, NULL, &events[1]); + test_error(error, "Unable to write testing kernel data"); /* Both should still be running */ - error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" ); - if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED + && status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array read (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "during array read (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" 
); - if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED + && status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array write (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "during array write (%d:%s)\n", + status, IGetStatusString(status)); return -1; } /* Now try finishing all ops */ - error = clFinish( queue ); - test_error( error, "Unable to finish all events" ); + error = clFinish(queue); + test_error(error, "Unable to finish all events"); /* Double check status on both */ - error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" ); - if( status != CL_COMPLETE ) + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array read complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after array read complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" 
); - if( status != CL_COMPLETE ) + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array write complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after array write complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - clReleaseMemObject( streams[0] ); - clReleaseMemObject( streams[1] ); - clReleaseEvent( events[0] ); - clReleaseEvent( events[1] ); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseEvent(events[0]); + clReleaseEvent(events[1]); return 0; } @@ -489,7 +591,8 @@ int test_event_finish_array( cl_device_id deviceID, cl_context context, cl_comma #define NUM_EVENT_RUNS 100 -int test_event_release_before_done( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_release_before_done(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { // Create a kernel to run clProgramWrapper program; @@ -501,21 +604,24 @@ int test_event_release_before_done( cl_device_id deviceID, cl_context context, c int error, i; // Create a kernel - if( create_single_kernel_helper( context, &program, &kernel[0], 1, sample_long_test_kernel, "sample_test" ) ) + if (create_single_kernel_helper(context, &program, &kernel[0], 1, + sample_long_test_kernel, "sample_test")) { return -1; } - for( i = 1; i < NUM_EVENT_RUNS; i++ ) { - kernel[i] = clCreateKernel(program, "sample_test", &error); - test_error(error, "Unable to create kernel"); - } + for (i = 1; i < NUM_EVENT_RUNS; i++) + { + kernel[i] = clCreateKernel(program, "sample_test", &error); + test_error(error, "Unable to create kernel"); + } - error = get_max_common_work_group_size( context, kernel[0], 1024, 
&threads[0] ); - test_error( error, "Unable to get work group size to use" ); + error = + get_max_common_work_group_size(context, kernel[0], 1024, &threads[0]); + test_error(error, "Unable to get work group size to use"); // Create a set of streams to use as arguments - for( i = 0; i < NUM_EVENT_RUNS; i++ ) + for (i = 0; i < NUM_EVENT_RUNS; i++) { streams[i][0] = clCreateBuffer(context, CL_MEM_READ_WRITE, @@ -523,77 +629,89 @@ int test_event_release_before_done( cl_device_id deviceID, cl_context context, c streams[i][1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * threads[0], NULL, &error); - if( ( streams[i][0] == NULL ) || ( streams[i][1] == NULL ) ) + if ((streams[i][0] == NULL) || (streams[i][1] == NULL)) { - log_error( "ERROR: Unable to allocate testing streams" ); + log_error("ERROR: Unable to allocate testing streams"); return -1; } } - // Execute the kernels one by one, hopefully making sure they won't be done by the time we get to the end - for( i = 0; i < NUM_EVENT_RUNS; i++ ) + // Execute the kernels one by one, hopefully making sure they won't be done + // by the time we get to the end + for (i = 0; i < NUM_EVENT_RUNS; i++) { - error = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), &streams[i][0] ); - error |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), &streams[i][1] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), &streams[i][0]); + error |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem), &streams[i][1]); + test_error(error, "Unable to set kernel arguments"); - error = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, threads, 0, NULL, &events[i]); - test_error( error, "Unable to execute test kernel" ); + error = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, + threads, 0, NULL, &events[i]); + test_error(error, "Unable to execute test kernel"); } // Free all but the last event - for( i = 0; i < NUM_EVENT_RUNS - 1; i++ ) + for (i = 0; i < 
NUM_EVENT_RUNS - 1; i++) { - clReleaseEvent( events[ i ] ); + clReleaseEvent(events[i]); } // Get status on the last one, then free it - error = clGetEventInfo( events[ NUM_EVENT_RUNS - 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get event status" ); + error = clGetEventInfo(events[NUM_EVENT_RUNS - 1], + CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), + &status, NULL); + test_error(error, "Unable to get event status"); - clReleaseEvent( events[ NUM_EVENT_RUNS - 1 ] ); + clReleaseEvent(events[NUM_EVENT_RUNS - 1]); // Was the status still-running? - if( status == CL_COMPLETE ) + if (status == CL_COMPLETE) { - log_info( "WARNING: Events completed before they could be released, so test is a null-op. Increase workload and try again." ); + log_info("WARNING: Events completed before they could be released, so " + "test is a null-op. Increase workload and try again."); } - else if( status == CL_RUNNING || status == CL_QUEUED || status == CL_SUBMITTED ) + else if (status == CL_RUNNING || status == CL_QUEUED + || status == CL_SUBMITTED) { - log_info( "Note: Event status was running or queued when released, so test was good.\n" ); + log_info("Note: Event status was running or queued when released, so " + "test was good.\n"); } // If we didn't crash by now, the test succeeded - clFinish( queue ); + clFinish(queue); return 0; } -int test_event_enqueue_marker( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_enqueue_marker(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int status; - SETUP_EVENT( context, queue ); + SETUP_EVENT(context, queue); - /* Now we queue a marker and wait for that, which--since it queues afterwards--should guarantee the execute finishes too */ + /* Now we queue a marker and wait for that, which--since it queues + * afterwards--should guarantee the execute finishes too */ clEventWrapper 
markerEvent; - //error = clEnqueueMarker( queue, &markerEvent ); + // error = clEnqueueMarker( queue, &markerEvent ); #ifdef CL_VERSION_1_2 - error = clEnqueueMarkerWithWaitList(queue, 0, NULL, &markerEvent ); + error = clEnqueueMarkerWithWaitList(queue, 0, NULL, &markerEvent); #else - error = clEnqueueMarker( queue, &markerEvent ); + error = clEnqueueMarker(queue, &markerEvent); #endif - test_error( error, "Unable to queue marker" ); + test_error(error, "Unable to queue marker"); /* Now we wait for it to be done, then test the status again */ - error = clWaitForEvents( 1, &markerEvent ); - test_error( error, "Unable to wait for marker event" ); + error = clWaitForEvents(1, &markerEvent); + test_error(error, "Unable to wait for marker event"); /* Check the status of the first event */ - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventInfo didn't work!" ); - if( status != CL_COMPLETE ) + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventInfo didn't work!"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetEventInfo after event complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetEventInfo after " + "event complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } @@ -602,81 +720,101 @@ int test_event_enqueue_marker( cl_device_id deviceID, cl_context context, cl_com } #ifdef CL_VERSION_1_2 -int test_event_enqueue_marker_with_event_list( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_enqueue_marker_with_event_list(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { - SETUP_EVENT( context, queue ); - cl_event event_list[3]={ NULL, NULL, NULL}; + SETUP_EVENT(context, queue); + cl_event 
event_list[3] = { NULL, NULL, NULL }; - size_t threads[1] = { 10 }, localThreads[1]={1}; - cl_uint event_count=2; - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]); - test_error( error, " clEnqueueMarkerWithWaitList 1 " ); + size_t threads[1] = { 10 }, localThreads[1] = { 1 }; + cl_uint event_count = 2; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[0]); + test_error(error, " clEnqueueMarkerWithWaitList 1 "); - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]); - test_error( error, " clEnqueueMarkerWithWaitList 2" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[1]); + test_error(error, " clEnqueueMarkerWithWaitList 2"); - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, NULL); - test_error( error, " clEnqueueMarkerWithWaitList 3" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, NULL); + test_error(error, " clEnqueueMarkerWithWaitList 3"); // test the case event returned - error =clEnqueueMarkerWithWaitList(queue, event_count, event_list, &event_list[2]); - test_error( error, " clEnqueueMarkerWithWaitList " ); + error = clEnqueueMarkerWithWaitList(queue, event_count, event_list, + &event_list[2]); + test_error(error, " clEnqueueMarkerWithWaitList "); error = clReleaseEvent(event_list[0]); error |= clReleaseEvent(event_list[1]); - test_error( error, "clReleaseEvent" ); + test_error(error, "clReleaseEvent"); - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]); - test_error( error, " clEnqueueMarkerWithWaitList 1 -1 " ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[0]); + test_error(error, " clEnqueueMarkerWithWaitList 1 -1 "); - error= clEnqueueNDRangeKernel( 
queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]); - test_error( error, " clEnqueueMarkerWithWaitList 2-2" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[1]); + test_error(error, " clEnqueueMarkerWithWaitList 2-2"); - // test the case event =NULL, caused [CL_INVALID_VALUE] : OpenCL Error : clEnqueueMarkerWithWaitList failed: event is a NULL value - error =clEnqueueMarkerWithWaitList(queue, event_count, event_list, NULL); - test_error( error, " clEnqueueMarkerWithWaitList " ); + // test the case event =NULL, caused [CL_INVALID_VALUE] : OpenCL Error : + // clEnqueueMarkerWithWaitList failed: event is a NULL value + error = clEnqueueMarkerWithWaitList(queue, event_count, event_list, NULL); + test_error(error, " clEnqueueMarkerWithWaitList "); error = clReleaseEvent(event_list[0]); error |= clReleaseEvent(event_list[1]); error |= clReleaseEvent(event_list[2]); - test_error( error, "clReleaseEvent" ); + test_error(error, "clReleaseEvent"); FINISH_EVENT(queue); return 0; } -int test_event_enqueue_barrier_with_event_list( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_enqueue_barrier_with_event_list(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { - SETUP_EVENT( context, queue ); - cl_event event_list[3]={ NULL, NULL, NULL}; + SETUP_EVENT(context, queue); + cl_event event_list[3] = { NULL, NULL, NULL }; - size_t threads[1] = { 10 }, localThreads[1]={1}; - cl_uint event_count=2; - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]); - test_error( error, " clEnqueueBarrierWithWaitList 1 " ); + size_t threads[1] = { 10 }, localThreads[1] = { 1 }; + cl_uint event_count = 2; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[0]); + test_error(error, " clEnqueueBarrierWithWaitList 1 "); - error= 
clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]); - test_error( error, " clEnqueueBarrierWithWaitList 2" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[1]); + test_error(error, " clEnqueueBarrierWithWaitList 2"); - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, NULL); - test_error( error, " clEnqueueBarrierWithWaitList 20" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, NULL); + test_error(error, " clEnqueueBarrierWithWaitList 20"); // test the case event returned - error =clEnqueueBarrierWithWaitList(queue, event_count, event_list, &event_list[2]); - test_error( error, " clEnqueueBarrierWithWaitList " ); + error = clEnqueueBarrierWithWaitList(queue, event_count, event_list, + &event_list[2]); + test_error(error, " clEnqueueBarrierWithWaitList "); clReleaseEvent(event_list[0]); clReleaseEvent(event_list[1]); - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]); - test_error( error, " clEnqueueBarrierWithWaitList 1 " ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[0]); + test_error(error, " clEnqueueBarrierWithWaitList 1 "); - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]); - test_error( error, " clEnqueueBarrierWithWaitList 2" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[1]); + test_error(error, " clEnqueueBarrierWithWaitList 2"); - // test the case event =NULL, caused [CL_INVALID_VALUE] : OpenCL Error : clEnqueueMarkerWithWaitList failed: event is a NULL value - error = clEnqueueBarrierWithWaitList(queue, event_count, event_list, NULL); - test_error( error, " clEnqueueBarrierWithWaitList " ); + // test the case event =NULL, caused [CL_INVALID_VALUE] : OpenCL Error 
: + // clEnqueueMarkerWithWaitList failed: event is a NULL value + error = clEnqueueBarrierWithWaitList(queue, event_count, event_list, NULL); + test_error(error, " clEnqueueBarrierWithWaitList "); clReleaseEvent(event_list[0]); clReleaseEvent(event_list[1]); diff --git a/test_conformance/events/test_userevents.cpp b/test_conformance/events/test_userevents.cpp index 0a4954f9..1fdb4ea4 100644 --- a/test_conformance/events/test_userevents.cpp +++ b/test_conformance/events/test_userevents.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -14,11 +14,11 @@ // limitations under the License. // #if defined(__APPLE__) - #include - #include +#include +#include #else - #include - #include +#include +#include #endif #include #include @@ -29,189 +29,261 @@ // CL error checking. #if defined(_MSC_VER) -#define CL_EXIT_ERROR(cmd,...) \ -{ \ -if ((cmd) != CL_SUCCESS) { \ -log_error("CL ERROR: %s %u: ", __FILE__,__LINE__);\ -log_error(## __VA_ARGS__ );\ -log_error("\n");\ -return -1;\ -}\ -} +#define CL_EXIT_ERROR(cmd, ...) \ + { \ + if ((cmd) != CL_SUCCESS) \ + { \ + log_error("CL ERROR: %s %u: ", __FILE__, __LINE__); \ + log_error(##__VA_ARGS__); \ + log_error("\n"); \ + return -1; \ + } \ + } #else -#define CL_EXIT_ERROR(cmd,format,...) \ -{ \ -if ((cmd) != CL_SUCCESS) { \ -log_error("CL ERROR: %s %u: ", __FILE__,__LINE__);\ -log_error(format,## __VA_ARGS__ );\ -log_error("\n");\ -return -1;\ -}\ -} -#endif - -#define CL_EXIT_BUILD_ERROR(cmd,program,format,...) 
\ -{ \ -if ((cmd) != CL_SUCCESS) { \ -cl_uint num_devices_;\ -clGetProgramInfo(program,CL_PROGRAM_NUM_DEVICES,sizeof(num_devices_),&num_devices_,NULL);\ -cl_device_id *device_list;\ -device_list=(cl_device_id *)malloc(num_devices_*sizeof(cl_device_id));\ -clGetProgramInfo(program,CL_PROGRAM_DEVICES,num_devices_*sizeof(cl_device_id),device_list,NULL);\ -for (unsigned i=0;i= CL_SUBMITTED) ? CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status before user event",i); +#define CL_EXIT_ERROR(cmd, format, ...) \ + { \ + if ((cmd) != CL_SUCCESS) \ + { \ + log_error("CL ERROR: %s %u: ", __FILE__, __LINE__); \ + log_error(format, ##__VA_ARGS__); \ + log_error("\n"); \ + return -1; \ + } \ } +#endif - log_info("Setting user event status to complete\n"); - CL_EXIT_ERROR(clSetUserEventStatus(u1,CL_COMPLETE),"clSetUserEventStatus failed"); - - log_info("Waiting for tasks to finish executing\n"); - CL_EXIT_ERROR(clWaitForEvents( 1, &e[N-1] ),"clWaitForEvent failed"); - - log_info("Checking task status after setting user event status\n"); - for (cl_uint i = 0; i != N; ++i) { - CL_EXIT_ERROR(clGetEventInfo(e[i],CL_EVENT_COMMAND_EXECUTION_STATUS,sizeof s,&s,0),"clGetEventInfo failed"); - CL_EXIT_ERROR((s != CL_QUEUED) ? CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status %04x after successful user event",i,s); +#define CL_EXIT_BUILD_ERROR(cmd, program, format, ...) 
\ + { \ + if ((cmd) != CL_SUCCESS) \ + { \ + cl_uint num_devices_; \ + clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, \ + sizeof(num_devices_), &num_devices_, NULL); \ + cl_device_id *device_list; \ + device_list = \ + (cl_device_id *)malloc(num_devices_ * sizeof(cl_device_id)); \ + clGetProgramInfo(program, CL_PROGRAM_DEVICES, \ + num_devices_ * sizeof(cl_device_id), device_list, \ + NULL); \ + for (unsigned i = 0; i < num_devices_; ++i) \ + { \ + size_t len; \ + char buffer[2048]; \ + clGetProgramBuildInfo(program, device_list[i], \ + CL_PROGRAM_BUILD_LOG, sizeof(buffer), \ + buffer, &len); \ + log_error("DEVICE %u CL BUILD ERROR: %s(%u): ", i, __FILE__, \ + __LINE__); \ + log_error(format, ##__VA_ARGS__); \ + log_error("\n"); \ + } \ + free(device_list); \ + return -1; \ + } \ } - CL_EXIT_ERROR(clReleaseEvent(u1),"clReleaseEvent failed"); - - for (cl_uint i = 0; i != N; ++i) - CL_EXIT_ERROR(clReleaseEvent(e[i]),"clReleaseEvent failed"); - - log_info("Successful user event case passed.\n"); - - } +const char *src[] = { "__kernel void simple_task(__global float* output) {\n" + " output[0] += 1;\n" + "}\n" }; - // Test unsuccessful user event case. /////////////////////////////////////////////////////////////////// - { - cl_event u2 = clCreateUserEvent( context, &err ); - CL_EXIT_ERROR(err,"clCreateUserEvent failed"); - - cl_event e[4]; - cl_uint N = sizeof e / sizeof(cl_event); +enum +{ + MaxDevices = 8 +}; - log_info("Enqueuing tasks\n"); - for (cl_uint i = 0; i != N; ++i) - CL_EXIT_ERROR(clEnqueueTask(queue,k0,1,&u2,&e[i]),"clEnqueueTaskFailed"); +int test_userevents(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ - log_info("Checking task status before setting user event status\n"); - for (cl_uint i = 0; i != N; ++i) { - CL_EXIT_ERROR(clGetEventInfo(e[i],CL_EVENT_COMMAND_EXECUTION_STATUS,sizeof s,&s,0),"clGetEventInfo failed"); - CL_EXIT_ERROR((s == CL_QUEUED || s == CL_SUBMITTED) ? 
CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status %d before user event",i, (int) s); + cl_int err; + + cl_event u1 = clCreateUserEvent(context, &err); + CL_EXIT_ERROR(err, "clCreateUserEvent failed"); + + // Test event properties. + cl_int s; + size_t sizeofs; + CL_EXIT_ERROR(clGetEventInfo(u1, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof s, &s, &sizeofs), + "clGetEventInfo failed"); + CL_EXIT_ERROR((sizeof s == sizeofs) ? CL_SUCCESS : -1, + "clGetEventInfo returned wrong size for " + "CL_EVENT_COMMAND_EXECUTION_STATUS"); + CL_EXIT_ERROR((s == CL_SUBMITTED) ? CL_SUCCESS : -1, + "clGetEventInfo returned wrong value for " + "CL_EVENT_COMMAND_EXECUTION_STATUS"); + + cl_command_type t; + size_t sizeoft; + CL_EXIT_ERROR( + clGetEventInfo(u1, CL_EVENT_COMMAND_TYPE, sizeof t, &t, &sizeoft), + "clGetEventInfo failed"); + CL_EXIT_ERROR( + (sizeof t == sizeoft) ? CL_SUCCESS : -1, + "clGetEventInfo returned wrong size for CL_EVENT_COMMAND_TYPE"); + CL_EXIT_ERROR( + (t == CL_COMMAND_USER) ? CL_SUCCESS : -1, + "clGetEventInfo returned wrong value for CL_EVENT_COMMAND_TYPE"); + + cl_command_queue q; + size_t sizeofq; + CL_EXIT_ERROR( + clGetEventInfo(u1, CL_EVENT_COMMAND_QUEUE, sizeof q, &q, &sizeofq), + "clGetEventInfo failed"); + CL_EXIT_ERROR( + (sizeof q == sizeofq) ? CL_SUCCESS : -1, + "clGetEventInfo returned wrong size for CL_EVENT_COMMAND_QUEUE"); + CL_EXIT_ERROR( + (q == NULL) ? CL_SUCCESS : -1, + "clGetEventInfo returned wrong value for CL_EVENT_COMMAND_QUEUE"); + + cl_context c; + size_t sizeofc; + CL_EXIT_ERROR(clGetEventInfo(u1, CL_EVENT_CONTEXT, sizeof c, &c, &sizeofc), + "clGetEventInfo failed"); + CL_EXIT_ERROR((sizeof c == sizeofc) ? CL_SUCCESS : -1, + "clGetEventInfo returned wrong size for CL_EVENT_CONTEXT"); + CL_EXIT_ERROR((c == context) ? 
CL_SUCCESS : -1, + "clGetEventInfo returned wrong value for CL_EVENT_CONTEXT"); + + cl_ulong p; + err = clGetEventProfilingInfo(u1, CL_PROFILING_COMMAND_QUEUED, sizeof p, &p, + 0); + CL_EXIT_ERROR((err != CL_SUCCESS) ? CL_SUCCESS : -1, + "clGetEventProfilingInfo returned wrong error."); + + // Test semantics. + cl_program program; + err = create_single_kernel_helper_create_program(context, &program, 1, src); + CL_EXIT_ERROR(err, "clCreateProgramWithSource failed"); + + CL_EXIT_BUILD_ERROR(clBuildProgram(program, 0, NULL, "", NULL, NULL), + program, "Building program from inline src:\t%s", + src[0]); + + cl_kernel k0 = clCreateKernel(program, "simple_task", &err); + CL_EXIT_ERROR(err, "clCreateKernel failed"); + + float buffer[1]; + cl_mem output = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof buffer, + buffer, &err); + CL_EXIT_ERROR(err, "clCreateBuffer failed."); + + CL_EXIT_ERROR(clSetKernelArg(k0, 0, sizeof(output), &output), + "clSetKernelArg failed"); + + + // Successful case. + // ////////////////////////////////////////////////////////////////////////////////////// + { + cl_event e[4]; + cl_uint N = sizeof e / sizeof(cl_event); + + log_info("Enqueuing tasks\n"); + for (cl_uint i = 0; i != N; ++i) + CL_EXIT_ERROR(clEnqueueTask(queue, k0, 1, &u1, &e[i]), + "clEnqueueTaskFailed"); + + log_info("Checking task status before setting user event status\n"); + for (cl_uint i = 0; i != N; ++i) + { + CL_EXIT_ERROR(clGetEventInfo(e[i], + CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof s, &s, 0), + "clGetEventInfo failed"); + CL_EXIT_ERROR( + (s >= CL_SUBMITTED) ? 
CL_SUCCESS : -1, + "clGetEventInfo %u returned wrong status before user event", i); + } + + log_info("Setting user event status to complete\n"); + CL_EXIT_ERROR(clSetUserEventStatus(u1, CL_COMPLETE), + "clSetUserEventStatus failed"); + + log_info("Waiting for tasks to finish executing\n"); + CL_EXIT_ERROR(clWaitForEvents(1, &e[N - 1]), "clWaitForEvent failed"); + + log_info("Checking task status after setting user event status\n"); + for (cl_uint i = 0; i != N; ++i) + { + CL_EXIT_ERROR(clGetEventInfo(e[i], + CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof s, &s, 0), + "clGetEventInfo failed"); + CL_EXIT_ERROR((s != CL_QUEUED) ? CL_SUCCESS : -1, + "clGetEventInfo %u returned wrong status %04x after " + "successful user event", + i, s); + } + + CL_EXIT_ERROR(clReleaseEvent(u1), "clReleaseEvent failed"); + + for (cl_uint i = 0; i != N; ++i) + CL_EXIT_ERROR(clReleaseEvent(e[i]), "clReleaseEvent failed"); + + log_info("Successful user event case passed.\n"); } - log_info("Setting user event status to unsuccessful result\n"); - CL_EXIT_ERROR(clSetUserEventStatus(u2,-1),"clSetUserEventStatus failed"); - - log_info("Waiting for tasks to finish executing\n"); - CL_EXIT_ERROR((clWaitForEvents( N, &e[0] )!=CL_SUCCESS) ? CL_SUCCESS : -1,"clWaitForEvent succeeded when it should have failed"); - - log_info("Checking task status after setting user event status\n"); - for (cl_uint i = 0; i != N; ++i) { - CL_EXIT_ERROR(clGetEventInfo(e[i],CL_EVENT_COMMAND_EXECUTION_STATUS,sizeof s,&s,0),"clGetEventInfo failed"); - CL_EXIT_ERROR((s != CL_QUEUED) ? CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status %04x after unsuccessful user event",i,s); + // Test unsuccessful user event case. 
+ // /////////////////////////////////////////////////////////////////// + { + cl_event u2 = clCreateUserEvent(context, &err); + CL_EXIT_ERROR(err, "clCreateUserEvent failed"); + + cl_event e[4]; + cl_uint N = sizeof e / sizeof(cl_event); + + log_info("Enqueuing tasks\n"); + for (cl_uint i = 0; i != N; ++i) + CL_EXIT_ERROR(clEnqueueTask(queue, k0, 1, &u2, &e[i]), + "clEnqueueTaskFailed"); + + log_info("Checking task status before setting user event status\n"); + for (cl_uint i = 0; i != N; ++i) + { + CL_EXIT_ERROR(clGetEventInfo(e[i], + CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof s, &s, 0), + "clGetEventInfo failed"); + CL_EXIT_ERROR( + (s == CL_QUEUED || s == CL_SUBMITTED) ? CL_SUCCESS : -1, + "clGetEventInfo %u returned wrong status %d before user event", + i, (int)s); + } + + log_info("Setting user event status to unsuccessful result\n"); + CL_EXIT_ERROR(clSetUserEventStatus(u2, -1), + "clSetUserEventStatus failed"); + + log_info("Waiting for tasks to finish executing\n"); + CL_EXIT_ERROR((clWaitForEvents(N, &e[0]) != CL_SUCCESS) ? CL_SUCCESS + : -1, + "clWaitForEvent succeeded when it should have failed"); + + log_info("Checking task status after setting user event status\n"); + for (cl_uint i = 0; i != N; ++i) + { + CL_EXIT_ERROR(clGetEventInfo(e[i], + CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof s, &s, 0), + "clGetEventInfo failed"); + CL_EXIT_ERROR((s != CL_QUEUED) ? 
CL_SUCCESS : -1, + "clGetEventInfo %u returned wrong status %04x after " + "unsuccessful user event", + i, s); + } + + CL_EXIT_ERROR(clReleaseEvent(u2), "clReleaseEvent failed"); + + for (cl_uint i = 0; i != N; ++i) + CL_EXIT_ERROR(clReleaseEvent(e[i]), "clReleaseEvent failed"); + + log_info("Unsuccessful user event case passed.\n"); } - CL_EXIT_ERROR(clReleaseEvent(u2),"clReleaseEvent failed"); - - for (cl_uint i = 0; i != N; ++i) - CL_EXIT_ERROR(clReleaseEvent(e[i]),"clReleaseEvent failed"); - - log_info("Unsuccessful user event case passed.\n"); - } - - clReleaseKernel(k0); - clReleaseProgram(program); - clReleaseMemObject(output); - - return 0; + clReleaseKernel(k0); + clReleaseProgram(program); + clReleaseMemObject(output); + return 0; } - diff --git a/test_conformance/events/test_userevents_multithreaded.cpp b/test_conformance/events/test_userevents_multithreaded.cpp index 51ef2226..a7845bf1 100644 --- a/test_conformance/events/test_userevents_multithreaded.cpp +++ b/test_conformance/events/test_userevents_multithreaded.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -19,8 +19,8 @@ #include -#if !defined (_MSC_VER) - #include +#if !defined(_MSC_VER) +#include #endif // !_MSC_VER void trigger_user_event(cl_event *event) @@ -30,44 +30,44 @@ void trigger_user_event(cl_event *event) clSetUserEventStatus(*event, CL_COMPLETE); } -int test_userevents_multithreaded( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +int test_userevents_multithreaded(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int error; // Set up a user event to act as a gate - clEventWrapper gateEvent = clCreateUserEvent( context, &error ); - test_error( error, "Unable to create user gate event" ); + clEventWrapper gateEvent = clCreateUserEvent(context, &error); + test_error(error, "Unable to create user gate event"); // Set up a few actions gated on the user event NDRangeKernelAction action1; ReadBufferAction action2; WriteBufferAction action3; - clEventWrapper actionEvents[ 3 ]; - Action * actions[] = { &action1, &action2, &action3, NULL }; + clEventWrapper actionEvents[3]; + Action *actions[] = { &action1, &action2, &action3, NULL }; - for( int i = 0; actions[ i ] != NULL; i++ ) + for (int i = 0; actions[i] != NULL; i++) { - error = actions[ i ]->Setup( deviceID, context, queue ); - test_error( error, "Unable to set up test action" ); + error = actions[i]->Setup(deviceID, context, queue); + test_error(error, "Unable to set up test action"); - error = actions[ i ]->Execute( queue, 1, &gateEvent, &actionEvents[ i ] ); - test_error( error, "Unable to execute test action" ); + error = actions[i]->Execute(queue, 1, &gateEvent, &actionEvents[i]); + test_error(error, "Unable to execute test action"); } // Now, instead of releasing the gate, we spawn a separate thread to do so - log_info( "\tStarting trigger thread...\n" ); + log_info("\tStarting trigger thread...\n"); std::thread thread(trigger_user_event, &gateEvent); - log_info( "\tWaiting 
for actions...\n" ); - error = clWaitForEvents( 3, &actionEvents[ 0 ] ); - test_error( error, "Unable to wait for action events" ); + log_info("\tWaiting for actions...\n"); + error = clWaitForEvents(3, &actionEvents[0]); + test_error(error, "Unable to wait for action events"); thread.join(); - log_info( "\tActions completed.\n" ); + log_info("\tActions completed.\n"); // If we got here without error, we're good return 0; } - diff --git a/test_conformance/events/test_waitlists.cpp b/test_conformance/events/test_waitlists.cpp index ebf5da9b..6036451f 100644 --- a/test_conformance/events/test_waitlists.cpp +++ b/test_conformance/events/test_waitlists.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -17,15 +17,16 @@ #include "action_classes.h" -extern const char *IGetStatusString( cl_int status ); +extern const char *IGetStatusString(cl_int status); #define PRINT_OPS 0 -int test_waitlist( cl_device_id device, cl_context context, cl_command_queue queue, Action *actionToTest, bool multiple ) +int test_waitlist(cl_device_id device, cl_context context, + cl_command_queue queue, Action *actionToTest, bool multiple) { - NDRangeKernelAction actions[ 2 ]; - clEventWrapper events[ 3 ]; - cl_int status[ 3 ]; + NDRangeKernelAction actions[2]; + clEventWrapper events[3]; + cl_int status[3]; cl_int error; if (multiple) @@ -37,41 +38,43 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que "reference event 0 in its waitlist.\n"); // Set up the first base action to wait against - error = actions[ 0 ].Setup( device, context, queue ); - test_error( error, "Unable to setup base event to wait against" ); + error = actions[0].Setup(device, context, queue); + test_error(error, "Unable to setup base event to wait against"); - if( multiple ) + if (multiple) 
{ // Set up a second event to wait against - error = actions[ 1 ].Setup( device, context, queue ); - test_error( error, "Unable to setup second base event to wait against" ); + error = actions[1].Setup(device, context, queue); + test_error(error, "Unable to setup second base event to wait against"); } // Now set up the actual action to test - error = actionToTest->Setup( device, context, queue ); - test_error( error, "Unable to set up test event" ); + error = actionToTest->Setup(device, context, queue); + test_error(error, "Unable to set up test event"); // Execute all events now if (PRINT_OPS) log_info("\tExecuting action 0...\n"); - error = actions[ 0 ].Execute( queue, 0, NULL, &events[ 0 ] ); - test_error( error, "Unable to execute first event" ); + error = actions[0].Execute(queue, 0, NULL, &events[0]); + test_error(error, "Unable to execute first event"); - if( multiple ) + if (multiple) { - if (PRINT_OPS) log_info("\tExecuting action 1...\n"); - error = actions[ 1 ].Execute( queue, 1, &events[0], &events[ 1 ] ); - test_error( error, "Unable to execute second event" ); + if (PRINT_OPS) log_info("\tExecuting action 1...\n"); + error = actions[1].Execute(queue, 1, &events[0], &events[1]); + test_error(error, "Unable to execute second event"); } // Sanity check if (multiple) { if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); - error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[1]), &status[1], NULL); test_error(error, "Unable to get event status"); } if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); - error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[0]), &status[0], NULL); test_error(error, "Unable to get event status"); 
log_info("\t\tEvent status after starting reference events: reference " @@ -79,28 +82,34 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que IGetStatusString(status[0]), (multiple ? IGetStatusString(status[1]) : "N/A"), "N/A"); - if( ( status[ 0 ] == CL_COMPLETE ) || ( multiple && status[ 1 ] == CL_COMPLETE ) ) + if ((status[0] == CL_COMPLETE) || (multiple && status[1] == CL_COMPLETE)) { - log_info( "WARNING: Reference event(s) already completed before we could execute test event! Possible that the reference event blocked (implicitly passing)\n" ); + log_info("WARNING: Reference event(s) already completed before we " + "could execute test event! Possible that the reference event " + "blocked (implicitly passing)\n"); return 0; } if (PRINT_OPS) log_info("\tExecuting action to test...\n"); - error = actionToTest->Execute( queue, ( multiple ) ? 2 : 1, &events[ 0 ], &events[ 2 ] ); - test_error( error, "Unable to execute test event" ); + error = actionToTest->Execute(queue, (multiple) ? 
2 : 1, &events[0], + &events[2]); + test_error(error, "Unable to execute test event"); // Hopefully, the first event is still running if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); - error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); - test_error( error, "Unable to get event status" ); + error = clGetEventInfo(events[2], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[2]), &status[2], NULL); + test_error(error, "Unable to get event status"); if (multiple) { if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); - error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[1]), &status[1], NULL); test_error(error, "Unable to get event status"); } if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); - error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[0]), &status[0], NULL); test_error(error, "Unable to get event status"); log_info("\t\tEvent status after starting test event: reference event 0: " @@ -109,12 +118,13 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que (multiple ? 
IGetStatusString(status[1]) : "N/A"), IGetStatusString(status[2])); - if( multiple ) + if (multiple) { - if( status[ 0 ] == CL_COMPLETE && status[ 1 ] == CL_COMPLETE ) + if (status[0] == CL_COMPLETE && status[1] == CL_COMPLETE) { - log_info( "WARNING: Both events completed, so unable to test further (implicitly passing).\n" ); - clFinish( queue ); + log_info("WARNING: Both events completed, so unable to test " + "further (implicitly passing).\n"); + clFinish(queue); return 0; } @@ -124,50 +134,59 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que "ERROR: Test failed because the second wait event is complete " "and the first is not.(status: 0: %s and 1: %s)\n", IGetStatusString(status[0]), IGetStatusString(status[1])); - clFinish( queue ); + clFinish(queue); return -1; } } else { - if( status[ 0 ] == CL_COMPLETE ) + if (status[0] == CL_COMPLETE) { - log_info( "WARNING: Reference event completed, so unable to test further (implicitly passing).\n" ); - clFinish( queue ); + log_info("WARNING: Reference event completed, so unable to test " + "further (implicitly passing).\n"); + clFinish(queue); return 0; } - if( status[ 0 ] != CL_RUNNING && status[ 0 ] != CL_QUEUED && status[ 0 ] != CL_SUBMITTED ) + if (status[0] != CL_RUNNING && status[0] != CL_QUEUED + && status[0] != CL_SUBMITTED) { - log_error( "ERROR: Test failed because first wait event is not currently running, queued, or submitted! (status: 0: %s)\n", IGetStatusString( status[ 0 ] ) ); - clFinish( queue ); + log_error( + "ERROR: Test failed because first wait event is not currently " + "running, queued, or submitted! (status: 0: %s)\n", + IGetStatusString(status[0])); + clFinish(queue); return -1; } } - if( status[ 2 ] != CL_QUEUED && status[ 2 ] != CL_SUBMITTED ) + if (status[2] != CL_QUEUED && status[2] != CL_SUBMITTED) { - log_error( "ERROR: Test event is not waiting to run! 
(status: 2: %s)\n", IGetStatusString( status[ 2 ] ) ); - clFinish( queue ); + log_error("ERROR: Test event is not waiting to run! (status: 2: %s)\n", + IGetStatusString(status[2])); + clFinish(queue); return -1; } // Now wait for the first reference event if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n"); - error = clWaitForEvents( 1, &events[ 0 ] ); - test_error( error, "Unable to wait for reference event" ); + error = clWaitForEvents(1, &events[0]); + test_error(error, "Unable to wait for reference event"); // Grab statuses again if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); - error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); - test_error( error, "Unable to get event status" ); + error = clGetEventInfo(events[2], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[2]), &status[2], NULL); + test_error(error, "Unable to get event status"); if (multiple) { if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); - error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[1]), &status[1], NULL); test_error(error, "Unable to get event status"); } if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); - error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[0]), &status[0], NULL); test_error(error, "Unable to get event status"); log_info("\t\tEvent status after waiting for reference event 0: reference " @@ -177,15 +196,18 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que IGetStatusString(status[2])); // Sanity - if( status[ 0 ] != CL_COMPLETE ) + if (status[0] != CL_COMPLETE) { - log_error( "ERROR: Waited for first event but 
it's not complete (status: 0: %s)\n", IGetStatusString( status[ 0 ] ) ); - clFinish( queue ); + log_error("ERROR: Waited for first event but it's not complete " + "(status: 0: %s)\n", + IGetStatusString(status[0])); + clFinish(queue); return -1; } - // If we're multiple, and the second event isn't complete, then our test event should still be queued - if( multiple && status[ 1 ] != CL_COMPLETE ) + // If we're multiple, and the second event isn't complete, then our test + // event should still be queued + if (multiple && status[1] != CL_COMPLETE) { if (status[1] == CL_RUNNING && status[2] == CL_RUNNING) { @@ -193,17 +215,19 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que clFinish(queue); return -1; } - if( status[ 2 ] != CL_QUEUED && status[ 2 ] != CL_SUBMITTED ) + if (status[2] != CL_QUEUED && status[2] != CL_SUBMITTED) { - log_error( "ERROR: Test event did not wait for second event before starting! (status of ref: 1: %s, of test: 2: %s)\n", IGetStatusString( status[ 1 ] ), IGetStatusString( status[ 2 ] ) ); - clFinish( queue ); + log_error("ERROR: Test event did not wait for second event before " + "starting! 
(status of ref: 1: %s, of test: 2: %s)\n", + IGetStatusString(status[1]), IGetStatusString(status[2])); + clFinish(queue); return -1; } // Now wait for second event to complete, too if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n"); - error = clWaitForEvents( 1, &events[ 1 ] ); - test_error( error, "Unable to wait for second reference event" ); + error = clWaitForEvents(1, &events[1]); + test_error(error, "Unable to wait for second reference event"); // Grab statuses again if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); @@ -230,32 +254,38 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que IGetStatusString(status[2])); // Sanity - if( status[ 1 ] != CL_COMPLETE ) + if (status[1] != CL_COMPLETE) { - log_error( "ERROR: Waited for second reference event but it didn't complete (status: 1: %s)\n", IGetStatusString( status[ 1 ] ) ); - clFinish( queue ); + log_error("ERROR: Waited for second reference event but it didn't " + "complete (status: 1: %s)\n", + IGetStatusString(status[1])); + clFinish(queue); return -1; } } - // At this point, the test event SHOULD be running, but if it completed, we consider it a pass - if( status[ 2 ] == CL_COMPLETE ) + // At this point, the test event SHOULD be running, but if it completed, we + // consider it a pass + if (status[2] == CL_COMPLETE) { - log_info( "WARNING: Test event already completed. Assumed valid.\n" ); - clFinish( queue ); + log_info("WARNING: Test event already completed. Assumed valid.\n"); + clFinish(queue); return 0; } - if( status[ 2 ] != CL_RUNNING && status[ 2 ] != CL_SUBMITTED && status[ 2 ] != CL_QUEUED) + if (status[2] != CL_RUNNING && status[2] != CL_SUBMITTED + && status[2] != CL_QUEUED) { - log_error( "ERROR: Second event did not start running after reference event(s) completed! 
(status: 2: %s)\n", IGetStatusString( status[ 2 ] ) ); - clFinish( queue ); + log_error("ERROR: Second event did not start running after reference " + "event(s) completed! (status: 2: %s)\n", + IGetStatusString(status[2])); + clFinish(queue); return -1; } // Wait for the test event, then return if (PRINT_OPS) log_info("\tWaiting for action 2 to test to finish...\n"); - error = clWaitForEvents( 1, &events[ 2 ] ); - test_error( error, "Unable to wait for test event" ); + error = clWaitForEvents(1, &events[2]); + test_error(error, "Unable to wait for test event"); error |= clGetEventInfo(events[2], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status[2]), &status[2], NULL); @@ -280,74 +310,81 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que return 0; } -#define TEST_ACTION( name ) \ - { \ - name##Action action; \ - log_info( "-- Testing " #name " (waiting on 1 event)...\n" ); \ - if( ( error = test_waitlist( deviceID, context, queue, &action, false ) ) != CL_SUCCESS ) \ - retVal++; \ - clFinish( queue ); \ - } \ - if( error == CL_SUCCESS ) /* Only run multiples test if single test passed */ \ - { \ - name##Action action; \ - log_info( "-- Testing " #name " (waiting on 2 events)...\n" ); \ - if( ( error = test_waitlist( deviceID, context, queue, &action, true ) ) != CL_SUCCESS ) \ - retVal++; \ - clFinish( queue ); \ +#define TEST_ACTION(name) \ + { \ + name##Action action; \ + log_info("-- Testing " #name " (waiting on 1 event)...\n"); \ + if ((error = test_waitlist(deviceID, context, queue, &action, false)) \ + != CL_SUCCESS) \ + retVal++; \ + clFinish(queue); \ + } \ + if (error \ + == CL_SUCCESS) /* Only run multiples test if single test passed */ \ + { \ + name##Action action; \ + log_info("-- Testing " #name " (waiting on 2 events)...\n"); \ + if ((error = test_waitlist(deviceID, context, queue, &action, true)) \ + != CL_SUCCESS) \ + retVal++; \ + clFinish(queue); \ } -int test_waitlists( cl_device_id deviceID, cl_context context, 
cl_command_queue oldQueue, int num_elements ) +int test_waitlists(cl_device_id deviceID, cl_context context, + cl_command_queue oldQueue, int num_elements) { cl_int error; int retVal = 0; cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; - if( !checkDeviceForQueueSupport( deviceID, props ) ) + if (!checkDeviceForQueueSupport(deviceID, props)) { - log_info( "WARNING: Device does not support out-of-order exec mode; skipping test.\n" ); + log_info("WARNING: Device does not support out-of-order exec mode; " + "skipping test.\n"); return 0; } - clCommandQueueWrapper queue = clCreateCommandQueue( context, deviceID, props, &error ); + clCommandQueueWrapper queue = + clCreateCommandQueue(context, deviceID, props, &error); test_error(error, "Unable to create out-of-order queue"); - log_info( "\n" ); + log_info("\n"); - TEST_ACTION( NDRangeKernel ) + TEST_ACTION(NDRangeKernel) - TEST_ACTION( ReadBuffer ) - TEST_ACTION( WriteBuffer ) - TEST_ACTION( MapBuffer ) - TEST_ACTION( UnmapBuffer ) + TEST_ACTION(ReadBuffer) + TEST_ACTION(WriteBuffer) + TEST_ACTION(MapBuffer) + TEST_ACTION(UnmapBuffer) - if( checkForImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED ) + if (checkForImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED) { - log_info( "\nNote: device does not support images. Skipping remainder of waitlist tests...\n" ); + log_info("\nNote: device does not support images. Skipping remainder " + "of waitlist tests...\n"); } else { - TEST_ACTION( ReadImage2D ) - TEST_ACTION( WriteImage2D ) - TEST_ACTION( CopyImage2Dto2D ) - TEST_ACTION( Copy2DImageToBuffer ) - TEST_ACTION( CopyBufferTo2DImage ) - TEST_ACTION( MapImage ) - - if( checkFor3DImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED ) - log_info("Device does not support 3D images. 
Skipping remainder of waitlist tests...\n"); + TEST_ACTION(ReadImage2D) + TEST_ACTION(WriteImage2D) + TEST_ACTION(CopyImage2Dto2D) + TEST_ACTION(Copy2DImageToBuffer) + TEST_ACTION(CopyBufferTo2DImage) + TEST_ACTION(MapImage) + + if (checkFor3DImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + log_info("Device does not support 3D images. Skipping remainder of " + "waitlist tests...\n"); else { - TEST_ACTION( ReadImage3D ) - TEST_ACTION( WriteImage3D ) - TEST_ACTION( CopyImage2Dto3D ) - TEST_ACTION( CopyImage3Dto2D ) - TEST_ACTION( CopyImage3Dto3D ) - TEST_ACTION( Copy3DImageToBuffer ) - TEST_ACTION( CopyBufferTo3DImage ) + TEST_ACTION(ReadImage3D) + TEST_ACTION(WriteImage3D) + TEST_ACTION(CopyImage2Dto3D) + TEST_ACTION(CopyImage3Dto2D) + TEST_ACTION(CopyImage3Dto3D) + TEST_ACTION(Copy3DImageToBuffer) + TEST_ACTION(CopyBufferTo3DImage) } } return retVal; } - -- cgit v1.2.3 From 5d5bffba13e4187f1378a8d3f8db6d5662cf1dc2 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 13 Sep 2022 14:48:54 +0100 Subject: [NFC] Declare format tables as const (#1493) Without const, these variables would be flagged up by `-Wunused-variable`. Drop `struct` from the declarations as that is not needed in C++. Signed-off-by: Sven van Haastregt --- test_conformance/gl/common.h | 32 +++++++++++----------- test_conformance/gl/test_images_getinfo_common.cpp | 17 +++++++----- test_conformance/gl/test_images_read_common.cpp | 14 +++++----- test_conformance/gl/test_images_write_common.cpp | 5 ++-- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/test_conformance/gl/common.h b/test_conformance/gl/common.h index aaa6a5e7..d8587cf0 100644 --- a/test_conformance/gl/common.h +++ b/test_conformance/gl/common.h @@ -32,12 +32,8 @@ struct format { }; // These are the typically tested formats. -// TODO: These variables should be made const; until then, suppress unused -// variable warnings as not every translation unit including this header uses -// all variables. 
-#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-variable" -static struct format common_formats[] = { +// clang-format off +static const format common_formats[] = { #ifdef __APPLE__ { GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, kUChar }, { GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, @@ -57,26 +53,30 @@ static struct format common_formats[] = { }; #ifdef GL_VERSION_3_2 -static struct format depth_formats[] = { +static const format depth_formats[] = { { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, kUShort }, { GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, kFloat }, { GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, kUInt }, { GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, kFloat }, }; #endif -#pragma GCC diagnostic pop +// clang-format on int test_images_write_common(cl_device_id device, cl_context context, - cl_command_queue queue, struct format* formats, size_t nformats, - GLenum *targets, size_t ntargets, sizevec_t* sizes, size_t nsizes ); + cl_command_queue queue, const format *formats, + size_t nformats, GLenum *targets, size_t ntargets, + sizevec_t *sizes, size_t nsizes); -int test_images_read_common( cl_device_id device, cl_context context, - cl_command_queue queue, struct format* formats, size_t nformats, - GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes ); +int test_images_read_common(cl_device_id device, cl_context context, + cl_command_queue queue, const format *formats, + size_t nformats, GLenum *targets, size_t ntargets, + sizevec_t *sizes, size_t nsizes); -int test_images_get_info_common( cl_device_id device, cl_context context, - cl_command_queue queue, struct format* formats, size_t nformats, - GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes ); +int test_images_get_info_common(cl_device_id device, cl_context context, + cl_command_queue queue, const format *formats, + size_t nformats, GLenum *targets, + size_t ntargets, 
sizevec_t *sizes, + size_t nsizes); int is_rgb_101010_supported( cl_context context, GLenum gl_target ); diff --git a/test_conformance/gl/test_images_getinfo_common.cpp b/test_conformance/gl/test_images_getinfo_common.cpp index 345b5950..2322c269 100644 --- a/test_conformance/gl/test_images_getinfo_common.cpp +++ b/test_conformance/gl/test_images_getinfo_common.cpp @@ -86,10 +86,11 @@ static int test_image_info( cl_context context, cl_command_queue queue, return CheckGLObjectInfo(streams[0], object_type, glTexture, glTarget, 0); } -static int test_image_format_get_info( - cl_context context, cl_command_queue queue, - size_t width, size_t height, size_t depth, - GLenum target, struct format* fmt, MTdata data) +static int test_image_format_get_info(cl_context context, + cl_command_queue queue, size_t width, + size_t height, size_t depth, + GLenum target, const format *fmt, + MTdata data) { int error = 0; @@ -197,9 +198,11 @@ static int test_image_format_get_info( &actualType, (void **)&outBuffer ); } -int test_images_get_info_common( cl_device_id device, cl_context context, - cl_command_queue queue, struct format* formats, size_t nformats, - GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes ) +int test_images_get_info_common(cl_device_id device, cl_context context, + cl_command_queue queue, const format *formats, + size_t nformats, GLenum *targets, + size_t ntargets, sizevec_t *sizes, + size_t nsizes) { int error = 0; RandomSeed seed(gRandomSeed); diff --git a/test_conformance/gl/test_images_read_common.cpp b/test_conformance/gl/test_images_read_common.cpp index 112c7891..fe2a529b 100644 --- a/test_conformance/gl/test_images_read_common.cpp +++ b/test_conformance/gl/test_images_read_common.cpp @@ -386,10 +386,9 @@ static int test_image_read( cl_context context, cl_command_queue queue, width, height, depth, sampleNum, outFormat, outType, outResultBuffer ); } -static int test_image_format_read( - cl_context context, cl_command_queue queue, - size_t 
width, size_t height, size_t depth, - GLenum target, struct format* fmt, MTdata data) +static int test_image_format_read(cl_context context, cl_command_queue queue, + size_t width, size_t height, size_t depth, + GLenum target, const format *fmt, MTdata data) { int error = 0; @@ -645,9 +644,10 @@ static int test_image_format_read( } } -int test_images_read_common( cl_device_id device, cl_context context, - cl_command_queue queue, struct format* formats, size_t nformats, - GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes ) +int test_images_read_common(cl_device_id device, cl_context context, + cl_command_queue queue, const format *formats, + size_t nformats, GLenum *targets, size_t ntargets, + sizevec_t *sizes, size_t nsizes) { int error = 0; RandomSeed seed(gRandomSeed); diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp index 15bad520..0dba83bb 100644 --- a/test_conformance/gl/test_images_write_common.cpp +++ b/test_conformance/gl/test_images_write_common.cpp @@ -660,8 +660,9 @@ static int test_image_format_write( cl_context context, cl_command_queue queue, // combination. 
int test_images_write_common(cl_device_id device, cl_context context, - cl_command_queue queue, struct format* formats, size_t nformats, - GLenum *targets, size_t ntargets, sizevec_t* sizes, size_t nsizes ) + cl_command_queue queue, const format *formats, + size_t nformats, GLenum *targets, size_t ntargets, + sizevec_t *sizes, size_t nsizes) { int err = 0; int error = 0; -- cgit v1.2.3 From d42b3dcfb6ea192b03cc37501f5e1c0e692303be Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 13 Sep 2022 17:49:09 +0100 Subject: [NFC] Fix typo (enevt_type -> event_type) (#1498) Signed-off-by: Stuart Brady --- test_conformance/events/test_callbacks.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test_conformance/events/test_callbacks.cpp b/test_conformance/events/test_callbacks.cpp index 911298a5..04481dec 100644 --- a/test_conformance/events/test_callbacks.cpp +++ b/test_conformance/events/test_callbacks.cpp @@ -55,7 +55,7 @@ commandStatus, void * userData ) /* use struct as call back para */ typedef struct { - cl_int enevt_type; + cl_int event_type; int index; } CALL_BACK_USER_DATA; @@ -67,7 +67,7 @@ void CL_CALLBACK single_event_callback_function_flags(cl_event event, CALL_BACK_USER_DATA *pdata = static_cast(userData); log_info("\tEvent callback %d of type %d triggered\n", pdata->index, - pdata->enevt_type); + pdata->event_type); sCallbackTriggered_flag[pdata->index] = true; } @@ -95,7 +95,7 @@ int test_callback_event_single(cl_device_id device, cl_context context, CALL_BACK_USER_DATA user_data[EVENT_CALLBACK_TYPE_TOTAL]; for (int i = 0; i < EVENT_CALLBACK_TYPE_TOTAL; i++) { - user_data[i].enevt_type = event_callback_types[i]; + user_data[i].event_type = event_callback_types[i]; user_data[i].index = i; error = clSetEventCallback(actualEvent, event_callback_types[i], single_event_callback_function_flags, -- cgit v1.2.3 From 426097cf7c2e0e4b6c659bd0b744e6f51e61805d Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 13 Sep 2022 
10:50:25 -0600 Subject: gles: Limit variable definition to the same scope as usage (#1495) Fix unused-variable errors by limiting variable definition to the case that would use it --- test_conformance/gles/main.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_conformance/gles/main.cpp b/test_conformance/gles/main.cpp index 644fa63c..60e020d8 100644 --- a/test_conformance/gles/main.cpp +++ b/test_conformance/gles/main.cpp @@ -320,8 +320,10 @@ int main(int argc, const char *argv[]) goto cleanup; } +#ifdef GLES3 int argc_ = (first_32_testname) ? 1 + (argc - first_32_testname) : argc; const char** argv_ = (first_32_testname) ? &argv[first_32_testname-1] : argv; +#endif // Execute the tests. for( size_t i = 0; i < numDevices; i++ ) { -- cgit v1.2.3 From c0a10f4e12c1a4866a37449c5697a2f4c5e82e25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 13 Sep 2022 17:58:24 +0100 Subject: Tests for cl-ext-image-from-buffer and cl-ext-image-requirements-info (#1438) * Add CTS tests for cl_ext_image_requirements_info Change-Id: I20c1c77ff5ba88eb475801bafba30ef9caf82601 * Add CTS tests for cl_ext_image_from_buffer Change-Id: Ic30429d77a1317d0fea7d9ecc6d603267fa6602f * Fixes for image_from_buffer and image_requirements extension * Use CL_MEM_READ_WRITE flag when creating images that support CL_MEM_KERNEL_READ_AND_WRITE (#1447) * format fixes Change-Id: I04d69720730440cb61e64fed2cb5065b2ff8bf90 Co-authored-by: Oualid Khelifi Co-authored-by: oramirez Co-authored-by: Sreelakshmi Haridas Maruthur --- .../images/kernel_read_write/CMakeLists.txt | 2 + test_conformance/images/kernel_read_write/main.cpp | 143 ++- .../kernel_read_write/test_cl_ext_image_buffer.hpp | 124 +++ .../test_cl_ext_image_from_buffer.cpp | 1007 ++++++++++++++++++++ .../test_cl_ext_image_requirements_info.cpp | 482 ++++++++++ 5 files changed, 1753 insertions(+), 5 deletions(-) create mode 100644 test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp create mode 100644 
test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp create mode 100644 test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt index 54449875..6eb5dc7f 100644 --- a/test_conformance/images/kernel_read_write/CMakeLists.txt +++ b/test_conformance/images/kernel_read_write/CMakeLists.txt @@ -14,6 +14,8 @@ set(${MODULE_NAME}_SOURCES test_write_1D_array.cpp test_write_2D_array.cpp test_write_3D.cpp + test_cl_ext_image_requirements_info.cpp + test_cl_ext_image_from_buffer.cpp ../common.cpp ) diff --git a/test_conformance/images/kernel_read_write/main.cpp b/test_conformance/images/kernel_read_write/main.cpp index 31dceb33..0a93a974 100644 --- a/test_conformance/images/kernel_read_write/main.cpp +++ b/test_conformance/images/kernel_read_write/main.cpp @@ -53,6 +53,43 @@ static void printUsage( const char *execName ); extern int test_image_set( cl_device_id device, cl_context context, cl_command_queue queue, test_format_set_fn formatTestFn, cl_mem_object_type imageType ); +extern int cl_image_requirements_size_ext_negative(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int cl_image_requirements_size_ext_consistency(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int clGetImageRequirementsInfoEXT_negative(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int cl_image_requirements_max_val_ext_negative(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int cl_image_requirements_max_val_ext_positive(cl_device_id device, + cl_context context, + cl_command_queue queue); + +extern int image2d_from_buffer_positive(cl_device_id device, cl_context context, + cl_command_queue queue); +extern int memInfo_image_from_buffer_positive(cl_device_id device, + cl_context context, + cl_command_queue 
queue); +extern int imageInfo_image_from_buffer_positive(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int image_from_buffer_alignment_negative(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int image_from_small_buffer_negative(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int image_from_buffer_fill_positive(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int image_from_buffer_read_positive(cl_device_id device, + cl_context context, + cl_command_queue queue); + /** read_write images only support sampler-less read buildt-ins which require special settings * for some global parameters. This pair of functions temporarily overwrite those global parameters * and then recover them after completing a read_write test. @@ -246,12 +283,108 @@ int test_2Darray(cl_device_id device, cl_context context, cl_command_queue queue return doTest( device, context, queue, CL_MEM_OBJECT_IMAGE2D_ARRAY ); } +int test_cl_image_requirements_size_ext_negative(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return cl_image_requirements_size_ext_negative(device, context, queue); +} +int test_cl_image_requirements_size_ext_consistency(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return cl_image_requirements_size_ext_consistency(device, context, queue); +} +int test_clGetImageRequirementsInfoEXT_negative(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return clGetImageRequirementsInfoEXT_negative(device, context, queue); +} +int test_cl_image_requirements_max_val_ext_negative(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return cl_image_requirements_max_val_ext_negative(device, context, queue); +} +int test_cl_image_requirements_max_val_ext_positive(cl_device_id device, + cl_context 
context, + cl_command_queue queue, + int num_elements) +{ + return cl_image_requirements_max_val_ext_positive(device, context, queue); +} + +int test_image2d_from_buffer_positive(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return image2d_from_buffer_positive(device, context, queue); +} +int test_memInfo_image_from_buffer_positive(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return memInfo_image_from_buffer_positive(device, context, queue); +} +int test_imageInfo_image_from_buffer_positive(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return imageInfo_image_from_buffer_positive(device, context, queue); +} +int test_image_from_buffer_alignment_negative(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return image_from_buffer_alignment_negative(device, context, queue); +} +int test_image_from_small_buffer_negative(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return image_from_small_buffer_negative(device, context, queue); +} +int test_image_from_buffer_fill_positive(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return image_from_buffer_fill_positive(device, context, queue); +} +int test_image_from_buffer_read_positive(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return image_from_buffer_read_positive(device, context, queue); +} + test_definition test_list[] = { - ADD_TEST( 1D ), - ADD_TEST( 2D ), - ADD_TEST( 3D ), - ADD_TEST( 1Darray ), - ADD_TEST( 2Darray ), + ADD_TEST(1D), + ADD_TEST(2D), + ADD_TEST(3D), + ADD_TEST(1Darray), + ADD_TEST(2Darray), + ADD_TEST_VERSION(cl_image_requirements_size_ext_negative, Version(3, 0)), + ADD_TEST_VERSION(cl_image_requirements_size_ext_consistency, Version(3, 0)), + 
ADD_TEST_VERSION(clGetImageRequirementsInfoEXT_negative, Version(3, 0)), + ADD_TEST_VERSION(cl_image_requirements_max_val_ext_negative, Version(3, 0)), + ADD_TEST_VERSION(cl_image_requirements_max_val_ext_positive, Version(3, 0)), + ADD_TEST_VERSION(image2d_from_buffer_positive, Version(3, 0)), + ADD_TEST_VERSION(memInfo_image_from_buffer_positive, Version(3, 0)), + ADD_TEST_VERSION(imageInfo_image_from_buffer_positive, Version(3, 0)), + ADD_TEST_VERSION(image_from_buffer_alignment_negative, Version(3, 0)), + ADD_TEST_VERSION(image_from_small_buffer_negative, Version(3, 0)), + ADD_TEST_VERSION(image_from_buffer_fill_positive, Version(3, 0)), + ADD_TEST_VERSION(image_from_buffer_read_positive, Version(3, 0)), }; const int test_num = ARRAY_SIZE( test_list ); diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp new file mode 100644 index 00000000..c6646330 --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp @@ -0,0 +1,124 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef _TEST_CL_EXT_IMAGE_BUFFER +#define _TEST_CL_EXT_IMAGE_BUFFER + +#define TEST_IMAGE_SIZE 20 + +#define GET_EXTENSION_FUNC(platform, function_name) \ + function_name##_fn function_name = reinterpret_cast( \ + clGetExtensionFunctionAddressForPlatform(platform, #function_name)); \ + if (function_name == nullptr) \ + { \ + return TEST_FAIL; \ + } \ + do \ + { \ + } while (false) + +static inline size_t aligned_size(size_t size, size_t alignment) +{ + return (size + alignment - 1) & ~(alignment - 1); +} + +static inline void* aligned_ptr(void* ptr, size_t alignment) +{ + return (void*)(((uintptr_t)ptr + alignment - 1) & ~(alignment - 1)); +} + +static inline size_t get_format_size(cl_context context, + cl_image_format* format, + cl_mem_object_type imageType, + cl_mem_flags flags) +{ + cl_image_desc image_desc = { 0 }; + image_desc.image_type = imageType; + + /* Size 1 only to query element size */ + image_desc.image_width = 1; + if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType + && CL_MEM_OBJECT_IMAGE1D != imageType) + { + image_desc.image_height = 1; + } + if (CL_MEM_OBJECT_IMAGE3D == imageType + || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType) + { + image_desc.image_depth = 1; + } + if (CL_MEM_OBJECT_IMAGE1D_ARRAY == imageType + || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType) + { + image_desc.image_array_size = 1; + } + + cl_int error = 0; + cl_mem buffer; + if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER) + { + buffer = clCreateBuffer(context, flags, + get_pixel_size(format) * image_desc.image_width, + NULL, &error); + test_error(error, "Unable to create buffer"); + + image_desc.buffer = buffer; + } + + cl_mem image = + clCreateImage(context, flags, format, &image_desc, nullptr, &error); + test_error(error, "Unable to create image"); + + size_t element_size = 0; + error = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(element_size), + &element_size, nullptr); + test_error(error, "Error clGetImageInfo"); + + error = clReleaseMemObject(image); + 
test_error(error, "Unable to release image"); + + if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER) + { + error = clReleaseMemObject(buffer); + test_error(error, "Unable to release buffer"); + } + + return element_size; +} + +static inline void image_desc_init(cl_image_desc* desc, + cl_mem_object_type imageType) +{ + desc->image_type = imageType; + desc->image_width = TEST_IMAGE_SIZE; + if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType + && CL_MEM_OBJECT_IMAGE1D != imageType) + { + desc->image_height = TEST_IMAGE_SIZE; + } + if (CL_MEM_OBJECT_IMAGE3D == imageType + || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType) + { + desc->image_depth = TEST_IMAGE_SIZE; + } + if (CL_MEM_OBJECT_IMAGE1D_ARRAY == imageType + || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType) + { + desc->image_array_size = TEST_IMAGE_SIZE; + } +} + +#endif /* _TEST_CL_EXT_IMAGE_BUFFER */ \ No newline at end of file diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp new file mode 100644 index 00000000..1b3b04b7 --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp @@ -0,0 +1,1007 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "../testBase.h" +#include "../common.h" +#include "test_cl_ext_image_buffer.hpp" + +static int get_image_requirement_alignment( + cl_device_id device, cl_context context, cl_mem_flags flags, + const cl_image_format* image_format, const cl_image_desc* image_desc, + size_t* row_pitch_alignment, size_t* slice_pitch_alignment, + size_t* base_address_alignment) +{ + cl_platform_id platform = getPlatformFromDevice(device); + GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT); + + cl_int err = CL_SUCCESS; + if (nullptr != row_pitch_alignment) + { + err = clGetImageRequirementsInfoEXT( + context, nullptr, flags, image_format, image_desc, + CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + sizeof(*row_pitch_alignment), row_pitch_alignment, nullptr); + test_error(err, "Error getting alignment"); + } + + if (nullptr != slice_pitch_alignment && CL_SUCCESS == err) + { + err = clGetImageRequirementsInfoEXT( + context, nullptr, flags, image_format, image_desc, + CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT, + sizeof(*slice_pitch_alignment), slice_pitch_alignment, nullptr); + test_error(err, "Error getting alignment"); + } + + if (nullptr != base_address_alignment && CL_SUCCESS == err) + { + err = clGetImageRequirementsInfoEXT( + context, nullptr, flags, image_format, image_desc, + CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT, + sizeof(*base_address_alignment), base_address_alignment, nullptr); + test_error(err, "Error getting alignment"); + } + + return TEST_PASS; +} + +/** + * Consistency with alignment requirements as returned by + * cl_khr_image2d_from_buffer Check that the returned values for + * CL_DEVICE_IMAGE_PITCH_ALIGNMENT and CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT + * are correct. 
+ */ +int image2d_from_buffer_positive(cl_device_id device, cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_khr_image2d_from_buffer")) + { + printf("Extension cl_khr_image2d_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + std::vector imageTypes{ + CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + std::vector flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE, + CL_MEM_KERNEL_READ_AND_WRITE }; + + for (auto flag : flagTypes) + { + for (auto imageType : imageTypes) + { + /* Get the list of supported image formats */ + std::vector formatList; + if (TEST_PASS + != get_format_list(context, imageType, formatList, flag) + || formatList.size() == 0) + { + test_fail("Failure to get supported formats list"); + } + + cl_uint row_pitch_alignment_2d = 0; + cl_int err = + clGetDeviceInfo(device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, + sizeof(row_pitch_alignment_2d), + &row_pitch_alignment_2d, nullptr); + test_error(err, "Error clGetDeviceInfo"); + + cl_uint base_address_alignment_2d = 0; + err = + clGetDeviceInfo(device, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, + sizeof(base_address_alignment_2d), + &base_address_alignment_2d, nullptr); + test_error(err, "Error clGetDeviceInfo"); + + for (auto format : formatList) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE) + ? 
CL_MEM_READ_WRITE + : flag; + + size_t row_pitch_alignment = 0; + size_t base_address_alignment = 0; + + int get_error = get_image_requirement_alignment( + device, context, 0, &format, &image_desc, + &row_pitch_alignment, nullptr, &base_address_alignment); + if (TEST_PASS != get_error) + { + return get_error; + } + + const size_t element_size = + get_format_size(context, &format, imageType, flag); + + /* Alignements in pixels vs bytes */ + if (base_address_alignment + > base_address_alignment_2d * element_size) + { + test_fail("Unexpected base_address_alignment"); + } + + if (row_pitch_alignment > row_pitch_alignment_2d * element_size) + { + test_fail("Unexpected row_pitch_alignment"); + } + } + } + } + + return TEST_PASS; +} + +/** + * Test clGetMemObjectInfo + * Check that CL_MEM_ASSOCIATED_MEMOBJECT correctly returns the buffer that was + * used. + */ +int memInfo_image_from_buffer_positive(cl_device_id device, cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + if (!is_extension_available(device, "cl_ext_image_from_buffer")) + { + printf("Extension cl_ext_image_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + std::vector imageTypes{ + CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + std::vector flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE, + CL_MEM_KERNEL_READ_AND_WRITE }; + + for (auto flag : flagTypes) + { + for (auto imageType : imageTypes) + { + /* Get the list of supported image formats */ + std::vector formatList; + if (TEST_PASS + != get_format_list(context, imageType, formatList, flag) + || formatList.size() == 0) + { + test_fail("Failure to get supported formats list"); + } + + for (auto format : formatList) + { + 
cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE) + ? CL_MEM_READ_WRITE + : flag; + + size_t row_pitch_alignment = 0; + size_t slice_pitch_alignment = 0; + + int get_error = get_image_requirement_alignment( + device, context, 0, &format, &image_desc, + &row_pitch_alignment, &slice_pitch_alignment, nullptr); + if (TEST_PASS != get_error) + { + return get_error; + } + + const size_t element_size = + get_format_size(context, &format, imageType, flag); + + const size_t row_pitch = aligned_size( + TEST_IMAGE_SIZE * element_size, row_pitch_alignment); + const size_t slice_pitch = aligned_size( + row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment); + + const size_t buffer_size = slice_pitch * TEST_IMAGE_SIZE; + + cl_int err = CL_SUCCESS; + cl_mem buffer = + clCreateBuffer(context, flag, buffer_size, nullptr, &err); + test_error(err, "Unable to create buffer"); + + image_desc.buffer = buffer; + + cl_mem image_buffer = clCreateImage(context, flag, &format, + &image_desc, nullptr, &err); + test_error(err, "Unable to create image"); + + cl_mem returned_buffer; + err = clGetMemObjectInfo( + image_buffer, CL_MEM_ASSOCIATED_MEMOBJECT, + sizeof(returned_buffer), &returned_buffer, nullptr); + test_error(err, "Error clGetMemObjectInfo"); + + if (returned_buffer != buffer) + { + test_fail("Unexpected CL_MEM_ASSOCIATED_MEMOBJECT buffer"); + } + + err = clReleaseMemObject(buffer); + test_error(err, "Unable to release buffer"); + + err = clReleaseMemObject(image_buffer); + test_error(err, "Unable to release image"); + } + } + } + + return TEST_PASS; +} + +/** + * Test clGetImageInfo + * Check that the returned values for CL_IMAGE_ROW_PITCH and + * CL_IMAGE_SLICE_PITCH are correct. 
+ */ +int imageInfo_image_from_buffer_positive(cl_device_id device, + cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + if (!is_extension_available(device, "cl_ext_image_from_buffer")) + { + printf("Extension cl_ext_image_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + std::vector imageTypes{ + CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + std::vector flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE, + CL_MEM_KERNEL_READ_AND_WRITE }; + + for (auto flag : flagTypes) + { + for (auto imageType : imageTypes) + { + /* Get the list of supported image formats */ + std::vector formatList; + if (TEST_PASS + != get_format_list(context, imageType, formatList, flag) + || formatList.size() == 0) + { + test_fail("Failure to get supported formats list"); + } + + for (auto format : formatList) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE) + ? 
CL_MEM_READ_WRITE + : flag; + + size_t row_pitch_alignment = 0; + size_t slice_pitch_alignment = 0; + + int get_error = get_image_requirement_alignment( + device, context, 0, &format, &image_desc, + &row_pitch_alignment, &slice_pitch_alignment, nullptr); + if (TEST_PASS != get_error) + { + return get_error; + } + + const size_t element_size = + get_format_size(context, &format, imageType, flag); + + const size_t row_pitch = aligned_size( + TEST_IMAGE_SIZE * element_size, row_pitch_alignment); + const size_t slice_pitch = aligned_size( + row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment); + + const size_t buffer_size = slice_pitch * TEST_IMAGE_SIZE; + + cl_int err = CL_SUCCESS; + cl_mem buffer = + clCreateBuffer(context, flag, buffer_size, nullptr, &err); + test_error(err, "Unable to create buffer"); + + image_desc.buffer = buffer; + + if (imageType == CL_MEM_OBJECT_IMAGE2D + || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + image_desc.image_row_pitch = row_pitch; + } + else if (imageType == CL_MEM_OBJECT_IMAGE3D + || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + image_desc.image_row_pitch = row_pitch; + image_desc.image_slice_pitch = slice_pitch; + } + + cl_mem image_buffer = clCreateImage(context, flag, &format, + &image_desc, nullptr, &err); + test_error(err, "Unable to create image"); + + if (imageType == CL_MEM_OBJECT_IMAGE3D + || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY + || imageType == CL_MEM_OBJECT_IMAGE2D + || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + size_t returned_row_pitch = 0; + err = clGetImageInfo(image_buffer, CL_IMAGE_ROW_PITCH, + sizeof(returned_row_pitch), + &returned_row_pitch, nullptr); + test_error(err, "Error clGetImageInfo"); + + if (returned_row_pitch != row_pitch) + { + test_fail( + "Unexpected row pitch " + "CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT"); + } + } + + if (imageType == CL_MEM_OBJECT_IMAGE3D + || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + size_t returned_slice_pitch = 0; + err = clGetImageInfo(image_buffer, 
CL_IMAGE_SLICE_PITCH, + sizeof(returned_slice_pitch), + &returned_slice_pitch, nullptr); + test_error(err, "Error clGetImageInfo"); + + if (returned_slice_pitch != slice_pitch) + { + test_fail( + "Unexpected row pitch " + "CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT"); + } + } + + err = clReleaseMemObject(buffer); + test_error(err, "Unable to release buffer"); + + err = clReleaseMemObject(image_buffer); + test_error(err, "Unable to release image"); + } + } + } + + return TEST_PASS; +} + +/** + * Negative testing for clCreateImage and wrong alignment + * - Create an image from a buffer with invalid row pitch (not a multiple of + * required alignment) and check that CL_INVALID_IMAGE_DESCRIPTOR is returned. + * - Create an image from a buffer with invalid slice pitch (not a multiple of + * required alignment) and check that CL_INVALID_IMAGE_DESCRIPTOR is returned. + * - Create an image from a buffer with invalid base address alignment (not a + * multiple of required alignment) and check that CL_INVALID_IMAGE_DESCRIPTOR is + * returned + */ +int image_from_buffer_alignment_negative(cl_device_id device, + cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + if (!is_extension_available(device, "cl_ext_image_from_buffer")) + { + printf("Extension cl_ext_image_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + std::vector imageTypes{ + CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + std::vector flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE, + CL_MEM_KERNEL_READ_AND_WRITE }; + + for (auto flag : flagTypes) + { + for (auto imageType : imageTypes) + { + /* Get the list of supported image formats */ + std::vector formatList; + if (TEST_PASS 
+ != get_format_list(context, imageType, formatList, flag) + || formatList.size() == 0) + { + test_fail("Failure to get supported formats list"); + } + + for (auto format : formatList) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE) + ? CL_MEM_READ_WRITE + : flag; + + size_t row_pitch_alignment = 0; + size_t slice_pitch_alignment = 0; + size_t base_address_alignment = 0; + + int get_error = get_image_requirement_alignment( + device, context, 0, &format, &image_desc, + &row_pitch_alignment, &slice_pitch_alignment, + &base_address_alignment); + if (TEST_PASS != get_error) + { + return get_error; + } + + const size_t element_size = + get_format_size(context, &format, imageType, flag); + + const size_t row_pitch = aligned_size( + TEST_IMAGE_SIZE * element_size, row_pitch_alignment); + const size_t slice_pitch = aligned_size( + row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment); + + const size_t buffer_size = (slice_pitch + 1) + * TEST_IMAGE_SIZE; /* For bigger row/slice pitch */ + + cl_int err = CL_SUCCESS; + cl_mem buffer = + clCreateBuffer(context, flag, buffer_size, nullptr, &err); + test_error(err, "Unable to create buffer"); + + /* Test Row pitch images */ + if (imageType == CL_MEM_OBJECT_IMAGE2D + || imageType == CL_MEM_OBJECT_IMAGE3D + || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY + || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + image_desc.buffer = buffer; + image_desc.image_row_pitch = + row_pitch + 1; /* wrong row pitch */ + + clCreateImage(context, flag, &format, &image_desc, nullptr, + &err); + test_failure_error(err, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + "Unexpected clCreateImage return"); + } + + /* Test Slice pitch images */ + if (imageType == CL_MEM_OBJECT_IMAGE3D + || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + image_desc.buffer = buffer; + image_desc.image_row_pitch = row_pitch; + image_desc.image_slice_pitch = + slice_pitch + 1; /* wrong slice pitch */ + + 
clCreateImage(context, flag, &format, &image_desc, nullptr, + &err); + test_failure_error(err, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + "Unexpected clCreateImage return"); + } + + /* Test buffer from host ptr to test base address alignment */ + const size_t aligned_buffer_size = + aligned_size(buffer_size, base_address_alignment); + /* Create buffer with host ptr and additional size for the wrong + * alignment */ + void* const host_ptr = + malloc(aligned_buffer_size + base_address_alignment); + void* non_aligned_host_ptr = + (void*)((char*)(aligned_ptr(host_ptr, + base_address_alignment)) + + 1); /* wrong alignment */ + + cl_mem buffer_host = clCreateBuffer( + context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, + buffer_size, non_aligned_host_ptr, &err); + test_error(err, "Unable to create buffer"); + + image_desc.buffer = buffer_host; + + clCreateImage(context, flag, &format, &image_desc, nullptr, + &err); + test_failure_error(err, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + "Unexpected clCreateImage return"); + + free(host_ptr); + + err = clReleaseMemObject(buffer); + test_error(err, "Unable to release buffer"); + + err = clReleaseMemObject(buffer_host); + test_error(err, "Unable to release buffer"); + } + } + } + + return TEST_PASS; +} + +/** + * Negative testing for clCreateImage (buffer size). 
+ * Create a buffer too small and check that image creation from that buffer is + * rejected + */ +int image_from_small_buffer_negative(cl_device_id device, cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + if (!is_extension_available(device, "cl_ext_image_from_buffer")) + { + printf("Extension cl_ext_image_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + std::vector imageTypes{ + CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE3D, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + std::vector flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE, + CL_MEM_KERNEL_READ_AND_WRITE }; + + for (auto flag : flagTypes) + { + for (auto imageType : imageTypes) + { + /* Get the list of supported image formats */ + std::vector formatList; + if (TEST_PASS + != get_format_list(context, imageType, formatList, flag) + || formatList.size() == 0) + { + test_fail("Failure to get supported formats list"); + } + + for (auto format : formatList) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE) + ? 
CL_MEM_READ_WRITE + : flag; + + /* Invalid buffer size */ + cl_int err; + cl_mem buffer = clCreateBuffer( + context, flag, TEST_IMAGE_SIZE / 2, nullptr, &err); + test_error(err, "Unable to create buffer"); + + image_desc.buffer = buffer; + + clCreateImage(context, flag, &format, &image_desc, nullptr, + &err); + test_failure_error(err, CL_INVALID_MEM_OBJECT, + "Unexpected clCreateImage return"); + + err = clReleaseMemObject(buffer); + test_error(err, "Unable to release buffer"); + } + } + } + + return TEST_PASS; +} + +static int image_from_buffer_fill_check(cl_command_queue queue, cl_mem image, + size_t* region, size_t element_size, + char pattern) +{ + /* read the image from buffer and check the pattern */ + const size_t image_size = region[0] * region[1] * region[2] * element_size; + size_t origin[3] = { 0, 0, 0 }; + std::vector read_buffer(image_size); + + cl_int error = + clEnqueueReadImage(queue, image, CL_BLOCKING, origin, region, 0, 0, + read_buffer.data(), 0, nullptr, nullptr); + test_error(error, "Error clEnqueueReadImage"); + + for (size_t line = 0; line < region[0]; line++) + { + for (size_t row = 0; row < region[1]; row++) + { + for (size_t depth = 0; depth < region[2]; depth++) + { + for (size_t elmt = 0; elmt < element_size; elmt++) + { + size_t index = line * row * depth * elmt; + + if (read_buffer[index] != pattern) + { + test_fail("Image pattern check failed"); + } + } + } + } + } + + return TEST_PASS; +} + +/** + * Use fill buffer to fill the image from buffer + */ +int image_from_buffer_fill_positive(cl_device_id device, cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + if (!is_extension_available(device, "cl_ext_image_from_buffer")) + { + printf("Extension cl_ext_image_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + std::vector imageTypes{ + 
CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + std::vector flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE, + CL_MEM_KERNEL_READ_AND_WRITE }; + + for (auto flag : flagTypes) + { + for (auto imageType : imageTypes) + { + /* Get the list of supported image formats */ + std::vector formatList; + if (TEST_PASS + != get_format_list(context, imageType, formatList, flag) + || formatList.size() == 0) + { + test_fail("Failure to get supported formats list"); + } + + for (auto format : formatList) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE) + ? CL_MEM_READ_WRITE + : flag; + + size_t row_pitch_alignment = 0; + size_t slice_pitch_alignment = 0; + + int get_error = get_image_requirement_alignment( + device, context, 0, &format, &image_desc, + &row_pitch_alignment, &slice_pitch_alignment, nullptr); + if (TEST_PASS != get_error) + { + return get_error; + } + + const size_t element_size = + get_format_size(context, &format, imageType, flag); + + const size_t row_pitch = aligned_size( + TEST_IMAGE_SIZE * element_size, row_pitch_alignment); + const size_t slice_pitch = aligned_size( + row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment); + + const size_t buffer_size = slice_pitch * TEST_IMAGE_SIZE; + + cl_int err = CL_SUCCESS; + cl_mem buffer = + clCreateBuffer(context, flag, buffer_size, nullptr, &err); + test_error(err, "Unable to create buffer"); + + /* fill the buffer with a pattern */ + const char pattern = 0x55; + err = clEnqueueFillBuffer(queue, buffer, &pattern, + sizeof(pattern), 0, buffer_size, 0, + nullptr, nullptr); + test_error(err, "Error clEnqueueFillBuffer"); + + err = clFinish(queue); + test_error(err, "Error clFinish"); + + cl_mem image1d_buffer; + if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER) + { + image1d_buffer = 
clCreateBuffer(context, flag, buffer_size, + nullptr, &err); + test_error(err, "Unable to create buffer"); + + image_desc.buffer = image1d_buffer; + } + + cl_mem image = clCreateImage(context, flag, &format, + &image_desc, nullptr, &err); + test_error(err, "Unable to create image"); + + /* Check the image from buffer */ + image_desc.buffer = buffer; + + if (imageType == CL_MEM_OBJECT_IMAGE2D + || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + image_desc.image_row_pitch = row_pitch; + } + else if (imageType == CL_MEM_OBJECT_IMAGE3D + || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + image_desc.image_row_pitch = row_pitch; + image_desc.image_slice_pitch = slice_pitch; + } + + cl_mem image_from_buffer = clCreateImage( + context, flag, &format, &image_desc, nullptr, &err); + test_error(err, "Unable to create image"); + + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { 1, 1, 1 }; + + region[0] = TEST_IMAGE_SIZE; + if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType + && CL_MEM_OBJECT_IMAGE1D != imageType) + { + region[1] = TEST_IMAGE_SIZE; + } + if (CL_MEM_OBJECT_IMAGE3D == imageType + || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType) + { + region[2] = TEST_IMAGE_SIZE; + } + + /* Check the copy of the image from buffer */ + err = + clEnqueueCopyImage(queue, image_from_buffer, image, origin, + origin, region, 0, nullptr, nullptr); + test_error(err, "Error clEnqueueCopyImage"); + + err = clFinish(queue); + test_error(err, "Error clFinish"); + + int fill_error = image_from_buffer_fill_check( + queue, image_from_buffer, region, element_size, pattern); + if (TEST_PASS != fill_error) + { + return fill_error; + } + + fill_error = image_from_buffer_fill_check( + queue, image, region, element_size, pattern); + if (TEST_PASS != fill_error) + { + return fill_error; + } + + err = clReleaseMemObject(buffer); + test_error(err, "Unable to release buffer"); + + err = clReleaseMemObject(image); + test_error(err, "Unable to release image"); + + err = clReleaseMemObject(image_from_buffer); 
+ test_error(err, "Unable to release image"); + + if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER) + { + err = clReleaseMemObject(image1d_buffer); + test_error(err, "Unable to release image"); + } + } + } + } + + return TEST_PASS; +} + +static int image_from_buffer_read_check(cl_command_queue queue, cl_mem buffer, + const size_t buffer_size, + size_t* region, size_t element_size, + char pattern, size_t row_pitch, + size_t slice_pitch) +{ + /* read the buffer and check the pattern */ + std::vector host_buffer(buffer_size); + char* host_ptr = host_buffer.data(); + char* host_ptr_slice = host_ptr; + + cl_int error = + clEnqueueReadBuffer(queue, buffer, CL_BLOCKING, 0, buffer_size, + host_buffer.data(), 0, nullptr, nullptr); + test_error(error, "Error clEnqueueReadBuffer"); + + for (size_t k = 0; k < region[2]; k++) + { + for (size_t i = 0; i < region[1]; i++) + { + for (size_t j = 0; j < region[0] * element_size; j++) + { + if (host_ptr[j] != pattern) + { + test_fail("Image pattern check failed"); + } + } + host_ptr = host_ptr + row_pitch; + } + host_ptr_slice = host_ptr_slice + slice_pitch; + host_ptr = host_ptr_slice; + } + + return TEST_PASS; +} + +/** + * Use fill image to fill the buffer that was used to create the image + */ +int image_from_buffer_read_positive(cl_device_id device, cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + if (!is_extension_available(device, "cl_ext_image_from_buffer")) + { + printf("Extension cl_ext_image_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + std::vector imageTypes{ + CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + for (auto imageType : imageTypes) + { + cl_image_desc image_desc = { 0 }; + 
image_desc_init(&image_desc, imageType); + + /* Non normalized format so we can read it back directly from + * clEnqueueFillImage */ + cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT8 }; + const char pattern = 0x55; + + const size_t element_size = + get_format_size(context, &format, imageType, CL_MEM_READ_WRITE); + + size_t row_pitch_alignment = 0; + size_t slice_pitch_alignment = 0; + + int get_error = get_image_requirement_alignment( + device, context, CL_MEM_READ_WRITE, &format, &image_desc, + &row_pitch_alignment, &slice_pitch_alignment, nullptr); + if (TEST_PASS != get_error) + { + return get_error; + } + + const size_t row_pitch = + aligned_size(TEST_IMAGE_SIZE * element_size, row_pitch_alignment); + const size_t slice_pitch = + aligned_size(row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment); + + const size_t buffer_size = slice_pitch * TEST_IMAGE_SIZE; + + cl_int err = CL_SUCCESS; + cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, + nullptr, &err); + test_error(err, "Unable to create buffer"); + + /* Check the image from buffer */ + image_desc.buffer = buffer; + + if (imageType == CL_MEM_OBJECT_IMAGE2D + || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + image_desc.image_row_pitch = row_pitch; + } + else if (imageType == CL_MEM_OBJECT_IMAGE3D + || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + image_desc.image_row_pitch = row_pitch; + image_desc.image_slice_pitch = slice_pitch; + } + + cl_mem image = clCreateImage(context, CL_MEM_READ_WRITE, &format, + &image_desc, nullptr, &err); + test_error(err, "Unable to create image"); + + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { 1, 1, 1 }; + + region[0] = TEST_IMAGE_SIZE; + if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType + && CL_MEM_OBJECT_IMAGE1D != imageType) + { + region[1] = TEST_IMAGE_SIZE; + } + if (CL_MEM_OBJECT_IMAGE3D == imageType + || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType) + { + region[2] = TEST_IMAGE_SIZE; + } + + /* fill the image with a pattern */ + cl_uint 
fill_color[4] = { pattern, pattern, pattern, pattern }; + err = clEnqueueFillImage(queue, image, fill_color, origin, region, 0, + nullptr, nullptr); + test_error(err, "Error clEnqueueFillImage"); + + err = clFinish(queue); + test_error(err, "Error clFinish"); + + int read_error = image_from_buffer_read_check( + queue, buffer, buffer_size, region, element_size, pattern, + (imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY) ? slice_pitch + : row_pitch, + slice_pitch); + if (TEST_PASS != read_error) + { + return read_error; + } + + err = clReleaseMemObject(buffer); + test_error(err, "Unable to release buffer"); + + err = clReleaseMemObject(image); + test_error(err, "Unable to release image"); + } + + return TEST_PASS; +} \ No newline at end of file diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp new file mode 100644 index 00000000..9212fcbc --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp @@ -0,0 +1,482 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" +#include "../common.h" +#include "test_cl_ext_image_buffer.hpp" + +/** + * Negative tests for {CL_IMAGE_REQUIREMENTS_SIZE_EXT} + * Check that attempting to perform the {CL_IMAGE_REQUIREMENTS_SIZE_EXT} query + * without specifying the _image_format_ results in {CL_INVALID_VALUE} being + * returned. Check that attempting to perform the + * {CL_IMAGE_REQUIREMENTS_SIZE_EXT} query without specifying the _image_desc_ + * results in {CL_INVALID_VALUE} being returned. + */ +int cl_image_requirements_size_ext_negative(cl_device_id device, + cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + cl_platform_id platform = getPlatformFromDevice(device); + GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT); + + size_t max_size = 0; + size_t param_val_size = 0; + + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, CL_MEM_OBJECT_IMAGE2D); + + cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT16 }; + + /* Check image_format null results in CL_INVALID_VALUE */ + cl_int err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, nullptr, &image_desc, + CL_IMAGE_REQUIREMENTS_SIZE_EXT, sizeof(max_size), &max_size, + ¶m_val_size); + test_failure_error(err, CL_INVALID_VALUE, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check image_desc null results in CL_INVALID_VALUE */ + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, nullptr, + CL_IMAGE_REQUIREMENTS_SIZE_EXT, sizeof(max_size), &max_size, + ¶m_val_size); + test_failure_error(err, CL_INVALID_VALUE, + "Unexpected clGetImageRequirementsInfoEXT return"); + + return TEST_PASS; +} + +/** + * Consistency checks for CL_IMAGE_REQUIREMENTS_SIZE_EXT + * When creating 2D images from a buffer is supported + * Check that the 
CL_IMAGE_REQUIREMENTS_SIZE_EXT query can be performed + * successfully. Create a buffer with the size returned and check that an image + * can successfully be created from the buffer. Check that the value returned + * for CL_MEM_SIZE for the image is the same as the value returned for + * CL_IMAGE_REQUIREMENTS_SIZE_EXT. + */ +int cl_image_requirements_size_ext_consistency(cl_device_id device, + cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + if (!is_extension_available(device, "cl_ext_image_from_buffer")) + { + printf("Extension cl_ext_image_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + cl_platform_id platform = getPlatformFromDevice(device); + GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT); + + size_t max_size = 0; + size_t param_val_size = 0; + + std::vector imageTypes{ + CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + std::vector flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE, + CL_MEM_KERNEL_READ_AND_WRITE }; + + for (auto flag : flagTypes) + { + for (auto imageType : imageTypes) + { + /* Get the list of supported image formats */ + std::vector formatList; + if (TEST_PASS + != get_format_list(context, imageType, formatList, flag) + || formatList.size() == 0) + { + test_fail("Failure to get supported formats list"); + } + + for (auto format : formatList) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE) + ? 
CL_MEM_READ_WRITE + : flag; + + cl_int err = clGetImageRequirementsInfoEXT( + context, nullptr, flag, &format, &image_desc, + CL_IMAGE_REQUIREMENTS_SIZE_EXT, sizeof(max_size), &max_size, + ¶m_val_size); + test_error(err, "Error clGetImageRequirementsInfoEXT"); + + /* Create buffer */ + cl_mem buffer = + clCreateBuffer(context, flag, max_size, nullptr, &err); + test_error(err, "Unable to create buffer"); + + image_desc.buffer = buffer; + + /* 2D Image from buffer */ + cl_mem image_buffer = clCreateImage(context, flag, &format, + &image_desc, nullptr, &err); + test_error(err, "Unable to create image"); + + size_t size = 0; + err = clGetMemObjectInfo(image_buffer, CL_MEM_SIZE, + sizeof(size_t), &size, NULL); + test_error(err, "Error clGetMemObjectInfo"); + + if (max_size != size) + { + test_fail("CL_IMAGE_REQUIREMENTS_SIZE_EXT different from " + "CL_MEM_SIZE"); + } + + err = clReleaseMemObject(image_buffer); + test_error(err, "Error clReleaseMemObject"); + + err = clReleaseMemObject(buffer); + test_error(err, "Error clReleaseMemObject"); + } + } + } + + return TEST_PASS; +} + +/** + * Negative testing for all testable error codes returned by + * clGetImageFormatInfoKHR + */ +int clGetImageRequirementsInfoEXT_negative(cl_device_id device, + cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + cl_platform_id platform = getPlatformFromDevice(device); + GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT); + + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, CL_MEM_OBJECT_IMAGE3D); + + cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT16 }; + + /* Check that CL_INVALID_CONTEXT is returned when passing nullptr as context + */ + size_t row_pitch_alignment = 0; + cl_int err = clGetImageRequirementsInfoEXT( + nullptr, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, + 
CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr); + test_failure_error(err, CL_INVALID_CONTEXT, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check that CL_INVALID_VALUE is returned when passing an invalid + * image_type */ + cl_image_desc invalid_desc = { CL_MEM_OBJECT_BUFFER, TEST_IMAGE_SIZE }; + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &invalid_desc, + CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr); + test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check that CL_INVALID_VALUE is returned when passing invalid flags */ + err = clGetImageRequirementsInfoEXT( + context, nullptr, -1, &format, &image_desc, + CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr); + test_failure_error(err, CL_INVALID_VALUE, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check that CL_INVALID_IMAGE_FORMAT_DESCRIPTOR is returned when passing a + * nullptr image_format */ + cl_image_format invalid_format = { CL_INTENSITY, CL_UNORM_SHORT_555 }; + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &invalid_format, &image_desc, + CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr); + test_failure_error(err, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check that CL_INVALID_IMAGE_DESCRIPTOR is returned when passing an + * image_desc with invalid values */ + cl_image_desc invalid_desc_size = { CL_MEM_OBJECT_IMAGE1D, 0 }; + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &invalid_desc_size, + CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr); + 
test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check that CL_INVALID_VALUE is returned when passing an invalid + * param_name */ + cl_image_requirements_info_ext invalid_info = CL_IMAGE_FORMAT; + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, invalid_info, + sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr); + test_failure_error(err, CL_INVALID_VALUE, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check that CL_INVALID_VALUE is returned when passing a param_value_size + * value smaller than the size of the return type */ + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, + CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + sizeof(row_pitch_alignment) - 1, &row_pitch_alignment, nullptr); + test_failure_error(err, CL_INVALID_VALUE, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check that CL_INVALID_VALUE is returned when passing a param_value_size + * value smaller than the size of the return type */ + uint32_t max_height = 0; + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, + CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT, sizeof(max_height) - 1, + &max_height, nullptr); + test_failure_error(err, CL_INVALID_VALUE, + "Unexpected clGetImageRequirementsInfoEXT return"); + + return TEST_PASS; +} + +/** + * Negative tests for {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} + * Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} query on all + * image types for which it is not valid Check that + * {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases. + * + * Negative testing for {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} + * Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} query on all + * image types for which it is not valid Check that + * {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases. 
+ * + * Negative testing for {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} + * Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} query on + * all image types for which it is not valid Check that + * {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases. + */ +int cl_image_requirements_max_val_ext_negative(cl_device_id device, + cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + cl_platform_id platform = getPlatformFromDevice(device); + GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT); + + size_t value = 0; + + std::vector imageTypes_height{ + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D + }; + + cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT16 }; + + for (auto imageType : imageTypes_height) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + /* Check image_format null results in CL_INVALID_VALUE */ + cl_int err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, + CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT, sizeof(value), &value, + nullptr); + test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR, + "Unexpected clGetImageRequirementsInfoEXT return"); + } + + std::vector imageTypes_depth{ + CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D + }; + + for (auto imageType : imageTypes_depth) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + /* Check image_format null results in CL_INVALID_VALUE */ + cl_int err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, + CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT, sizeof(value), &value, + nullptr); + test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR, + 
"Unexpected clGetImageRequirementsInfoEXT return"); + } + + std::vector imageTypes_array_size{ + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE1D + }; + + for (auto imageType : imageTypes_array_size) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + /* Check image_format null results in CL_INVALID_VALUE */ + cl_int err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, + CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT, sizeof(value), &value, + nullptr); + test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR, + "Unexpected clGetImageRequirementsInfoEXT return"); + } + + return TEST_PASS; +} + +/** + * Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} + ** Check that the {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} query can be performed + *successfully + * + * Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} + ** Check that the {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} query can be performed + *successfully + * + * Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} + ** Check that the {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} query can be performed + *successfully + * + * Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} + ** Check that the {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} query can be + *performed successfully + */ +int cl_image_requirements_max_val_ext_positive(cl_device_id device, + cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + cl_platform_id platform = getPlatformFromDevice(device); + GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT); + + /* CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT */ + cl_image_desc image_desc_1d = { 0 }; + image_desc_init(&image_desc_1d, CL_MEM_OBJECT_IMAGE1D); + + uint32_t max_width = 0; + 
cl_int err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, nullptr, &image_desc_1d, + CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT, sizeof(max_width), &max_width, + nullptr); + test_error(err, "Error clGetImageRequirementsInfoEXT"); + + size_t width_1d = 0; + err = clGetDeviceInfo(device, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, + sizeof(width_1d), &width_1d, NULL); + test_error(err, "Error clGetDeviceInfo"); + + if (!(max_width <= width_1d && max_width > 0)) + { + test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT value"); + } + + /* CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT */ + cl_image_desc image_desc_2d = { 0 }; + image_desc_init(&image_desc_2d, CL_MEM_OBJECT_IMAGE2D); + + uint32_t max_height = 0; + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, nullptr, &image_desc_2d, + CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT, sizeof(max_height), &max_height, + nullptr); + test_error(err, "Error clGetImageRequirementsInfoEXT"); + + size_t height_2d = 0; + err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, + sizeof(height_2d), &height_2d, NULL); + test_error(err, "Error clGetDeviceInfo"); + + if (!(max_height <= height_2d && max_height > 0)) + { + test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT value"); + } + + /* CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT */ + cl_image_desc image_desc_3d = { 0 }; + image_desc_init(&image_desc_3d, CL_MEM_OBJECT_IMAGE3D); + + uint32_t max_depth = 0; + err = clGetImageRequirementsInfoEXT(context, nullptr, CL_MEM_READ_WRITE, + nullptr, &image_desc_3d, + CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT, + sizeof(max_depth), &max_depth, nullptr); + test_error(err, "Error clGetImageRequirementsInfoEXT"); + + size_t depth_3d = 0; + err = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(depth_3d), + &depth_3d, NULL); + test_error(err, "Error clGetDeviceInfo"); + + if (!(max_depth <= depth_3d && max_depth > 0)) + { + test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT value"); + } + + /* 
CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT */ + cl_image_desc image_desc_array = { 0 }; + image_desc_init(&image_desc_array, CL_MEM_OBJECT_IMAGE2D_ARRAY); + + uint32_t max_array_size = 0; + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, nullptr, &image_desc_array, + CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT, sizeof(max_array_size), + &max_array_size, nullptr); + test_error(err, "Error clGetImageRequirementsInfoEXT"); + + size_t array_size = 0; + err = clGetDeviceInfo(device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, + sizeof(array_size), &array_size, NULL); + test_error(err, "Error clGetDeviceInfo"); + + if (!(max_array_size <= array_size && max_array_size > 0)) + { + test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT value"); + } + + return TEST_PASS; +} \ No newline at end of file -- cgit v1.2.3 From 7859a33182a96462067448b4a350823543f8dd39 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 13 Sep 2022 18:22:52 +0100 Subject: Include release builds in GitHub Actions (#1486) The "Ninja" CMake generator does not support multiple configurations, i.e. it does not support use of the '--config' option when running 'cmake --build'. As such, the default configuration (i.e. Debug) was getting used for all builds. Use the CMAKE_BUILD_TYPE variable instead, so that we do release builds, but change one build (ubuntu-20.04 aarch64) to use Debug as its build type, to keep some build coverage for asserts, etc. For Vulkan-Loader and OpenCL-ICD-Loader, we do release builds unconditionally, as we assume there is no need in the CI workflow to actually run the binaries that are built, and therefore no need for any additional debug info. 
Signed-off-by: Stuart Brady --- .github/workflows/presubmit.yml | 6 +++++- presubmit.sh | 32 +++++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index bac4ceba..1dfdb963 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -3,11 +3,12 @@ on: [push, pull_request] jobs: build: - name: Build ${{ matrix.os }} ${{ matrix.arch }} + name: Build ${{ matrix.os }} ${{ matrix.arch }}${{ matrix.extra }} runs-on: ${{ matrix.os }} env: JOB_ARCHITECTURE: ${{ matrix.arch }} JOB_ENABLE_GL: ${{ matrix.gl }} + JOB_ENABLE_DEBUG: ${{ matrix.debug }} strategy: fail-fast: false matrix: @@ -17,12 +18,15 @@ jobs: - os: ubuntu-20.04 mainmatrix: true gl: 1 + extra: " gl" - os: ubuntu-20.04 mainmatrix: false arch: arm - os: ubuntu-20.04 mainmatrix: false arch: aarch64 + debug: 1 + extra: " debug" steps: - uses: actions/checkout@v2 - name: Setup Ninja diff --git a/presubmit.sh b/presubmit.sh index 6c3a293e..ca39b9a2 100755 --- a/presubmit.sh +++ b/presubmit.sh @@ -14,6 +14,9 @@ TOOLCHAIN_FILE=${TOP}/toolchain.cmake touch ${TOOLCHAIN_FILE} BUILD_OPENGL_TEST="OFF" +cmake --version +echo + # Prepare toolchain if needed if [[ ${JOB_ARCHITECTURE} != "" && ${RUNNER_OS} != "Windows" ]]; then TOOLCHAIN_URL_VAR=TOOLCHAIN_URL_${JOB_ARCHITECTURE} @@ -40,6 +43,12 @@ if [[ ( ${JOB_ARCHITECTURE} == "" && ${JOB_ENABLE_GL} == "1" ) ]]; then BUILD_OPENGL_TEST="ON" fi +if [[ ${JOB_ENABLE_DEBUG} == 1 ]]; then + BUILD_CONFIG="Debug" +else + BUILD_CONFIG="Release" +fi + #Vulkan Headers git clone https://github.com/KhronosGroup/Vulkan-Headers.git @@ -48,8 +57,11 @@ git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git cd ${TOP}/OpenCL-ICD-Loader mkdir build cd build -cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/ -cmake --build . -j2 --config Release +cmake .. 
-G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ + -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/ +cmake --build . -j2 #Vulkan Loader cd ${TOP} @@ -58,8 +70,15 @@ cd Vulkan-Loader mkdir build cd build python3 ../scripts/update_deps.py -cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DBUILD_WSI_XLIB_SUPPORT=OFF -DBUILD_WSI_XCB_SUPPORT=OFF -DBUILD_WSI_WAYLAND_SUPPORT=OFF -DUSE_GAS=OFF -C helper.cmake .. -cmake --build . -j2 --config Release +cmake .. -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ + -DBUILD_WSI_XLIB_SUPPORT=OFF \ + -DBUILD_WSI_XCB_SUPPORT=OFF \ + -DBUILD_WSI_WAYLAND_SUPPORT=OFF \ + -DUSE_GAS=OFF \ + -C helper.cmake .. +cmake --build . -j2 # Build CTS cd ${TOP} @@ -74,6 +93,7 @@ else CMAKE_CACHE_OPTIONS="-DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache" fi cmake .. -G Ninja \ + -DCMAKE_BUILD_TYPE="${BUILD_CONFIG}" \ ${CMAKE_CACHE_OPTIONS} \ -DCL_INCLUDE_DIR=${TOP}/OpenCL-Headers \ -DCL_LIB_DIR=${TOP}/OpenCL-ICD-Loader/build \ @@ -84,6 +104,4 @@ cmake .. -G Ninja \ -DGL_IS_SUPPORTED=${BUILD_OPENGL_TEST} \ -DVULKAN_INCLUDE_DIR=${TOP}/Vulkan-Headers/include/ \ -DVULKAN_LIB_DIR=${TOP}/Vulkan-Loader/build/loader/ -cmake --build . -j3 --config Release - - +cmake --build . -j3 -- cgit v1.2.3 From a87e686757f9fda5377baf73a32bb3c791eae70c Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 16 Sep 2022 13:34:36 +0100 Subject: Fix more warnings in math_brute_force (#1502) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix "‘nadj’ may be used uninitialized in this function [-Werror=maybe-uninitialized]". * Fix "specified bound 4096 equals destination size [-Werror=stringop-truncation]". 
Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/main.cpp | 8 +++++--- test_conformance/math_brute_force/reference_math.cpp | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 8cebff9d..ee3fcbd9 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -360,16 +360,18 @@ static int ParseArgs(int argc, const char **argv) int singleThreaded = 0; { // Extract the app name - strncpy(appName, argv[0], MAXPATHLEN); + strncpy(appName, argv[0], MAXPATHLEN - 1); + appName[MAXPATHLEN - 1] = '\0'; #if defined(__APPLE__) char baseName[MAXPATHLEN]; char *base = NULL; - strncpy(baseName, argv[0], MAXPATHLEN); + strncpy(baseName, argv[0], MAXPATHLEN - 1); + baseName[MAXPATHLEN - 1] = '\0'; base = basename(baseName); if (NULL != base) { - strncpy(appName, base, sizeof(appName)); + strncpy(appName, base, sizeof(appName) - 1); appName[sizeof(appName) - 1] = '\0'; } #endif diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index f91ecb22..afa072f8 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -1949,7 +1949,8 @@ double reference_lgamma(double x) w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ static const double zero = 0.00000000000000000000e+00; - double t, y, z, nadj, p, p1, p2, p3, q, r, w; + double nadj = zero; + double t, y, z, p, p1, p2, p3, q, r, w; cl_int i, hx, lx, ix; union { -- cgit v1.2.3 From 8f9c1960ff5a48d85662c568ffd43c74459fcf4c Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 20 Sep 2022 16:52:22 +0100 Subject: Improve MTdataHolder design and use it in math_brute_force (#1490) Improve the design of the MTdataHolder wrapper: * Make it a class instead of a struct with a private member, to make it clearer that 
there is no direct access to the MTdata member. * Make the 1-arg constructor `explicit` to avoid unintended conversions. * Forbid copy construction/assignment as MTdataHolder is never initialised from an MTdataHolder object in the codebase. * Define move construction/assignment as per the "rule of five". Use the MTdataHolder class throughout math_brute_force, to simplify code by avoiding manual resource management. Original patch by Marco Antognini. Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt --- test_common/harness/mt19937.h | 33 +++++++++++++++++----- .../math_brute_force/binary_double.cpp | 11 ++------ test_conformance/math_brute_force/binary_float.cpp | 11 ++------ .../math_brute_force/binary_i_double.cpp | 11 ++------ .../math_brute_force/binary_i_float.cpp | 11 ++------ .../math_brute_force/binary_operator_double.cpp | 11 ++------ .../math_brute_force/binary_operator_float.cpp | 11 ++------ .../math_brute_force/macro_binary_double.cpp | 11 ++------ .../math_brute_force/macro_binary_float.cpp | 11 ++------ .../math_brute_force/macro_unary_double.cpp | 2 +- .../math_brute_force/macro_unary_float.cpp | 2 +- test_conformance/math_brute_force/main.cpp | 6 ++-- test_conformance/math_brute_force/unary_double.cpp | 2 +- test_conformance/math_brute_force/unary_float.cpp | 2 +- 14 files changed, 56 insertions(+), 79 deletions(-) diff --git a/test_common/harness/mt19937.h b/test_common/harness/mt19937.h index 98eec843..447ca25a 100644 --- a/test_common/harness/mt19937.h +++ b/test_common/harness/mt19937.h @@ -94,23 +94,42 @@ double genrand_res53(MTdata /*data*/); bool genrand_bool(MTdata /*data*/); #include +#include -struct MTdataHolder -{ - MTdataHolder(cl_uint seed) +class MTdataHolder { +public: + MTdataHolder() = default; + explicit MTdataHolder(cl_uint seed) { m_mtdata = init_genrand(seed); assert(m_mtdata != nullptr); } - MTdataHolder(MTdata mtdata): m_mtdata(mtdata) {} + // 
Forbid copy. + MTdataHolder(const MTdataHolder&) = delete; + MTdataHolder& operator=(const MTdataHolder&) = delete; - ~MTdataHolder() { free_mtdata(m_mtdata); } + // Support move semantics. + MTdataHolder(MTdataHolder&& h) { std::swap(m_mtdata, h.m_mtdata); } + MTdataHolder& operator=(MTdataHolder&& h) + { + std::swap(m_mtdata, h.m_mtdata); + return *this; + } - operator MTdata() const { return m_mtdata; } + ~MTdataHolder() + { + if (m_mtdata) free_mtdata(m_mtdata); + } + + operator MTdata() const + { + assert(m_mtdata && "Object wasn't initialised"); + return m_mtdata; + } private: - MTdata m_mtdata; + MTdata m_mtdata = nullptr; }; #endif // #ifdef __cplusplus diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index 1b1f7d4c..b6bb049b 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -134,7 +134,7 @@ struct ThreadInfo maxErrorValue; // position of the max error value (param 1). Init to 0. double maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
- MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -691,7 +691,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -740,7 +740,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -792,10 +792,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return error; } diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index d229a376..e85add4b 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -132,7 +132,7 @@ struct ThreadInfo maxErrorValue; // position of the max error value (param 1). Init to 0. double maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
- MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -848,7 +848,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -897,7 +897,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -949,10 +949,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return error; } diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 7baa21a2..f8786e68 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -133,7 +133,7 @@ struct ThreadInfo maxErrorValue; // position of the max error value (param 1). Init to 0. cl_int maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
- MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -610,7 +610,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -662,7 +662,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -714,10 +714,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return error; } diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index 3f998e2e..2387ff06 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -131,7 +131,7 @@ struct ThreadInfo maxErrorValue; // position of the max error value (param 1). Init to 0. cl_int maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
- MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -603,7 +603,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -655,7 +655,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -707,10 +707,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return error; } diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 74883664..34ec6197 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -133,7 +133,7 @@ struct ThreadInfo maxErrorValue; // position of the max error value (param 1). Init to 0. double maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
- MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -658,7 +658,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -707,7 +707,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -759,10 +759,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return error; } diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index 56f293c1..5577cffe 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -131,7 +131,7 @@ struct ThreadInfo maxErrorValue; // position of the max error value (param 1). Init to 0. double maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
- MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -785,7 +785,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -834,7 +834,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -886,10 +886,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return error; } diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index a697a7be..b81766bd 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -127,7 +127,7 @@ struct ThreadInfo clMemWrapper inBuf2; Buffers outBuf; - MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -616,7 +616,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -665,7 +665,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -704,10 +704,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return 
error; } diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 97e2f675..4a3fb67d 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -125,7 +125,7 @@ struct ThreadInfo clMemWrapper inBuf2; Buffers outBuf; - MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -605,7 +605,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -654,7 +654,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -693,10 +693,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return error; } diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 5a3ad355..19cefee4 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -400,7 +400,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index d2982156..6a1b9b9a 100644 --- 
a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -414,7 +414,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index ee3fcbd9..2c81de87 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -98,7 +98,7 @@ cl_mem gInBuffer2 = NULL; cl_mem gInBuffer3 = NULL; cl_mem gOutBuffer[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL }; cl_mem gOutBuffer2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL }; -static MTdata gMTdata; +static MTdataHolder gMTdata; cl_device_fp_config gFloatCapabilities = 0; int gWimpyReductionFactor = 32; int gVerboseBruteForce = 0; @@ -326,7 +326,7 @@ int main(int argc, const char *argv[]) vlog("\n-------------------------------------------------------------------" "----------------------------------------\n"); - gMTdata = init_genrand(gRandomSeed); + gMTdata = MTdataHolder(gRandomSeed); FPU_mode_type oldMode; DisableFTZ(&oldMode); @@ -336,8 +336,6 @@ int main(int argc, const char *argv[]) RestoreFPState(&oldMode); - free_mtdata(gMTdata); - if (gQueue) { int error_code = clFinish(gQueue); diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 7dfc12b1..3deac57c 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -427,7 +427,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, 
ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index 6a5c3539..4c1f1a1d 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -580,7 +580,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { -- cgit v1.2.3 From 76bd9d36744b89e791423ba16f9db323816888e4 Mon Sep 17 00:00:00 2001 From: niranjanjoshi121 <43807392+niranjanjoshi121@users.noreply.github.com> Date: Tue, 20 Sep 2022 21:23:34 +0530 Subject: Fix memory oob problem in test half (#1489) Allocate memory for argc arguments instead of argc - 1. --- test_conformance/half/main.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp index 6600cc58..104f4616 100644 --- a/test_conformance/half/main.cpp +++ b/test_conformance/half/main.cpp @@ -131,8 +131,7 @@ exit: static int ParseArgs( int argc, const char **argv ) { int i; - argList = (const char **)calloc( argc - 1, sizeof( char*) ); - + argList = (const char **)calloc(argc, sizeof(char *)); if( NULL == argList ) { vlog_error( "Failed to allocate memory for argList.\n" ); -- cgit v1.2.3 From 75edf2a8811da5ac379c9dc994d371f31bb74b6e Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 20 Sep 2022 16:55:51 +0100 Subject: [NFC] Enable -Wall for math_brute_force (#1477) math_brute_force compiles cleanly with `-Wall` currently, so avoid regressing from that state. 
Ideally we would enable `-Wall` in the top-level CMakeLists.txt, but other tests do not compile cleanly with `-Wall` yet. Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/CMakeLists.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index 28d2716f..23ee6849 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -40,4 +40,14 @@ set(${MODULE_NAME}_SOURCES utility.h ) +# math_brute_force compiles cleanly with -Wall but other tests not (yet), so +# enable -Wall locally. +if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") + SET_SOURCE_FILES_PROPERTIES( + ${${MODULE_NAME}_SOURCES} + PROPERTIES + COMPILE_FLAGS "-Wall -Wno-format -Wno-strict-aliasing -Wno-unknown-pragmas" + ) +endif() + include(../CMakeCommon.txt) -- cgit v1.2.3 From 86d5ee54140a9b0bf2bef5770e00748438bf05fe Mon Sep 17 00:00:00 2001 From: Nikhil Joshi Date: Tue, 20 Sep 2022 21:37:14 +0530 Subject: Update extension list of test_compiler (#1507) * Update extension list of test_compiler Update extension list of test_compiler with missing external memory and semaphore extensions --- test_conformance/compiler/test_compiler_defines_for_extensions.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 94657d61..b95b0f53 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -79,11 +79,13 @@ const char *known_extensions[] = { "cl_khr_spirv_linkonce_odr", "cl_khr_semaphore", "cl_khr_external_semaphore", - "cl_khr_external_semaphore_opaque_fd", + "cl_khr_external_semaphore_win32",
"cl_khr_external_semaphore_sync_fd", - "cl_khr_command_buffer", + "cl_khr_external_semaphore_opaque_fd", "cl_khr_external_memory", + "cl_khr_external_memory_win32", "cl_khr_external_memory_opaque_fd", + "cl_khr_command_buffer", "cl_khr_command_buffer_mutable_dispatch", }; -- cgit v1.2.3 From 92285f7c9de965ddc41e7dfaaab8c7c75aa55dbe Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 22 Sep 2022 21:17:55 +0100 Subject: cmake: Add set_gnulike_module_compile_flags (#1510) Factor out a macro to set module-specific compilation flags for GNU-like compilers. This simplifies setting compilation flags per test. Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- CMakeLists.txt | 11 +++++++++++ test_conformance/images/kernel_read_write/CMakeLists.txt | 8 +------- test_conformance/math_brute_force/CMakeLists.txt | 12 +++--------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b7c86ba1..6a25d5b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -113,6 +113,17 @@ else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__") endif() +# Set a module's COMPILE_FLAGS if using gcc or clang. +macro(set_gnulike_module_compile_flags flags) + if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") + SET_SOURCE_FILES_PROPERTIES( + ${${MODULE_NAME}_SOURCES} + PROPERTIES + COMPILE_FLAGS ${flags} + ) + endif() +endmacro(set_gnulike_module_compile_flags) + if(MSVC) # Don't warn when using standard non-secure functions. 
add_compile_definitions(_CRT_SECURE_NO_WARNINGS) diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt index 6eb5dc7f..ccd678c1 100644 --- a/test_conformance/images/kernel_read_write/CMakeLists.txt +++ b/test_conformance/images/kernel_read_write/CMakeLists.txt @@ -21,13 +21,7 @@ set(${MODULE_NAME}_SOURCES # Make unused variables not fatal in this module; see # https://github.com/KhronosGroup/OpenCL-CTS/issues/1484 -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") - SET_SOURCE_FILES_PROPERTIES( - ${${MODULE_NAME}_SOURCES} - PROPERTIES - COMPILE_FLAGS "-Wno-error=unused-variable" - ) -endif() +set_gnulike_module_compile_flags("-Wno-error=unused-variable") include(../../CMakeCommon.txt) diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index 23ee6849..1db1ecdf 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -40,14 +40,8 @@ set(${MODULE_NAME}_SOURCES utility.h ) -# math_brute_force compiles cleanly with -Wall but other tests not (yet), so -# enable -Wall locally. -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") - SET_SOURCE_FILES_PROPERTIES( - ${${MODULE_NAME}_SOURCES} - PROPERTIES - COMPILE_FLAGS "-Wall -Wno-format -Wno-strict-aliasing -Wno-unknown-pragmas" - ) -endif() +# math_brute_force compiles cleanly with -Wall (except for a few remaining +# warnings), but other tests not (yet); so enable -Wall locally. +set_gnulike_module_compile_flags("-Wall -Wno-format -Wno-strict-aliasing -Wno-unknown-pragmas") include(../CMakeCommon.txt) -- cgit v1.2.3 From 180adef84c535588c1743673f3468c28cf564a09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Kosi=C5=84ski?= Date: Fri, 23 Sep 2022 09:29:18 -0700 Subject: Remove __DATE__ and __TIME__ usage (#1506) These macros make the build non-deterministic. 
--- test_conformance/contractions/contractions.cpp | 3 +-- test_conformance/conversions/test_conversions.cpp | 4 +--- test_conformance/half/main.cpp | 4 +--- test_conformance/math_brute_force/main.cpp | 2 -- test_conformance/printf/test_printf.cpp | 4 +--- test_conformance/select/test_select.cpp | 4 +--- 6 files changed, 5 insertions(+), 16 deletions(-) diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp index dddebb40..474fd364 100644 --- a/test_conformance/contractions/contractions.cpp +++ b/test_conformance/contractions/contractions.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -434,7 +434,6 @@ static int ParseArgs( int argc, const char **argv ) gArgCount++; } } - vlog( "\n\nTest binary built %s %s\n", __DATE__, __TIME__ ); PrintArch(); diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 788af99b..765d09ff 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -484,8 +484,6 @@ static int ParseArgs( int argc, const char **argv ) vlog( "\n" ); - vlog( "Test binary built %s %s\n", __DATE__, __TIME__ ); - PrintArch(); if( gWimpyMode ) diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp index 104f4616..6bc7db95 100644 --- a/test_conformance/half/main.cpp +++ b/test_conformance/half/main.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. 
-// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -221,7 +221,6 @@ static int ParseArgs( int argc, const char **argv ) gWimpyMode = 1; } - vlog( "Test binary built %s %s\n", __DATE__, __TIME__ ); PrintArch(); if( gWimpyMode ) { @@ -247,4 +246,3 @@ static void PrintUsage( void ) vlog("\t\t%s\n", test_list[i].name ); } } - diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 2c81de87..59960a85 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -502,8 +502,6 @@ static int ParseArgs(int argc, const char **argv) gWimpyMode = 1; } - vlog("\nTest binary built %s %s\n", __DATE__, __TIME__); - PrintArch(); if (gWimpyMode) diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp index a32ee4ea..d638cd46 100644 --- a/test_conformance/printf/test_printf.cpp +++ b/test_conformance/printf/test_printf.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -1030,8 +1030,6 @@ test_status InitCL( cl_device_id device ) return TEST_SKIP; } - log_info( "Test binary built %s %s\n", __DATE__, __TIME__ ); - gFd = acquireOutputStream(&err); if (err != 0) { diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index 27ee5ffd..972a53c6 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -639,7 +639,6 @@ int main(int argc, const char* argv[]) s_wimpy_mode = true; } - log_info( "Test binary built %s %s\n", __DATE__, __TIME__ ); if (s_wimpy_mode) { log_info("\n"); log_info("*** WARNING: Testing in Wimpy mode! ***\n"); @@ -668,4 +667,3 @@ static void printUsage( void ) log_info( "\t%s\n", test_list[i].name ); } } - -- cgit v1.2.3 From 2012c6cadd4707d40a83da5fecd080de908d5973 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 23 Sep 2022 18:08:10 +0100 Subject: [NFC] Fix typo in clang-format directive (#1512) Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/pipes/test_pipe_limits.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/pipes/test_pipe_limits.cpp b/test_conformance/pipes/test_pipe_limits.cpp index 7e979251..e1048f5f 100644 --- a/test_conformance/pipes/test_pipe_limits.cpp +++ b/test_conformance/pipes/test_pipe_limits.cpp @@ -69,7 +69,7 @@ void createKernelSourceCode(std::stringstream &stream, int num_pipes) } } )"; - // clang-format om + // clang-format on } stream << R"( } -- cgit v1.2.3 From c014122742c211f8febb49324b9e99302e26018c Mon Sep 17 00:00:00 2001 From: ellnor01 <51320439+ellnor01@users.noreply.github.com> Date: Mon, 26 Sep 2022 12:57:42 +0100 Subject: Creating common functions for image/kernel_read_write read tests (#1141) * Make InitFloatCoords suitable for all image types Contributes #616 * Create common functions neutral for image types Remove 3D specific code from common test_read_image so using it for other image types is simpler in following patches Contributes #616 * Removing unused code Tidying commented out or unnecessary code Contributes #616 Signed-off-by: Ellen Norris-Thompson * Restoring 'lod' variable name Contributes #616 * Default cases to handle unsupported image types Contributes #616 * Resolving build issues Contributes #616 * Fix formatting Contributes #616 * Using TEST_FAIL as an 
error code. Contributes #616 * Add static keyword, improve error handling Contributes #616 * Fix build errors with least disruption Contributes #616 Signed-off-by: Ellen Norris-Thompson --- .../images/kernel_read_write/test_common.cpp | 443 +++++++++++++-------- .../images/kernel_read_write/test_common.h | 139 +++---- 2 files changed, 331 insertions(+), 251 deletions(-) diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp index 62bd4ab1..a22db195 100644 --- a/test_conformance/images/kernel_read_write/test_common.cpp +++ b/test_conformance/images/kernel_read_write/test_common.cpp @@ -34,122 +34,210 @@ cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool te return sampler; } -void InitFloatCoordsCommon(image_descriptor *imageInfo, - image_sampler_data *imageSampler, float *xOffsets, - float *yOffsets, float *zOffsets, float xfract, - float yfract, float zfract, int normalized_coords, - MTdata d, int lod) +bool get_image_dimensions(image_descriptor *imageInfo, size_t &width, + size_t &height, size_t &depth) +{ + width = imageInfo->width; + height = 1; + depth = 1; + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE1D: break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: height = imageInfo->arraySize; break; + case CL_MEM_OBJECT_IMAGE2D: height = imageInfo->height; break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + height = imageInfo->height; + depth = imageInfo->arraySize; + break; + case CL_MEM_OBJECT_IMAGE3D: + height = imageInfo->height; + depth = imageInfo->depth; + break; + default: + log_error("ERROR: Test does not support image type"); + return TEST_FAIL; + } + return 0; +} + +static bool InitFloatCoordsCommon(image_descriptor *imageInfo, + image_sampler_data *imageSampler, + float *xOffsets, float *yOffsets, + float *zOffsets, float xfract, float yfract, + float zfract, int normalized_coords, MTdata d, + int lod) { size_t i = 0; - if (gDisableOffsets) + size_t 
width_loop, height_loop, depth_loop; + bool error = + get_image_dimensions(imageInfo, width_loop, height_loop, depth_loop); + if (!error) { - for (size_t z = 0; z < imageInfo->depth; z++) + if (gDisableOffsets) { - for (size_t y = 0; y < imageInfo->height; y++) + for (size_t z = 0; z < depth_loop; z++) { - for (size_t x = 0; x < imageInfo->width; x++, i++) + for (size_t y = 0; y < height_loop; y++) { - xOffsets[i] = (float)(xfract + (double)x); - yOffsets[i] = (float)(yfract + (double)y); - zOffsets[i] = (float)(zfract + (double)z); + for (size_t x = 0; x < width_loop; x++, i++) + { + xOffsets[i] = (float)(xfract + (double)x); + yOffsets[i] = (float)(yfract + (double)y); + zOffsets[i] = (float)(zfract + (double)z); + } } } } - } - else - { - for (size_t z = 0; z < imageInfo->depth; z++) + else { - for (size_t y = 0; y < imageInfo->height; y++) + for (size_t z = 0; z < depth_loop; z++) { - for (size_t x = 0; x < imageInfo->width; x++, i++) + for (size_t y = 0; y < height_loop; y++) { - xOffsets[i] = - (float)(xfract - + (double)((int)x - + random_in_range(-10, 10, d))); - yOffsets[i] = - (float)(yfract - + (double)((int)y - + random_in_range(-10, 10, d))); - zOffsets[i] = - (float)(zfract - + (double)((int)z - + random_in_range(-10, 10, d))); + for (size_t x = 0; x < width_loop; x++, i++) + { + xOffsets[i] = + (float)(xfract + + (double)((int)x + + random_in_range(-10, 10, d))); + yOffsets[i] = + (float)(yfract + + (double)((int)y + + random_in_range(-10, 10, d))); + zOffsets[i] = + (float)(zfract + + (double)((int)z + + random_in_range(-10, 10, d))); + } } } } - } - if (imageSampler->addressing_mode == CL_ADDRESS_NONE) - { - i = 0; - for (size_t z = 0; z < imageInfo->depth; z++) + if (imageSampler->addressing_mode == CL_ADDRESS_NONE) { - for (size_t y = 0; y < imageInfo->height; y++) + i = 0; + for (size_t z = 0; z < depth_loop; z++) { - for (size_t x = 0; x < imageInfo->width; x++, i++) + for (size_t y = 0; y < height_loop; y++) { - xOffsets[i] = 
(float)CLAMP((double)xOffsets[i], 0.0, - (double)imageInfo->width - 1.0); - yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0, - (double)imageInfo->height - 1.0); - zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0, - (double)imageInfo->depth - 1.0); + for (size_t x = 0; x < width_loop; x++, i++) + { + xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0, + (double)width_loop - 1.0); + yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0, + (double)height_loop - 1.0); + zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0, + (double)depth_loop - 1.0); + } } } } - } - if (normalized_coords || gTestMipmaps) - { - i = 0; - if (lod == 0) + if (normalized_coords || gTestMipmaps) { - for (size_t z = 0; z < imageInfo->depth; z++) + i = 0; + if (lod == 0) { - for (size_t y = 0; y < imageInfo->height; y++) + for (size_t z = 0; z < depth_loop; z++) { - for (size_t x = 0; x < imageInfo->width; x++, i++) + for (size_t y = 0; y < height_loop; y++) { - xOffsets[i] = (float)((double)xOffsets[i] - / (double)imageInfo->width); - yOffsets[i] = (float)((double)yOffsets[i] - / (double)imageInfo->height); - zOffsets[i] = (float)((double)zOffsets[i] - / (double)imageInfo->depth); + for (size_t x = 0; x < width_loop; x++, i++) + { + xOffsets[i] = (float)((double)xOffsets[i] + / (double)width_loop); + if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + yOffsets[i] = (float)((double)yOffsets[i] + / (double)height_loop); + } + if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + zOffsets[i] = (float)((double)zOffsets[i] + / (double)depth_loop); + } + } } } } - } - else if (gTestMipmaps) - { - size_t width_lod, height_lod, depth_lod; - - width_lod = - (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; - height_lod = - (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1; - depth_lod = - (imageInfo->depth >> lod) ? 
(imageInfo->depth >> lod) : 1; - - for (size_t z = 0; z < depth_lod; z++) + else if (gTestMipmaps) { - for (size_t y = 0; y < height_lod; y++) + size_t width_lod = + (width_loop >> lod) ? (width_loop >> lod) : 1; + size_t height_lod = height_loop; + size_t depth_lod = depth_loop; + if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) { - for (size_t x = 0; x < width_lod; x++, i++) + height_lod = + (height_loop >> lod) ? (height_loop >> lod) : 1; + } + if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + depth_lod = (depth_loop >> lod) ? (depth_loop >> lod) : 1; + } + + for (size_t z = 0; z < depth_lod; z++) + { + for (size_t y = 0; y < height_lod; y++) { - xOffsets[i] = - (float)((double)xOffsets[i] / (double)width_lod); - yOffsets[i] = - (float)((double)yOffsets[i] / (double)height_lod); - zOffsets[i] = - (float)((double)zOffsets[i] / (double)depth_lod); + for (size_t x = 0; x < width_lod; x++, i++) + { + xOffsets[i] = (float)((double)xOffsets[i] + / (double)width_lod); + if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + yOffsets[i] = (float)((double)yOffsets[i] + / (double)height_lod); + } + if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + zOffsets[i] = (float)((double)zOffsets[i] + / (double)depth_lod); + } + } } } } } } + return error; +} + +cl_mem create_image_of_type(cl_context context, cl_mem_flags mem_flags, + image_descriptor *imageInfo, size_t row_pitch, + size_t slice_pitch, void *host_ptr, cl_int *error) +{ + cl_mem image; + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE3D: + image = create_image_3d(context, mem_flags, imageInfo->format, + imageInfo->width, imageInfo->height, + imageInfo->depth, row_pitch, slice_pitch, + host_ptr, error); + break; + default: + log_error("Implementation is incomplete, only 3D images are " + "supported so far"); + return nullptr; + } + return image; +} + +static size_t get_image_num_pixels(image_descriptor *imageInfo, size_t width, + size_t height, size_t depth, + size_t array_size) +{ + 
size_t image_size; + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE3D: image_size = width * height * depth; break; + default: + log_error("Implementation is incomplete, only 3D images are " + "supported so far"); + return 0; + } + return image_size; } int test_read_image(cl_context context, cl_command_queue queue, @@ -161,6 +249,17 @@ int test_read_image(cl_context context, cl_command_queue queue, size_t threads[3]; static int initHalf = 0; + size_t image_size = + get_image_num_pixels(imageInfo, imageInfo->width, imageInfo->height, + imageInfo->depth, imageInfo->arraySize); + test_assert_error(0 != image_size, "Invalid image size"); + size_t width_size, height_size, depth_size; + if (get_image_dimensions(imageInfo, width_size, height_size, depth_size)) + { + log_error("ERROR: invalid image dimensions"); + return CL_INVALID_VALUE; + } + cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY; clMemWrapper xOffsets, yOffsets, zOffsets, results; @@ -169,14 +268,11 @@ int test_read_image(cl_context context, cl_command_queue queue, // Create offset data BufferOwningPtr xOffsetValues( - malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height - * imageInfo->depth)); + malloc(sizeof(cl_float) * image_size)); BufferOwningPtr yOffsetValues( - malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height - * imageInfo->depth)); + malloc(sizeof(cl_float) * image_size)); BufferOwningPtr zOffsetValues( - malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height - * imageInfo->depth)); + malloc(sizeof(cl_float) * image_size)); if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT) if (DetectFloatToHalfRoundingMode(queue)) return 1; @@ -207,26 +303,27 @@ int test_read_image(cl_context context, cl_command_queue queue, { generate_random_image_data(imageInfo, maxImageUseHostPtrBackingStore, d); - unprotImage = create_image_3d( + unprotImage = create_image_of_type( context, image_read_write_flags | CL_MEM_USE_HOST_PTR, - imageInfo->format, 
imageInfo->width, imageInfo->height, - imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0), + imageInfo, (gEnablePitch ? imageInfo->rowPitch : 0), (gEnablePitch ? imageInfo->slicePitch : 0), maxImageUseHostPtrBackingStore, &error); } else { - error = protImage.Create(context, image_read_write_flags, - imageInfo->format, imageInfo->width, - imageInfo->height, imageInfo->depth); + error = protImage.Create(context, imageInfo->type, + image_read_write_flags, imageInfo->format, + imageInfo->width, imageInfo->height, + imageInfo->depth, imageInfo->arraySize); } if (error != CL_SUCCESS) { - log_error("ERROR: Unable to create 3D image of size %d x %d x %d " + log_error("ERROR: Unable to create image of size %d x %d x %d x %d " "(pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, - (int)imageInfo->depth, (int)imageInfo->rowPitch, - (int)imageInfo->slicePitch, IGetErrorString(error)); + (int)imageInfo->depth, (int)imageInfo->arraySize, + (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, + IGetErrorString(error)); return error; } if (gTestMaxImages) @@ -238,18 +335,18 @@ int test_read_image(cl_context context, cl_command_queue queue, { // Don't use clEnqueueWriteImage; just use copy host ptr to get the data // in - unprotImage = create_image_3d( - context, image_read_write_flags | CL_MEM_COPY_HOST_PTR, - imageInfo->format, imageInfo->width, imageInfo->height, - imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0), + unprotImage = create_image_of_type( + context, image_read_write_flags | CL_MEM_COPY_HOST_PTR, imageInfo, + (gEnablePitch ? imageInfo->rowPitch : 0), (gEnablePitch ? 
imageInfo->slicePitch : 0), imageValues, &error); if (error != CL_SUCCESS) { - log_error("ERROR: Unable to create 3D image of size %d x %d x %d " + log_error("ERROR: Unable to create image of size %d x %d x %d x %d " "(pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, - (int)imageInfo->depth, (int)imageInfo->rowPitch, - (int)imageInfo->slicePitch, IGetErrorString(error)); + (int)imageInfo->depth, (int)imageInfo->arraySize, + (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, + IGetErrorString(error)); return error; } image = unprotImage; @@ -261,19 +358,19 @@ int test_read_image(cl_context context, cl_command_queue queue, // specified, so we just do the same thing either way if (!gTestMipmaps) { - unprotImage = create_image_3d( - context, image_read_write_flags | gMemFlagsToUse, - imageInfo->format, imageInfo->width, imageInfo->height, - imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0), + unprotImage = create_image_of_type( + context, image_read_write_flags | gMemFlagsToUse, imageInfo, + (gEnablePitch ? imageInfo->rowPitch : 0), (gEnablePitch ? 
imageInfo->slicePitch : 0), imageValues, &error); if (error != CL_SUCCESS) { - log_error("ERROR: Unable to create 3D image of size %d x %d x " - "%d (pitch %d, %d ) (%s)", + log_error("ERROR: Unable to create image of size %d x %d x " + "%d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, - (int)imageInfo->depth, (int)imageInfo->rowPitch, - (int)imageInfo->slicePitch, IGetErrorString(error)); + (int)imageInfo->depth, (int)imageInfo->arraySize, + (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, + IGetErrorString(error)); return error; } image = unprotImage; @@ -281,10 +378,11 @@ int test_read_image(cl_context context, cl_command_queue queue, else { cl_image_desc image_desc = { 0 }; - image_desc.image_type = CL_MEM_OBJECT_IMAGE3D; + image_desc.image_type = imageInfo->type; image_desc.image_width = imageInfo->width; image_desc.image_height = imageInfo->height; image_desc.image_depth = imageInfo->depth; + image_desc.image_array_size = imageInfo->arraySize; image_desc.num_mip_levels = imageInfo->num_mip_levels; @@ -293,23 +391,24 @@ int test_read_image(cl_context context, cl_command_queue queue, imageInfo->format, &image_desc, NULL, &error); if (error != CL_SUCCESS) { - log_error("ERROR: Unable to create %d level mipmapped 3D image " - "of size %d x %d x %d (pitch %d, %d ) (%s)", + log_error("ERROR: Unable to create %d level mipmapped image " + "of size %d x %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, - (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, - IGetErrorString(error)); + (int)imageInfo->arraySize, (int)imageInfo->rowPitch, + (int)imageInfo->slicePitch, IGetErrorString(error)); return error; } image = unprotImage; } } + test_assert_error(nullptr != image, "Image creation failed"); + if (gMemFlagsToUse != CL_MEM_COPY_HOST_PTR) { size_t origin[4] = { 0, 0, 0, 0 }; - size_t region[3] = { imageInfo->width, imageInfo->height, - 
imageInfo->depth }; + size_t region[3] = { width_size, height_size, depth_size }; if (gDebugTrace) log_info(" - Writing image...\n"); @@ -324,10 +423,10 @@ int test_read_image(cl_context context, cl_command_queue queue, if (error != CL_SUCCESS) { - log_error("ERROR: Unable to write to 3D image of size %d x %d " - "x %d \n", + log_error("ERROR: Unable to write to image of size %d x %d " + "x %d x %d\n", (int)imageInfo->width, (int)imageInfo->height, - (int)imageInfo->depth); + (int)imageInfo->depth, (int)imageInfo->arraySize); return error; } } @@ -339,17 +438,15 @@ int test_read_image(cl_context context, cl_command_queue queue, { origin[3] = i; error = clEnqueueWriteImage( - queue, image, CL_TRUE, origin, region, - /*gEnablePitch ? imageInfo->rowPitch :*/ 0, - /*gEnablePitch ? imageInfo->slicePitch :*/ 0, + queue, image, CL_TRUE, origin, region, 0, 0, ((char *)imageValues + nextLevelOffset), 0, NULL, NULL); if (error != CL_SUCCESS) { - log_error("ERROR: Unable to write to %d level mipmapped 3D " - "image of size %d x %d x %d\n", + log_error("ERROR: Unable to write to %d level mipmapped " + "image of size %d x %d x %d x %d\n", (int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, - (int)imageInfo->depth); + (int)imageInfo->arraySize, (int)imageInfo->depth); return error; } nextLevelOffset += region[0] * region[1] * region[2] @@ -362,26 +459,21 @@ int test_read_image(cl_context context, cl_command_queue queue, } } - xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width - * imageInfo->height * imageInfo->depth, - xOffsetValues, &error); + xOffsets = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(cl_float) * image_size, xOffsetValues, &error); test_error(error, "Unable to create x offset buffer"); - yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width - * imageInfo->height * imageInfo->depth, - yOffsetValues, &error); + yOffsets = + 
clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(cl_float) * image_size, yOffsetValues, &error); test_error(error, "Unable to create y offset buffer"); - zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width - * imageInfo->height * imageInfo->depth, - zOffsetValues, &error); + zOffsets = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(cl_float) * image_size, zOffsetValues, &error); test_error(error, "Unable to create y offset buffer"); - results = - clCreateBuffer(context, CL_MEM_READ_WRITE, - get_explicit_type_size(outputType) * 4 * imageInfo->width - * imageInfo->height * imageInfo->depth, - NULL, &error); + results = clCreateBuffer( + context, CL_MEM_READ_WRITE, + get_explicit_type_size(outputType) * 4 * image_size, NULL, &error); test_error(error, "Unable to create result buffer"); // Create sampler to use @@ -444,16 +536,19 @@ int test_read_image(cl_context context, cl_command_queue queue, } int nextLevelOffset = 0; - size_t width_lod = imageInfo->width, height_lod = imageInfo->height, - depth_lod = imageInfo->depth; + size_t width_lod = width_size, height_lod = height_size, + depth_lod = depth_size; // Loop over all mipmap levels, if we are testing mipmapped images. 
for (int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) { - size_t resultValuesSize = width_lod * height_lod * depth_lod - * get_explicit_type_size(outputType) * 4; + size_t image_lod_size = get_image_num_pixels( + imageInfo, width_lod, height_lod, depth_lod, imageInfo->arraySize); + test_assert_error(0 != image_lod_size, "Invalid image size"); + size_t resultValuesSize = + image_lod_size * get_explicit_type_size(outputType) * 4; BufferOwningPtr resultValues(malloc(resultValuesSize)); float lod_float = (float)lod; if (gTestMipmaps) @@ -469,30 +564,25 @@ int test_read_image(cl_context context, cl_command_queue queue, float offset = float_offsets[q % float_offset_count]; // Init the coordinates - InitFloatCoordsCommon(imageInfo, imageSampler, xOffsetValues, - yOffsetValues, zOffsetValues, - q >= float_offset_count ? -offset : offset, - q >= float_offset_count ? offset : -offset, - q >= float_offset_count ? -offset : offset, - imageSampler->normalized_coords, d, lod); - - error = - clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0, - sizeof(cl_float) * imageInfo->height - * imageInfo->width * imageInfo->depth, - xOffsetValues, 0, NULL, NULL); + error = InitFloatCoordsCommon( + imageInfo, imageSampler, xOffsetValues, yOffsetValues, + zOffsetValues, q >= float_offset_count ? -offset : offset, + q >= float_offset_count ? offset : -offset, + q >= float_offset_count ? 
-offset : offset, + imageSampler->normalized_coords, d, lod); + test_error(error, "Unable to initialise coordinates"); + + error = clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0, + sizeof(cl_float) * image_size, + xOffsetValues, 0, NULL, NULL); test_error(error, "Unable to write x offsets"); - error = - clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0, - sizeof(cl_float) * imageInfo->height - * imageInfo->width * imageInfo->depth, - yOffsetValues, 0, NULL, NULL); + error = clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0, + sizeof(cl_float) * image_size, + yOffsetValues, 0, NULL, NULL); test_error(error, "Unable to write y offsets"); - error = - clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0, - sizeof(cl_float) * imageInfo->height - * imageInfo->width * imageInfo->depth, - zOffsetValues, 0, NULL, NULL); + error = clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0, + sizeof(cl_float) * image_size, + zOffsetValues, 0, NULL, NULL); test_error(error, "Unable to write z offsets"); @@ -511,11 +601,10 @@ int test_read_image(cl_context context, cl_command_queue queue, test_error(error, "Unable to run kernel"); // Get results - error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, - width_lod * height_lod * depth_lod - * get_explicit_type_size(outputType) - * 4, - resultValues, 0, NULL, NULL); + error = clEnqueueReadBuffer( + queue, results, CL_TRUE, 0, + image_lod_size * get_explicit_type_size(outputType) * 4, + resultValues, 0, NULL, NULL); test_error(error, "Unable to read results from kernel"); if (gDebugTrace) log_info(" results read\n"); @@ -1540,8 +1629,14 @@ int test_read_image(cl_context context, cl_command_queue queue, nextLevelOffset += width_lod * height_lod * depth_lod * get_pixel_size(imageInfo->format); width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1; - height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1; - depth_lod = (depth_lod >> 1) ? 
(depth_lod >> 1) : 1; + if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1; + } + if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1; + } } } diff --git a/test_conformance/images/kernel_read_write/test_common.h b/test_conformance/images/kernel_read_write/test_common.h index 656c41f4..fc95bee2 100644 --- a/test_conformance/images/kernel_read_write/test_common.h +++ b/test_conformance/images/kernel_read_write/test_common.h @@ -42,12 +42,8 @@ extern int test_read_image(cl_context context, cl_command_queue queue, bool useFloatCoords, ExplicitType outputType, MTdata d); -extern void InitFloatCoordsCommon(image_descriptor *imageInfo, - image_sampler_data *imageSampler, - float *xOffsets, float *yOffsets, - float *zOffsets, float xfract, float yfract, - float zfract, int normalized_coords, MTdata d, - int lod); +extern bool get_image_dimensions(image_descriptor *imageInfo, size_t &width, + size_t &height, size_t &depth); template int determine_validation_error_offset( @@ -63,8 +59,12 @@ int determine_validation_error_offset( bool clampingErr = false, clamped = false, otherClampingBug = false; int clampedX, clampedY, clampedZ; - size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height, - imageDepth = imageInfo->depth; + size_t imageWidth, imageHeight, imageDepth; + if (get_image_dimensions(imageInfo, imageWidth, imageHeight, imageDepth)) + { + log_error("ERROR: invalid image dimensions"); + return TEST_FAIL; + } clamped = get_integer_coords_offset(x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, imageWidth, imageHeight, @@ -147,82 +147,67 @@ int determine_validation_error_offset( } if (!clampingErr) { - /* if( clamped && ( (int)x + (int)xOffsetValues[ j ] < 0 || - (int)y + (int)yOffsetValues[ j ] < 0 ) ) - { - log_error( "NEGATIVE COORDINATE ERROR\n" ); - return -1; - } - */ - if (true) // gExtraValidateInfo ) + if (printAsFloat) { - 
if (printAsFloat) - { - log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not " - "validate!\n\tExpected (%g,%g,%g,%g),\n\t got " - "(%g,%g,%g,%g), error of %g\n", - j, x, x, y, y, z, z, (float)expected[0], - (float)expected[1], (float)expected[2], - (float)expected[3], (float)resultPtr[0], - (float)resultPtr[1], (float)resultPtr[2], - (float)resultPtr[3], error); - } - else - { - log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not " - "validate!\n\tExpected (%x,%x,%x,%x),\n\t got " - "(%x,%x,%x,%x)\n", - j, x, x, y, y, z, z, (int)expected[0], - (int)expected[1], (int)expected[2], (int)expected[3], - (int)resultPtr[0], (int)resultPtr[1], - (int)resultPtr[2], (int)resultPtr[3]); - } - log_error( - "Integer coords resolve to %d,%d,%d with img size %d,%d,%d\n", - clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight, - (int)imageDepth); + log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not " + "validate!\n\tExpected (%g,%g,%g,%g),\n\t got " + "(%g,%g,%g,%g), error of %g\n", + j, x, x, y, y, z, z, (float)expected[0], + (float)expected[1], (float)expected[2], + (float)expected[3], (float)resultPtr[0], + (float)resultPtr[1], (float)resultPtr[2], + (float)resultPtr[3], error); + } + else + { + log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not " + "validate!\n\tExpected (%x,%x,%x,%x),\n\t got " + "(%x,%x,%x,%x)\n", + j, x, x, y, y, z, z, (int)expected[0], (int)expected[1], + (int)expected[2], (int)expected[3], (int)resultPtr[0], + (int)resultPtr[1], (int)resultPtr[2], (int)resultPtr[3]); + } + log_error( + "Integer coords resolve to %d,%d,%d with img size %d,%d,%d\n", + clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight, + (int)imageDepth); - if (printAsFloat && gExtraValidateInfo) + if (printAsFloat && gExtraValidateInfo) + { + log_error("\nNearby values:\n"); + for (int zOff = -1; zOff <= 1; zOff++) { - log_error("\nNearby values:\n"); - for (int zOff = -1; zOff <= 1; zOff++) + for (int yOff = -1; yOff <= 1; yOff++) 
{ - for (int yOff = -1; yOff <= 1; yOff++) - { - float top[4], real[4], bot[4]; - read_image_pixel_float(imagePtr, imageInfo, - clampedX - 1, clampedY + yOff, - clampedZ + zOff, top); - read_image_pixel_float(imagePtr, imageInfo, clampedX, - clampedY + yOff, clampedZ + zOff, - real); - read_image_pixel_float(imagePtr, imageInfo, - clampedX + 1, clampedY + yOff, - clampedZ + zOff, bot); - log_error("\t(%g,%g,%g,%g)", top[0], top[1], top[2], - top[3]); - log_error(" (%g,%g,%g,%g)", real[0], real[1], real[2], - real[3]); - log_error(" (%g,%g,%g,%g)\n", bot[0], bot[1], bot[2], - bot[3]); - } + float top[4], real[4], bot[4]; + read_image_pixel_float(imagePtr, imageInfo, clampedX - 1, + clampedY + yOff, clampedZ + zOff, + top); + read_image_pixel_float(imagePtr, imageInfo, clampedX, + clampedY + yOff, clampedZ + zOff, + real); + read_image_pixel_float(imagePtr, imageInfo, clampedX + 1, + clampedY + yOff, clampedZ + zOff, + bot); + log_error("\t(%g,%g,%g,%g)", top[0], top[1], top[2], + top[3]); + log_error(" (%g,%g,%g,%g)", real[0], real[1], real[2], + real[3]); + log_error(" (%g,%g,%g,%g)\n", bot[0], bot[1], bot[2], + bot[3]); } } - // } - // else - // log_error( "\n" ); - if (imageSampler->filter_mode != CL_FILTER_LINEAR) - { - if (found) - log_error( - "\tValue really found in image at %d,%d,%d (%s)\n", - actualX, actualY, actualZ, - (found > 1) ? "NOT unique!!" : "unique"); - else - log_error("\tValue not actually found in image\n"); - } - log_error("\n"); } + if (imageSampler->filter_mode != CL_FILTER_LINEAR) + { + if (found) + log_error("\tValue really found in image at %d,%d,%d (%s)\n", + actualX, actualY, actualZ, + (found > 1) ? "NOT unique!!" 
: "unique"); + else + log_error("\tValue not actually found in image\n"); + } + log_error("\n"); numClamped = -1; // We force the clamped counter to never work if ((--numTries) == 0) return -1; -- cgit v1.2.3 From 30500fba06973115cab6333d96d2b75d53476daa Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 27 Sep 2022 10:28:57 -0600 Subject: SVM: Fix memory allocation size. (#1514) * SVM: Fix memory allocation size. 9ad48998 generally made memory allocation and mapping consistent with a size of size_t. Apply that fix to the final two allocations. * check-format fixes Co-authored-by: spauls --- test_conformance/SVM/test_cross_buffer_pointers.cpp | 3 ++- test_conformance/SVM/test_shared_sub_buffers.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test_conformance/SVM/test_cross_buffer_pointers.cpp b/test_conformance/SVM/test_cross_buffer_pointers.cpp index c1caebb9..2baa7ad7 100644 --- a/test_conformance/SVM/test_cross_buffer_pointers.cpp +++ b/test_conformance/SVM/test_cross_buffer_pointers.cpp @@ -162,7 +162,8 @@ int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_contex test_error(error, "clCreateBuffer failed."); // this buffer holds the index into the nodes buffer that is used for node allocation - clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); + clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(size_t), NULL, &error); test_error(error, "clCreateBuffer failed."); // this buffer holds the count of correct nodes which is computed by the verify kernel. 
diff --git a/test_conformance/SVM/test_shared_sub_buffers.cpp b/test_conformance/SVM/test_shared_sub_buffers.cpp index a79484c9..2532886e 100644 --- a/test_conformance/SVM/test_shared_sub_buffers.cpp +++ b/test_conformance/SVM/test_shared_sub_buffers.cpp @@ -182,7 +182,8 @@ int test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context2, cl_c // this buffer holds the index into the nodes buffer that is used for node allocation - clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); + clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(size_t), NULL, &error); test_error(error, "clCreateBuffer failed."); // this buffer holds the count of correct nodes which is computed by the verify kernel. -- cgit v1.2.3 From 9b21e9f06b88e7ce96b76b0e94c6dfef644ac1ee Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 27 Sep 2022 17:29:58 +0100 Subject: [NFC] Avoid mixing signed and unsigned in subhelpers run (#1505) Fix a `-Wsign-compare` warning in the `run()` function, which resulted in many repeated warnings when compiling with `-Wall` due to the many template instantiations. Both `clGetKernelSubGroupInfo` queries return a `size_t`, so it is unclear why the results of these queries were being cast to `int`. The `dynsc` uses don't seem to work with negative values, so make the field unsigned. 
Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/subgroups/subhelpers.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 0944ffb3..0a2c3903 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -72,7 +72,7 @@ struct WorkGroupParams size_t subgroup_size; cl_uint cluster_size; bs128 work_items_mask; - int dynsc; + size_t dynsc; bool use_core_subgroups; std::vector all_work_item_masks; int divergence_mask_arg; @@ -1495,7 +1495,7 @@ template struct test { size_t tmp; cl_int error; - int subgroup_size, num_subgroups; + size_t subgroup_size, num_subgroups; size_t global = test_params.global_workgroup_size; size_t local = test_params.local_workgroup_size; clProgramWrapper program; @@ -1580,7 +1580,7 @@ template struct test return TEST_FAIL; } - subgroup_size = (int)tmp; + subgroup_size = tmp; error = clGetKernelSubGroupInfo_ptr( kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, @@ -1593,11 +1593,11 @@ template struct test return TEST_FAIL; } - num_subgroups = (int)tmp; + num_subgroups = tmp; // Make sure the number of sub groups is what we expect if (num_subgroups != (local + subgroup_size - 1) / subgroup_size) { - log_error("ERROR: unexpected number of subgroups (%d) returned\n", + log_error("ERROR: unexpected number of subgroups (%zu) returned\n", num_subgroups); return TEST_FAIL; } @@ -1606,13 +1606,12 @@ template struct test std::vector odata; size_t input_array_size = global; size_t output_array_size = global; - int dynscl = test_params.dynsc; + size_t dynscl = test_params.dynsc; if (dynscl != 0) { - input_array_size = - (int)global / (int)local * num_subgroups * dynscl; - output_array_size = (int)global / (int)local * dynscl; + input_array_size = global / local * num_subgroups * dynscl; + output_array_size = global / local * dynscl; } 
idata.resize(input_array_size); -- cgit v1.2.3 From 9bf6486352bf4c87a49ecb212ae71f96c293c26f Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 27 Sep 2022 17:32:23 +0100 Subject: [NFC] clang-format test_atomics (#1516) Add some clang-format off/on comments to keep lists and kernel code readable. Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/atomics/main.cpp | 7 +- test_conformance/atomics/procs.h | 49 +- test_conformance/atomics/testBase.h | 5 +- test_conformance/atomics/test_atomics.cpp | 1255 ++++++++++++++--------- test_conformance/atomics/test_indexed_cases.cpp | 507 +++++---- 5 files changed, 1143 insertions(+), 680 deletions(-) diff --git a/test_conformance/atomics/main.cpp b/test_conformance/atomics/main.cpp index afdea376..987d6bfa 100644 --- a/test_conformance/atomics/main.cpp +++ b/test_conformance/atomics/main.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -24,6 +24,7 @@ #include #endif +// clang-format off test_definition test_list[] = { ADD_TEST( atomic_add ), ADD_TEST( atomic_sub ), @@ -40,11 +41,11 @@ test_definition test_list[] = { ADD_TEST( atomic_add_index ), ADD_TEST( atomic_add_index_bin ), }; +// clang-format on -const int test_num = ARRAY_SIZE( test_list ); +const int test_num = ARRAY_SIZE(test_list); int main(int argc, const char *argv[]) { return runTestHarness(argc, argv, test_num, test_list, false, 0); } - diff --git a/test_conformance/atomics/procs.h b/test_conformance/atomics/procs.h index bf053f25..fa85aad5 100644 --- a/test_conformance/atomics/procs.h +++ b/test_conformance/atomics/procs.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. 
-// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -18,22 +18,35 @@ #include "harness/threadTesting.h" #include "harness/typeWrappers.h" -extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret); - -extern int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); - -extern int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); - +extern int create_program_and_kernel(const char *source, + const char *kernel_name, 
+ cl_program *program_ret, + cl_kernel *kernel_ret); +extern int test_atomic_add(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_sub(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_xchg(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_min(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_max(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_inc(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_dec(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_and(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_or(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_xor(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_add_index(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); diff --git a/test_conformance/atomics/testBase.h b/test_conformance/atomics/testBase.h index ba67d140..22bce1d2 100644 --- a/test_conformance/atomics/testBase.h +++ b/test_conformance/atomics/testBase.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -26,6 +26,3 @@ #include "procs.h" #endif // _testBase_h - - - diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp index c0c01363..31d08500 100644 --- a/test_conformance/atomics/test_atomics.cpp +++ b/test_conformance/atomics/test_atomics.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -22,7 +22,7 @@ #define INT_TEST_VALUE 402258822 #define LONG_TEST_VALUE 515154531254381446LL - +// clang-format off const char *atomic_global_pattern[] = { "__kernel void test_atomic_fn(volatile __global %s *destMemory, __global %s *oldValues)\n" "{\n" @@ -36,19 +36,20 @@ const char *atomic_local_pattern[] = { "__kernel void test_atomic_fn(__global %s *finalDest, __global %s *oldValues, volatile __local %s *destMemory, int numDestItems )\n" "{\n" " int tid = get_global_id(0);\n" - " int dstItemIdx;\n" + " int dstItemIdx;\n" "\n" " // Everybody does the following line(s), but it all has the same result. We still need to ensure we sync before the atomic op, though\n" - " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n" + " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n" " destMemory[ dstItemIdx ] = finalDest[ dstItemIdx ];\n" " barrier( CLK_LOCAL_MEM_FENCE );\n" "\n" , " barrier( CLK_LOCAL_MEM_FENCE );\n" " // Finally, write out the last value. 
Again, we're synced, so everyone will be writing the same value\n" - " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n" + " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n" " finalDest[ dstItemIdx ] = destMemory[ dstItemIdx ];\n" "}\n" }; +// clang-format on #define TEST_COUNT 128 * 1024 @@ -56,41 +57,48 @@ const char *atomic_local_pattern[] = { struct TestFns { - cl_int mIntStartValue; - cl_long mLongStartValue; + cl_int mIntStartValue; + cl_long mLongStartValue; - size_t (*NumResultsFn)( size_t threadSize, ExplicitType dataType ); + size_t (*NumResultsFn)(size_t threadSize, ExplicitType dataType); // Integer versions - cl_int (*ExpectedValueIntFn)( size_t size, cl_int *startRefValues, size_t whichDestValue ); - void (*GenerateRefsIntFn)( size_t size, cl_int *startRefValues, MTdata d ); - bool (*VerifyRefsIntFn)( size_t size, cl_int *refValues, cl_int finalValue ); + cl_int (*ExpectedValueIntFn)(size_t size, cl_int *startRefValues, + size_t whichDestValue); + void (*GenerateRefsIntFn)(size_t size, cl_int *startRefValues, MTdata d); + bool (*VerifyRefsIntFn)(size_t size, cl_int *refValues, cl_int finalValue); // Long versions - cl_long (*ExpectedValueLongFn)( size_t size, cl_long *startRefValues, size_t whichDestValue ); - void (*GenerateRefsLongFn)( size_t size, cl_long *startRefValues, MTdata d ); - bool (*VerifyRefsLongFn)( size_t size, cl_long *refValues, cl_long finalValue ); + cl_long (*ExpectedValueLongFn)(size_t size, cl_long *startRefValues, + size_t whichDestValue); + void (*GenerateRefsLongFn)(size_t size, cl_long *startRefValues, MTdata d); + bool (*VerifyRefsLongFn)(size_t size, cl_long *refValues, + cl_long finalValue); // Float versions - cl_float (*ExpectedValueFloatFn)( size_t size, cl_float *startRefValues, size_t whichDestValue ); - void (*GenerateRefsFloatFn)( size_t size, cl_float *startRefValues, MTdata d ); - bool (*VerifyRefsFloatFn)( size_t size, cl_float *refValues, cl_float finalValue ); + cl_float 
(*ExpectedValueFloatFn)(size_t size, cl_float *startRefValues, + size_t whichDestValue); + void (*GenerateRefsFloatFn)(size_t size, cl_float *startRefValues, + MTdata d); + bool (*VerifyRefsFloatFn)(size_t size, cl_float *refValues, + cl_float finalValue); }; -bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, ExplicitType dataType ) +bool check_atomic_support(cl_device_id device, bool extended, bool isLocal, + ExplicitType dataType) { + // clang-format off const char *extensionNames[8] = { "cl_khr_global_int32_base_atomics", "cl_khr_global_int32_extended_atomics", "cl_khr_local_int32_base_atomics", "cl_khr_local_int32_extended_atomics", "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics", "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics" // this line intended to be the same as the last one }; + // clang-format on size_t index = 0; - if( extended ) - index += 1; - if( isLocal ) - index += 2; + if (extended) index += 1; + if (isLocal) index += 2; Version version = get_device_cl_version(device); @@ -98,26 +106,28 @@ bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, Exp { case kInt: case kUInt: - if( version >= Version(1,1) ) - return 1; + if (version >= Version(1, 1)) return 1; break; case kLong: - case kULong: - index += 4; - break; - case kFloat: // this has to stay separate since the float atomics arent in the 1.0 extensions - return version >= Version(1,1); + case kULong: index += 4; break; + case kFloat: // this has to stay separate since the float atomics arent + // in the 1.0 extensions + return version >= Version(1, 1); default: - log_error( "ERROR: Unsupported data type (%d) in check_atomic_support\n", dataType ); + log_error( + "ERROR: Unsupported data type (%d) in check_atomic_support\n", + dataType); return 0; } - return is_extension_available( device, extensionNames[index] ); + return is_extension_available(device, extensionNames[index]); } -int test_atomic_function(cl_device_id 
deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore, - TestFns testFns, - bool extended, bool isLocal, ExplicitType dataType, bool matchGroupSize ) +int test_atomic_function(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + const char *programCore, TestFns testFns, + bool extended, bool isLocal, ExplicitType dataType, + bool matchGroupSize) { clProgramWrapper program; clKernelWrapper kernel; @@ -127,55 +137,65 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q void *refValues, *startRefValues; size_t threadSize, groupSize; const char *programLines[4]; - char pragma[ 512 ]; - char programHeader[ 512 ]; + char pragma[512]; + char programHeader[512]; MTdata d; - size_t typeSize = get_explicit_type_size( dataType ); + size_t typeSize = get_explicit_type_size(dataType); // Verify we can run first - bool isUnsigned = ( dataType == kULong ) || ( dataType == kUInt ); - if( !check_atomic_support( deviceID, extended, isLocal, dataType ) ) + bool isUnsigned = (dataType == kULong) || (dataType == kUInt); + if (!check_atomic_support(deviceID, extended, isLocal, dataType)) { - // Only print for the signed (unsigned comes right after, and if signed isn't supported, unsigned isn't either) - if( dataType == kFloat ) - log_info( "\t%s float not supported\n", isLocal ? "Local" : "Global" ); - else if( !isUnsigned ) - log_info( "\t%s %sint%d not supported\n", isLocal ? "Local" : "Global", isUnsigned ? "u" : "", (int)typeSize * 8 ); + // Only print for the signed (unsigned comes right after, and if signed + // isn't supported, unsigned isn't either) + if (dataType == kFloat) + log_info("\t%s float not supported\n", + isLocal ? "Local" : "Global"); + else if (!isUnsigned) + log_info("\t%s %sint%d not supported\n", + isLocal ? "Local" : "Global", isUnsigned ? 
"u" : "", + (int)typeSize * 8); // Since we don't support the operation, they implicitly pass return 0; } else { - if( dataType == kFloat ) - log_info( "\t%s float%s...", isLocal ? "local" : "global", isLocal ? " " : "" ); + if (dataType == kFloat) + log_info("\t%s float%s...", isLocal ? "local" : "global", + isLocal ? " " : ""); else - log_info( "\t%s %sint%d%s%s...", isLocal ? "local" : "global", isUnsigned ? "u" : "", - (int)typeSize * 8, isUnsigned ? "" : " ", isLocal ? " " : "" ); + log_info("\t%s %sint%d%s%s...", isLocal ? "local" : "global", + isUnsigned ? "u" : "", (int)typeSize * 8, + isUnsigned ? "" : " ", isLocal ? " " : ""); } //// Set up the kernel code // Create the pragma line for this kernel - bool isLong = ( dataType == kLong || dataType == kULong ); - sprintf( pragma, "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n", - isLong ? "" : (isLocal ? "_local" : "_global"), isLong ? "64" : "32", - extended ? "extended" : "base" ); + bool isLong = (dataType == kLong || dataType == kULong); + sprintf(pragma, + "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n", + isLong ? "" : (isLocal ? "_local" : "_global"), + isLong ? "64" : "32", extended ? "extended" : "base"); // Now create the program header - const char *typeName = get_explicit_type_name( dataType ); - if( isLocal ) - sprintf( programHeader, atomic_local_pattern[ 0 ], typeName, typeName, typeName ); + const char *typeName = get_explicit_type_name(dataType); + if (isLocal) + sprintf(programHeader, atomic_local_pattern[0], typeName, typeName, + typeName); else - sprintf( programHeader, atomic_global_pattern[ 0 ], typeName, typeName ); + sprintf(programHeader, atomic_global_pattern[0], typeName, typeName); // Set up our entire program now - programLines[ 0 ] = pragma; - programLines[ 1 ] = programHeader; - programLines[ 2 ] = programCore; - programLines[ 3 ] = ( isLocal ) ? 
atomic_local_pattern[ 1 ] : atomic_global_pattern[ 1 ]; - - if( create_single_kernel_helper( context, &program, &kernel, 4, programLines, "test_atomic_fn" ) ) + programLines[0] = pragma; + programLines[1] = programHeader; + programLines[2] = programCore; + programLines[3] = + (isLocal) ? atomic_local_pattern[1] : atomic_global_pattern[1]; + + if (create_single_kernel_helper(context, &program, &kernel, 4, programLines, + "test_atomic_fn")) { return -1; } @@ -183,29 +203,37 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q //// Set up to actually run threadSize = num_elements; - error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize ); - test_error( error, "Unable to get thread group max size" ); + error = + get_max_common_work_group_size(context, kernel, threadSize, &groupSize); + test_error(error, "Unable to get thread group max size"); - if( matchGroupSize ) + if (matchGroupSize) // HACK because xchg and cmpxchg apparently are limited by hardware threadSize = groupSize; - if( isLocal ) + if (isLocal) { - size_t maxSizes[3] = {0, 0, 0}; - error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3*sizeof(size_t), maxSizes, 0); - test_error( error, "Unable to obtain max work item sizes for the device" ); + size_t maxSizes[3] = { 0, 0, 0 }; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, + 3 * sizeof(size_t), maxSizes, 0); + test_error(error, + "Unable to obtain max work item sizes for the device"); size_t workSize; - error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL ); - test_error( error, "Unable to obtain max work group size for device and kernel combo" ); + error = clGetKernelWorkGroupInfo(kernel, deviceID, + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(workSize), &workSize, NULL); + test_error( + error, + "Unable to obtain max work group size for device and kernel combo"); // Limit workSize to avoid extremely large local 
buffer size and slow // run. if (workSize > 65536) workSize = 65536; - // "workSize" is limited to that of the first dimension as only a 1DRange is executed. - if( maxSizes[0] < workSize ) + // "workSize" is limited to that of the first dimension as only a + // 1DRange is executed. + if (maxSizes[0] < workSize) { workSize = maxSizes[0]; } @@ -214,38 +242,43 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q } - log_info( "\t(thread count %d, group size %d)\n", (int)threadSize, (int)groupSize ); + log_info("\t(thread count %d, group size %d)\n", (int)threadSize, + (int)groupSize); - refValues = (cl_int *)malloc( typeSize * threadSize ); + refValues = (cl_int *)malloc(typeSize * threadSize); - if( testFns.GenerateRefsIntFn != NULL ) + if (testFns.GenerateRefsIntFn != NULL) { // We have a ref generator provided - d = init_genrand( gRandomSeed ); - startRefValues = malloc( typeSize * threadSize ); - if( typeSize == 4 ) - testFns.GenerateRefsIntFn( threadSize, (cl_int *)startRefValues, d ); + d = init_genrand(gRandomSeed); + startRefValues = malloc(typeSize * threadSize); + if (typeSize == 4) + testFns.GenerateRefsIntFn(threadSize, (cl_int *)startRefValues, d); else - testFns.GenerateRefsLongFn( threadSize, (cl_long *)startRefValues, d ); + testFns.GenerateRefsLongFn(threadSize, (cl_long *)startRefValues, + d); free_mtdata(d); d = NULL; } else startRefValues = NULL; - // If we're given a num_results function, we need to determine how many result objects we need. If - // we don't have it, we assume it's just 1 - size_t numDestItems = ( testFns.NumResultsFn != NULL ) ? testFns.NumResultsFn( threadSize, dataType ) : 1; + // If we're given a num_results function, we need to determine how many + // result objects we need. If we don't have it, we assume it's just 1 + size_t numDestItems = (testFns.NumResultsFn != NULL) + ? 
testFns.NumResultsFn(threadSize, dataType) + : 1; - char * destItems = new char[ typeSize * numDestItems ]; - if( destItems == NULL ) + char *destItems = new char[typeSize * numDestItems]; + if (destItems == NULL) { - log_error( "ERROR: Unable to allocate memory!\n" ); + log_error("ERROR: Unable to allocate memory!\n"); return -1; } - void * startValue = ( typeSize == 4 ) ? (void *)&testFns.mIntStartValue : (void *)&testFns.mLongStartValue; - for( size_t i = 0; i < numDestItems; i++ ) - memcpy( destItems + i * typeSize, startValue, typeSize ); + void *startValue = (typeSize == 4) ? (void *)&testFns.mIntStartValue + : (void *)&testFns.mLongStartValue; + for (size_t i = 0; i < numDestItems; i++) + memcpy(destItems + i * typeSize, startValue, typeSize); streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, typeSize * numDestItems, destItems, NULL); @@ -265,82 +298,96 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q } /* Set the arguments */ - error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); - test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); - if( isLocal ) + if (isLocal) { - error = clSetKernelArg( kernel, 2, typeSize * numDestItems, NULL ); - test_error( error, "Unable to set indexed local kernel argument" ); + error = clSetKernelArg(kernel, 2, typeSize * numDestItems, NULL); + test_error(error, "Unable to set indexed local kernel argument"); cl_int numDestItemsInt = (cl_int)numDestItems; - error = clSetKernelArg( kernel, 3, sizeof( cl_int ), &numDestItemsInt ); - test_error( error, "Unable to 
set indexed kernel argument" ); + error = clSetKernelArg(kernel, 3, sizeof(cl_int), &numDestItemsInt); + test_error(error, "Unable to set indexed kernel argument"); } /* Run the kernel */ threads[0] = threadSize; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &groupSize, 0, NULL, NULL ); - test_error( error, "Unable to execute test kernel" ); - - error = clEnqueueReadBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL ); - test_error( error, "Unable to read result value!" ); - - error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize * threadSize, refValues, 0, NULL, NULL ); - test_error( error, "Unable to read reference values!" ); - - // If we have an expectedFn, then we need to generate a final value to compare against. If we don't - // have one, it's because we're comparing ref values only - if( testFns.ExpectedValueIntFn != NULL ) + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, &groupSize, + 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); + + error = + clEnqueueReadBuffer(queue, streams[0], true, 0, typeSize * numDestItems, + destItems, 0, NULL, NULL); + test_error(error, "Unable to read result value!"); + + error = + clEnqueueReadBuffer(queue, streams[1], true, 0, typeSize * threadSize, + refValues, 0, NULL, NULL); + test_error(error, "Unable to read reference values!"); + + // If we have an expectedFn, then we need to generate a final value to + // compare against. 
If we don't have one, it's because we're comparing ref + // values only + if (testFns.ExpectedValueIntFn != NULL) { - for( size_t i = 0; i < numDestItems; i++ ) + for (size_t i = 0; i < numDestItems; i++) { - char expected[ 8 ]; + char expected[8]; cl_int intVal; cl_long longVal; - if( typeSize == 4 ) + if (typeSize == 4) { // Int version - intVal = testFns.ExpectedValueIntFn( threadSize, (cl_int *)startRefValues, i ); - memcpy( expected, &intVal, sizeof( intVal ) ); + intVal = testFns.ExpectedValueIntFn( + threadSize, (cl_int *)startRefValues, i); + memcpy(expected, &intVal, sizeof(intVal)); } else { // Long version - longVal = testFns.ExpectedValueLongFn( threadSize, (cl_long *)startRefValues, i ); - memcpy( expected, &longVal, sizeof( longVal ) ); + longVal = testFns.ExpectedValueLongFn( + threadSize, (cl_long *)startRefValues, i); + memcpy(expected, &longVal, sizeof(longVal)); } - if( memcmp( expected, destItems + i * typeSize, typeSize ) != 0 ) + if (memcmp(expected, destItems + i * typeSize, typeSize) != 0) { - if( typeSize == 4 ) + if (typeSize == 4) { - cl_int *outValue = (cl_int *)( destItems + i * typeSize ); - log_error( "ERROR: Result %ld from kernel does not validate! (should be %d, was %d)\n", i, intVal, *outValue ); + cl_int *outValue = (cl_int *)(destItems + i * typeSize); + log_error("ERROR: Result %ld from kernel does not " + "validate! 
(should be %d, was %d)\n", + i, intVal, *outValue); cl_int *startRefs = (cl_int *)startRefValues; cl_int *refs = (cl_int *)refValues; - for( i = 0; i < threadSize; i++ ) + for (i = 0; i < threadSize; i++) { - if( startRefs != NULL ) - log_info( " --- %ld - %d --- %d\n", i, startRefs[i], refs[i] ); + if (startRefs != NULL) + log_info(" --- %ld - %d --- %d\n", i, startRefs[i], + refs[i]); else - log_info( " --- %ld --- %d\n", i, refs[i] ); + log_info(" --- %ld --- %d\n", i, refs[i]); } } else { - cl_long *outValue = (cl_long *)( destItems + i * typeSize ); - log_error( "ERROR: Result %ld from kernel does not validate! (should be %lld, was %lld)\n", i, longVal, *outValue ); + cl_long *outValue = (cl_long *)(destItems + i * typeSize); + log_error("ERROR: Result %ld from kernel does not " + "validate! (should be %lld, was %lld)\n", + i, longVal, *outValue); cl_long *startRefs = (cl_long *)startRefValues; cl_long *refs = (cl_long *)refValues; - for( i = 0; i < threadSize; i++ ) + for (i = 0; i < threadSize; i++) { - if( startRefs != NULL ) - log_info( " --- %ld - %lld --- %lld\n", i, startRefs[i], refs[i] ); + if (startRefs != NULL) + log_info(" --- %ld - %lld --- %lld\n", i, + startRefs[i], refs[i]); else - log_info( " --- %ld --- %lld\n", i, refs[i] ); + log_info(" --- %ld --- %lld\n", i, refs[i]); } } return -1; @@ -348,104 +395,140 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q } } - if( testFns.VerifyRefsIntFn != NULL ) + if (testFns.VerifyRefsIntFn != NULL) { /* Use the verify function to also check the results */ - if( dataType == kFloat ) + if (dataType == kFloat) { cl_float *outValue = (cl_float *)destItems; - if( !testFns.VerifyRefsFloatFn( threadSize, (cl_float *)refValues, *outValue ) != 0 ) + if (!testFns.VerifyRefsFloatFn(threadSize, (cl_float *)refValues, + *outValue) + != 0) { - log_error( "ERROR: Reference values did not validate!\n" ); + log_error("ERROR: Reference values did not validate!\n"); return -1; } } - else 
if( typeSize == 4 ) + else if (typeSize == 4) { cl_int *outValue = (cl_int *)destItems; - if( !testFns.VerifyRefsIntFn( threadSize, (cl_int *)refValues, *outValue ) != 0 ) + if (!testFns.VerifyRefsIntFn(threadSize, (cl_int *)refValues, + *outValue) + != 0) { - log_error( "ERROR: Reference values did not validate!\n" ); + log_error("ERROR: Reference values did not validate!\n"); return -1; } } else { cl_long *outValue = (cl_long *)destItems; - if( !testFns.VerifyRefsLongFn( threadSize, (cl_long *)refValues, *outValue ) != 0 ) + if (!testFns.VerifyRefsLongFn(threadSize, (cl_long *)refValues, + *outValue) + != 0) { - log_error( "ERROR: Reference values did not validate!\n" ); + log_error("ERROR: Reference values did not validate!\n"); return -1; } } } - else if( testFns.ExpectedValueIntFn == NULL ) + else if (testFns.ExpectedValueIntFn == NULL) { - log_error( "ERROR: Test doesn't check total or refs; no values are verified!\n" ); + log_error("ERROR: Test doesn't check total or refs; no values are " + "verified!\n"); return -1; } /* Re-write the starting value */ - for( size_t i = 0; i < numDestItems; i++ ) - memcpy( destItems + i * typeSize, startValue, typeSize ); - error = clEnqueueWriteBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL ); - test_error( error, "Unable to write starting values!" 
); - - /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */ + for (size_t i = 0; i < numDestItems; i++) + memcpy(destItems + i * typeSize, startValue, typeSize); + error = + clEnqueueWriteBuffer(queue, streams[0], true, 0, + typeSize * numDestItems, destItems, 0, NULL, NULL); + test_error(error, "Unable to write starting values!"); + + /* Run the kernel once for a single thread, so we can verify that the + * returned value is the original one */ threads[0] = 1; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, threads, 0, NULL, NULL ); - test_error( error, "Unable to execute test kernel" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, threads, 0, + NULL, NULL); + test_error(error, "Unable to execute test kernel"); - error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize, refValues, 0, NULL, NULL ); - test_error( error, "Unable to read reference values!" ); + error = clEnqueueReadBuffer(queue, streams[1], true, 0, typeSize, refValues, + 0, NULL, NULL); + test_error(error, "Unable to read reference values!"); - if( memcmp( refValues, destItems, typeSize ) != 0 ) + if (memcmp(refValues, destItems, typeSize) != 0) { - if( typeSize == 4 ) + if (typeSize == 4) { cl_int *s = (cl_int *)destItems; cl_int *r = (cl_int *)refValues; - log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value " - " (should have been %d, returned %d)!\n", *s, *r ); + log_error("ERROR: atomic function operated correctly but did NOT " + "return correct 'old' value " + " (should have been %d, returned %d)!\n", + *s, *r); } else { cl_long *s = (cl_long *)destItems; cl_long *r = (cl_long *)refValues; - log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value " - " (should have been %lld, returned %lld)!\n", *s, *r ); + log_error("ERROR: atomic function operated correctly but did NOT " + "return correct 'old' value " + " (should 
have been %lld, returned %lld)!\n", + *s, *r); } return -1; } - delete [] destItems; - free( refValues ); - if( startRefValues != NULL ) - free( startRefValues ); + delete[] destItems; + free(refValues); + if (startRefValues != NULL) free(startRefValues); return 0; } -int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore, - TestFns testFns, - bool extended, bool matchGroupSize, bool usingAtomicPrefix ) +int test_atomic_function_set(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + const char *programCore, TestFns testFns, + bool extended, bool matchGroupSize, + bool usingAtomicPrefix) { - log_info(" Testing %s functions...\n", usingAtomicPrefix ? "atomic_" : "atom_"); + log_info(" Testing %s functions...\n", + usingAtomicPrefix ? "atomic_" : "atom_"); int errors = 0; - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kInt, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kUInt, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kInt, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kUInt, matchGroupSize ); - - // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64 bit functions still use the "atom" prefix. - // The argument usingAtomicPrefix is set to true if programCore was generated with the "atomic" prefix. 
- if (!usingAtomicPrefix) { - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kLong, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kULong, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kLong, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kULong, matchGroupSize ); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, false, kInt, + matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, false, kUInt, + matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, true, kInt, + matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, true, kUInt, + matchGroupSize); + + // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64 + // bit functions still use the "atom" prefix. The argument usingAtomicPrefix + // is set to true if programCore was generated with the "atomic" prefix. 
+ if (!usingAtomicPrefix) + { + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, false, + kLong, matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, false, + kULong, matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, true, + kLong, matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, true, + kULong, matchGroupSize); } return errors; @@ -454,265 +537,345 @@ int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_comma #pragma mark ---- add const char atom_add_core[] = -" oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n" -" atom_add( &destMemory[0], tid + 3 );\n" -" atom_add( &destMemory[0], tid + 3 );\n" -" atom_add( &destMemory[0], tid + 3 );\n"; + " oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n" + " atom_add( &destMemory[0], tid + 3 );\n" + " atom_add( &destMemory[0], tid + 3 );\n" + " atom_add( &destMemory[0], tid + 3 );\n"; const char atomic_add_core[] = -" oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n" -" atomic_add( &destMemory[0], tid + 3 );\n" -" atomic_add( &destMemory[0], tid + 3 );\n" -" atomic_add( &destMemory[0], tid + 3 );\n"; + " oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n" + " atomic_add( &destMemory[0], tid + 3 );\n" + " atomic_add( &destMemory[0], tid + 3 );\n" + " atomic_add( &destMemory[0], tid + 3 );\n"; -cl_int test_atomic_add_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_add_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = 0; - for( size_t i = 0; i < size; i++ ) - total += ( (cl_int)i + 3 ) * 4; + for (size_t i = 0; i < size; i++) total += ((cl_int)i + 3) * 4; return total; } -cl_long test_atomic_add_result_long( size_t size, 
cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_add_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = 0; - for( size_t i = 0; i < size; i++ ) - total += ( ( i + 3 ) * 4 ); + for (size_t i = 0; i < size; i++) total += ((i + 3) * 4); return total; } -int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_add(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0, 0LL, NULL, test_atomic_add_result_int, NULL, NULL, test_atomic_add_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0, + 0LL, + NULL, + test_atomic_add_result_int, + NULL, + NULL, + test_atomic_add_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_add_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) + return -1; + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_add_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) - return -1; return 0; } #pragma mark ---- sub -const char atom_sub_core[] = " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n"; +const char atom_sub_core[] = + " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n"; -const char atomic_sub_core[] = " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n"; +const char atomic_sub_core[] = + " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n"; -cl_int test_atomic_sub_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int 
test_atomic_sub_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = INT_TEST_VALUE; - for( size_t i = 0; i < size; i++ ) - total -= (cl_int)i + 3; + for (size_t i = 0; i < size; i++) total -= (cl_int)i + 3; return total; } -cl_long test_atomic_sub_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_sub_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = LONG_TEST_VALUE; - for( size_t i = 0; i < size; i++ ) - total -= i + 3; + for (size_t i = 0; i < size; i++) total -= i + 3; return total; } -int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_sub(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_sub_result_int, NULL, NULL, test_atomic_sub_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + test_atomic_sub_result_int, + NULL, + NULL, + test_atomic_sub_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_sub_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_sub_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } #pragma mark ---- xchg -const char atom_xchg_core[] = " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n"; +const char atom_xchg_core[] = + " oldValues[tid] = 
atom_xchg( &destMemory[0], tid );\n"; -const char atomic_xchg_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n"; -const char atomic_xchg_float_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n"; +const char atomic_xchg_core[] = + " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n"; +const char atomic_xchg_float_core[] = + " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n"; -bool test_atomic_xchg_verify_int( size_t size, cl_int *refValues, cl_int finalValue ) +bool test_atomic_xchg_verify_int(size_t size, cl_int *refValues, + cl_int finalValue) { - /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */ + /* For xchg, each value from 0 to size - 1 should have an entry in the ref + * array, and ONLY one entry */ char *valids; size_t i; char originalValidCount = 0; - valids = (char *)malloc( sizeof( char ) * size ); - memset( valids, 0, sizeof( char ) * size ); + valids = (char *)malloc(sizeof(char) * size); + memset(valids, 0, sizeof(char) * size); - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( refValues[ i ] == INT_TEST_VALUE ) + if (refValues[i] == INT_TEST_VALUE) { // Special initial value originalValidCount++; continue; } - if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size ) + if (refValues[i] < 0 || (size_t)refValues[i] >= size) { - log_error( "ERROR: Reference value %ld outside of valid range! (%d)\n", i, refValues[ i ] ); + log_error( + "ERROR: Reference value %ld outside of valid range! (%d)\n", i, + refValues[i]); return false; } - valids[ refValues[ i ] ] ++; + valids[refValues[i]]++; } - /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be - the final value outputted */ - if( valids[ finalValue ] > 0 ) + /* Note: ONE entry will have zero count. 
It'll be the last one that + executed, because that value should be the final value outputted */ + if (valids[finalValue] > 0) { - log_error( "ERROR: Final value %d was also in ref list!\n", finalValue ); + log_error("ERROR: Final value %d was also in ref list!\n", finalValue); return false; } else - valids[ finalValue ] = 1; // So the following loop will be okay + valids[finalValue] = 1; // So the following loop will be okay /* Now check that every entry has one and only one count */ - if( originalValidCount != 1 ) + if (originalValidCount != 1) { - log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount ); + log_error("ERROR: Starting reference value %d did not occur " + "once-and-only-once (occurred %d)\n", + 65191, originalValidCount); return false; } - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( valids[ i ] != 1 ) + if (valids[i] != 1) { - log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] ); - for( size_t j = 0; j < size; j++ ) - log_info( "%d: %d\n", (int)j, (int)valids[ j ] ); + log_error("ERROR: Reference value %ld did not occur " + "once-and-only-once (occurred %d)\n", + i, valids[i]); + for (size_t j = 0; j < size; j++) + log_info("%d: %d\n", (int)j, (int)valids[j]); return false; } } - free( valids ); + free(valids); return true; } -bool test_atomic_xchg_verify_long( size_t size, cl_long *refValues, cl_long finalValue ) +bool test_atomic_xchg_verify_long(size_t size, cl_long *refValues, + cl_long finalValue) { - /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */ + /* For xchg, each value from 0 to size - 1 should have an entry in the ref + * array, and ONLY one entry */ char *valids; size_t i; char originalValidCount = 0; - valids = (char *)malloc( sizeof( char ) * size ); - memset( valids, 0, sizeof( char ) * size ); + valids = (char *)malloc(sizeof(char) * 
size); + memset(valids, 0, sizeof(char) * size); - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( refValues[ i ] == LONG_TEST_VALUE ) + if (refValues[i] == LONG_TEST_VALUE) { // Special initial value originalValidCount++; continue; } - if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size ) + if (refValues[i] < 0 || (size_t)refValues[i] >= size) { - log_error( "ERROR: Reference value %ld outside of valid range! (%lld)\n", i, refValues[ i ] ); + log_error( + "ERROR: Reference value %ld outside of valid range! (%lld)\n", + i, refValues[i]); return false; } - valids[ refValues[ i ] ] ++; + valids[refValues[i]]++; } - /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be - the final value outputted */ - if( valids[ finalValue ] > 0 ) + /* Note: ONE entry will have zero count. It'll be the last one that + executed, because that value should be the final value outputted */ + if (valids[finalValue] > 0) { - log_error( "ERROR: Final value %lld was also in ref list!\n", finalValue ); + log_error("ERROR: Final value %lld was also in ref list!\n", + finalValue); return false; } else - valids[ finalValue ] = 1; // So the following loop will be okay + valids[finalValue] = 1; // So the following loop will be okay /* Now check that every entry has one and only one count */ - if( originalValidCount != 1 ) + if (originalValidCount != 1) { - log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount ); + log_error("ERROR: Starting reference value %d did not occur " + "once-and-only-once (occurred %d)\n", + 65191, originalValidCount); return false; } - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( valids[ i ] != 1 ) + if (valids[i] != 1) { - log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] ); - for( size_t j = 0; j < size; j++ ) - log_info( "%d: %d\n", (int)j, (int)valids[ 
j ] ); + log_error("ERROR: Reference value %ld did not occur " + "once-and-only-once (occurred %d)\n", + i, valids[i]); + for (size_t j = 0; j < size; j++) + log_info("%d: %d\n", (int)j, (int)valids[j]); return false; } } - free( valids ); + free(valids); return true; } -bool test_atomic_xchg_verify_float( size_t size, cl_float *refValues, cl_float finalValue ) +bool test_atomic_xchg_verify_float(size_t size, cl_float *refValues, + cl_float finalValue) { - /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */ + /* For xchg, each value from 0 to size - 1 should have an entry in the ref + * array, and ONLY one entry */ char *valids; size_t i; char originalValidCount = 0; - valids = (char *)malloc( sizeof( char ) * size ); - memset( valids, 0, sizeof( char ) * size ); + valids = (char *)malloc(sizeof(char) * size); + memset(valids, 0, sizeof(char) * size); - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - cl_int *intRefValue = (cl_int *)( &refValues[ i ] ); - if( *intRefValue == INT_TEST_VALUE ) + cl_int *intRefValue = (cl_int *)(&refValues[i]); + if (*intRefValue == INT_TEST_VALUE) { // Special initial value originalValidCount++; continue; } - if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size ) + if (refValues[i] < 0 || (size_t)refValues[i] >= size) { - log_error( "ERROR: Reference value %ld outside of valid range! (%a)\n", i, refValues[ i ] ); + log_error( + "ERROR: Reference value %ld outside of valid range! (%a)\n", i, + refValues[i]); return false; } - valids[ (int)refValues[ i ] ] ++; + valids[(int)refValues[i]]++; } - /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be - the final value outputted */ - if( valids[ (int)finalValue ] > 0 ) + /* Note: ONE entry will have zero count. 
It'll be the last one that + executed, because that value should be the final value outputted */ + if (valids[(int)finalValue] > 0) { - log_error( "ERROR: Final value %a was also in ref list!\n", finalValue ); + log_error("ERROR: Final value %a was also in ref list!\n", finalValue); return false; } else - valids[ (int)finalValue ] = 1; // So the following loop will be okay + valids[(int)finalValue] = 1; // So the following loop will be okay /* Now check that every entry has one and only one count */ - if( originalValidCount != 1 ) + if (originalValidCount != 1) { - log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount ); + log_error("ERROR: Starting reference value %d did not occur " + "once-and-only-once (occurred %d)\n", + 65191, originalValidCount); return false; } - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( valids[ i ] != 1 ) + if (valids[i] != 1) { - log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] ); - for( size_t j = 0; j < size; j++ ) - log_info( "%d: %d\n", (int)j, (int)valids[ j ] ); + log_error("ERROR: Reference value %ld did not occur " + "once-and-only-once (occurred %d)\n", + i, valids[i]); + for (size_t j = 0; j < size; j++) + log_info("%d: %d\n", (int)j, (int)valids[j]); return false; } } - free( valids ); + free(valids); return true; } -int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_xchg(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, NULL, NULL, test_atomic_xchg_verify_int, NULL, NULL, test_atomic_xchg_verify_long, NULL, NULL, test_atomic_xchg_verify_float }; - - int errors = test_atomic_function_set( deviceID, context, queue, num_elements, atom_xchg_core, set, false, true, /*usingAtomicPrefix*/ false ); - errors |= 
test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xchg_core, set, false, true, /*usingAtomicPrefix*/ true ); - - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, false, kFloat, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, true, kFloat, true ); + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + NULL, + NULL, + test_atomic_xchg_verify_int, + NULL, + NULL, + test_atomic_xchg_verify_long, + NULL, + NULL, + test_atomic_xchg_verify_float }; + + int errors = test_atomic_function_set( + deviceID, context, queue, num_elements, atom_xchg_core, set, false, + true, /*usingAtomicPrefix*/ false); + errors |= test_atomic_function_set(deviceID, context, queue, num_elements, + atomic_xchg_core, set, false, true, + /*usingAtomicPrefix*/ true); + + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_xchg_float_core, set, false, false, + kFloat, true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_xchg_float_core, set, false, true, + kFloat, true); return errors; } @@ -720,51 +883,71 @@ int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- min -const char atom_min_core[] = " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n"; +const char atom_min_core[] = + " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n"; -const char atomic_min_core[] = " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n"; +const char atomic_min_core[] = + " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n"; -cl_int test_atomic_min_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_min_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = 0x7fffffffL; - for( size_t i = 0; i < size; i++ ) + for (size_t i = 0; i 
< size; i++) { - if( startRefValues[ i ] < total ) - total = startRefValues[ i ]; + if (startRefValues[i] < total) total = startRefValues[i]; } return total; } -void test_atomic_min_gen_int( size_t size, cl_int *startRefValues, MTdata d ) +void test_atomic_min_gen_int(size_t size, cl_int *startRefValues, MTdata d) { - for( size_t i = 0; i < size; i++ ) - startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff; + for (size_t i = 0; i < size; i++) + startRefValues[i] = + (cl_int)(genrand_int32(d) % 0x3fffffff) + 0x3fffffff; } -cl_long test_atomic_min_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_min_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = 0x7fffffffffffffffLL; - for( size_t i = 0; i < size; i++ ) + for (size_t i = 0; i < size; i++) { - if( startRefValues[ i ] < total ) - total = startRefValues[ i ]; + if (startRefValues[i] < total) total = startRefValues[i]; } return total; } -void test_atomic_min_gen_long( size_t size, cl_long *startRefValues, MTdata d ) +void test_atomic_min_gen_long(size_t size, cl_long *startRefValues, MTdata d) { - for( size_t i = 0; i < size; i++ ) - startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) ); + for (size_t i = 0; i < size; i++) + startRefValues[i] = + (cl_long)(genrand_int32(d) + | (((cl_long)genrand_int32(d) & 0x7fffffffL) << 16)); } -int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_min(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0x7fffffffL, 0x7fffffffffffffffLL, NULL, test_atomic_min_result_int, test_atomic_min_gen_int, NULL, test_atomic_min_result_long, test_atomic_min_gen_long, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_min_core, set, true, /*matchGroupSize*/ false, 
/*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0x7fffffffL, + 0x7fffffffffffffffLL, + NULL, + test_atomic_min_result_int, + test_atomic_min_gen_int, + NULL, + test_atomic_min_result_long, + test_atomic_min_gen_long, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_min_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_min_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -772,79 +955,118 @@ int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- max -const char atom_max_core[] = " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n"; +const char atom_max_core[] = + " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n"; -const char atomic_max_core[] = " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n"; +const char atomic_max_core[] = + " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n"; -cl_int test_atomic_max_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_max_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = 0; - for( size_t i = 0; i < size; i++ ) + for (size_t i = 0; i < size; i++) { - if( startRefValues[ i ] > total ) - total = startRefValues[ i ]; + if (startRefValues[i] > total) total = startRefValues[i]; } return total; } -void test_atomic_max_gen_int( size_t size, cl_int *startRefValues, MTdata d ) +void test_atomic_max_gen_int(size_t size, cl_int *startRefValues, MTdata d) { - for( size_t i = 0; i < size; i++ ) - startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff; + 
for (size_t i = 0; i < size; i++) + startRefValues[i] = + (cl_int)(genrand_int32(d) % 0x3fffffff) + 0x3fffffff; } -cl_long test_atomic_max_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_max_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = 0; - for( size_t i = 0; i < size; i++ ) + for (size_t i = 0; i < size; i++) { - if( startRefValues[ i ] > total ) - total = startRefValues[ i ]; + if (startRefValues[i] > total) total = startRefValues[i]; } return total; } -void test_atomic_max_gen_long( size_t size, cl_long *startRefValues, MTdata d ) +void test_atomic_max_gen_long(size_t size, cl_long *startRefValues, MTdata d) { - for( size_t i = 0; i < size; i++ ) - startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) ); + for (size_t i = 0; i < size; i++) + startRefValues[i] = + (cl_long)(genrand_int32(d) + | (((cl_long)genrand_int32(d) & 0x7fffffffL) << 16)); } -int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_max(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0, 0, NULL, test_atomic_max_result_int, test_atomic_max_gen_int, NULL, test_atomic_max_result_long, test_atomic_max_gen_long, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0, + 0, + NULL, + test_atomic_max_result_int, + test_atomic_max_gen_int, + NULL, + test_atomic_max_result_long, + test_atomic_max_gen_long, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_max_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) + return -1; + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_max_core, set, true, + 
/*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) - return -1; return 0; } #pragma mark ---- inc -const char atom_inc_core[] = " oldValues[tid] = atom_inc( &destMemory[0] );\n"; +const char atom_inc_core[] = + " oldValues[tid] = atom_inc( &destMemory[0] );\n"; -const char atomic_inc_core[] = " oldValues[tid] = atomic_inc( &destMemory[0] );\n"; +const char atomic_inc_core[] = + " oldValues[tid] = atomic_inc( &destMemory[0] );\n"; -cl_int test_atomic_inc_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_inc_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { return INT_TEST_VALUE + (cl_int)size; } -cl_long test_atomic_inc_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_inc_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { return LONG_TEST_VALUE + size; } -int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_inc(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_inc_result_int, NULL, NULL, test_atomic_inc_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + test_atomic_inc_result_int, + NULL, + NULL, + test_atomic_inc_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_inc_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, 
queue, num_elements, atomic_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_inc_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -852,27 +1074,46 @@ int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- dec -const char atom_dec_core[] = " oldValues[tid] = atom_dec( &destMemory[0] );\n"; +const char atom_dec_core[] = + " oldValues[tid] = atom_dec( &destMemory[0] );\n"; -const char atomic_dec_core[] = " oldValues[tid] = atomic_dec( &destMemory[0] );\n"; +const char atomic_dec_core[] = + " oldValues[tid] = atomic_dec( &destMemory[0] );\n"; -cl_int test_atomic_dec_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_dec_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { return INT_TEST_VALUE - (cl_int)size; } -cl_long test_atomic_dec_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_dec_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { return LONG_TEST_VALUE - size; } -int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_dec(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_dec_result_int, NULL, NULL, test_atomic_dec_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + test_atomic_dec_result_int, + NULL, + NULL, + test_atomic_dec_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, 
atom_dec_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_dec_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -881,129 +1122,159 @@ int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- cmpxchg /* We test cmpxchg by implementing (the long way) atom_add */ +// clang-format off const char atom_cmpxchg_core[] = -" int oldValue, origValue, newValue;\n" -" do { \n" -" origValue = destMemory[0];\n" -" newValue = origValue + tid + 2;\n" -" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n" -" } while( oldValue != origValue );\n" -" oldValues[tid] = oldValue;\n" -; + " int oldValue, origValue, newValue;\n" + " do { \n" + " origValue = destMemory[0];\n" + " newValue = origValue + tid + 2;\n" + " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n" + " } while( oldValue != origValue );\n" + " oldValues[tid] = oldValue;\n"; const char atom_cmpxchg64_core[] = -" long oldValue, origValue, newValue;\n" -" do { \n" -" origValue = destMemory[0];\n" -" newValue = origValue + tid + 2;\n" -" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n" -" } while( oldValue != origValue );\n" -" oldValues[tid] = oldValue;\n" -; + " long oldValue, origValue, newValue;\n" + " do { \n" + " origValue = destMemory[0];\n" + " newValue = origValue + tid + 2;\n" + " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n" + " } while( oldValue != origValue );\n" + " oldValues[tid] = oldValue;\n"; const char atomic_cmpxchg_core[] = -" int oldValue, origValue, newValue;\n" -" do { \n" -" origValue = destMemory[0];\n" -" newValue = origValue + tid + 2;\n" -" 
oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n" -" } while( oldValue != origValue );\n" -" oldValues[tid] = oldValue;\n" -; - -cl_int test_atomic_cmpxchg_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) + " int oldValue, origValue, newValue;\n" + " do { \n" + " origValue = destMemory[0];\n" + " newValue = origValue + tid + 2;\n" + " oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n" + " } while( oldValue != origValue );\n" + " oldValues[tid] = oldValue;\n"; +// clang-format on + +cl_int test_atomic_cmpxchg_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = INT_TEST_VALUE; - for( size_t i = 0; i < size; i++ ) - total += (cl_int)i + 2; + for (size_t i = 0; i < size; i++) total += (cl_int)i + 2; return total; } -cl_long test_atomic_cmpxchg_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_cmpxchg_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = LONG_TEST_VALUE; - for( size_t i = 0; i < size; i++ ) - total += i + 2; + for (size_t i = 0; i < size; i++) total += i + 2; return total; } -int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_cmpxchg_result_int, NULL, NULL, test_atomic_cmpxchg_result_long, NULL, NULL }; + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + test_atomic_cmpxchg_result_int, + NULL, + NULL, + test_atomic_cmpxchg_result_long, + NULL, + NULL }; int errors = 0; log_info(" Testing atom_ functions...\n"); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, 
set, false, false, kUInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kUInt, true ); - - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kLong, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kULong, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kLong, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kULong, true ); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg_core, set, false, false, kInt, true); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg_core, set, false, false, kUInt, true); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg_core, set, false, true, kInt, true); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg_core, set, false, true, kUInt, true); + + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg64_core, set, false, false, + kLong, true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg64_core, set, false, false, + kULong, true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg64_core, set, false, true, kLong, + true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg64_core, set, false, true, + kULong, true); log_info(" Testing atomic_ functions...\n"); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, 
kInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kUInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kUInt, true ); - - if( errors ) - return -1; + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_cmpxchg_core, set, false, false, kInt, + true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_cmpxchg_core, set, false, false, + kUInt, true); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atomic_cmpxchg_core, set, false, true, kInt, true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_cmpxchg_core, set, false, true, kUInt, + true); + + if (errors) return -1; return 0; } #pragma mark -------- Bitwise functions -size_t test_bitwise_num_results( size_t threadCount, ExplicitType dataType ) +size_t test_bitwise_num_results(size_t threadCount, ExplicitType dataType) { - size_t numBits = get_explicit_type_size( dataType ) * 8; + size_t numBits = get_explicit_type_size(dataType) * 8; - return ( threadCount + numBits - 1 ) / numBits; + return (threadCount + numBits - 1) / numBits; } #pragma mark ---- and +// clang-format off const char atom_and_core[] = -" size_t numBits = sizeof( destMemory[0] ) * 8;\n" -" int whichResult = tid / numBits;\n" -" int bitIndex = tid - ( whichResult * numBits );\n" -"\n" -" oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n" -; + " size_t numBits = sizeof( destMemory[0] ) * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - ( whichResult * numBits );\n" + "\n" + " oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"; const char atomic_and_core[] = -" size_t numBits = 
sizeof( destMemory[0] ) * 8;\n" -" int whichResult = tid / numBits;\n" -" int bitIndex = tid - ( whichResult * numBits );\n" -"\n" -" oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n" -; + " size_t numBits = sizeof( destMemory[0] ) * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - ( whichResult * numBits );\n" + "\n" + " oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"; +// clang-format on -cl_int test_atomic_and_result_int( size_t size, cl_int *startRefValues, size_t whichResult ) +cl_int test_atomic_and_result_int(size_t size, cl_int *startRefValues, + size_t whichResult) { - size_t numThreads = ( (size_t)size + 31 ) / 32; - if( whichResult < numThreads - 1 ) - return 0; + size_t numThreads = ((size_t)size + 31) / 32; + if (whichResult < numThreads - 1) return 0; // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = (size_t)size - whichResult * 32; cl_int bits = (cl_int)0xffffffffL; - for( size_t i = 0; i < numBits; i++ ) - bits &= ~( 1 << i ); + for (size_t i = 0; i < numBits; i++) bits &= ~(1 << i); return bits; } -cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_t whichResult ) +cl_long test_atomic_and_result_long(size_t size, cl_long *startRefValues, + size_t whichResult) { - size_t numThreads = ( (size_t)size + 63 ) / 64; - if( whichResult < numThreads - 1 ) - return 0; + size_t numThreads = ((size_t)size + 63) / 64; + if (whichResult < numThreads - 1) return 0; // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = (size_t)size - whichResult * 64; @@ -1013,14 +1284,28 @@ cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_ return bits; } -int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_and(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - 
TestFns set = { 0xffffffff, 0xffffffffffffffffLL, test_bitwise_num_results, - test_atomic_and_result_int, NULL, NULL, test_atomic_and_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0xffffffff, + 0xffffffffffffffffLL, + test_bitwise_num_results, + test_atomic_and_result_int, + NULL, + NULL, + test_atomic_and_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_and_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_and_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -1028,59 +1313,68 @@ int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- or +// clang-format off const char atom_or_core[] = -" size_t numBits = sizeof( destMemory[0] ) * 8;\n" -" int whichResult = tid / numBits;\n" -" int bitIndex = tid - ( whichResult * numBits );\n" -"\n" -" oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n" -; + " size_t numBits = sizeof( destMemory[0] ) * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - ( whichResult * numBits );\n" + "\n" + " oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"; const char atomic_or_core[] = -" size_t numBits = sizeof( destMemory[0] ) * 8;\n" -" int whichResult = tid / numBits;\n" -" int bitIndex = tid - ( whichResult * numBits );\n" -"\n" -" oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n" -; - -cl_int test_atomic_or_result_int( size_t size, cl_int 
*startRefValues, size_t whichResult ) + " size_t numBits = sizeof( destMemory[0] ) * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - ( whichResult * numBits );\n" + "\n" + " oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"; +// clang-format on + +cl_int test_atomic_or_result_int(size_t size, cl_int *startRefValues, + size_t whichResult) { - size_t numThreads = ( (size_t)size + 31 ) / 32; - if( whichResult < numThreads - 1 ) - return 0xffffffff; + size_t numThreads = ((size_t)size + 31) / 32; + if (whichResult < numThreads - 1) return 0xffffffff; // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = (size_t)size - whichResult * 32; cl_int bits = 0; - for( size_t i = 0; i < numBits; i++ ) - bits |= ( 1 << i ); + for (size_t i = 0; i < numBits; i++) bits |= (1 << i); return bits; } -cl_long test_atomic_or_result_long( size_t size, cl_long *startRefValues, size_t whichResult ) +cl_long test_atomic_or_result_long(size_t size, cl_long *startRefValues, + size_t whichResult) { - size_t numThreads = ( (size_t)size + 63 ) / 64; - if( whichResult < numThreads - 1 ) - return 0x0ffffffffffffffffLL; + size_t numThreads = ((size_t)size + 63) / 64; + if (whichResult < numThreads - 1) return 0x0ffffffffffffffffLL; // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = (size_t)size - whichResult * 64; cl_long bits = 0; - for( size_t i = 0; i < numBits; i++ ) - bits |= ( 1LL << i ); + for (size_t i = 0; i < numBits; i++) bits |= (1LL << i); return bits; } -int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_or(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int, NULL, NULL, test_atomic_or_result_long, NULL, NULL }; + TestFns set = { + 0, 0LL, test_bitwise_num_results, 
test_atomic_or_result_int, + NULL, NULL, test_atomic_or_result_long, NULL, + NULL + }; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_or_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_or_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -1100,33 +1394,44 @@ const char atomic_xor_core[] = "\n" " oldValues[tid] = atomic_xor( &destMemory[0], 1L << bitIndex );\n"; -cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, size_t whichResult ) +cl_int test_atomic_xor_result_int(size_t size, cl_int *startRefValues, + size_t whichResult) { cl_int total = 0x2f08ab41; - for( size_t i = 0; i < size; i++ ) - total ^= ( 1 << ( i & 31 ) ); + for (size_t i = 0; i < size; i++) total ^= (1 << (i & 31)); return total; } -cl_long test_atomic_xor_result_long( size_t size, cl_long *startRefValues, size_t whichResult ) +cl_long test_atomic_xor_result_long(size_t size, cl_long *startRefValues, + size_t whichResult) { cl_long total = 0x2f08ab418ba0541LL; - for( size_t i = 0; i < size; i++ ) - total ^= ( 1LL << ( i & 63 ) ); + for (size_t i = 0; i < size; i++) total ^= (1LL << (i & 63)); return total; } -int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_xor(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0x2f08ab41, 0x2f08ab418ba0541LL, NULL, test_atomic_xor_result_int, NULL, NULL, test_atomic_xor_result_long, NULL, 
NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0x2f08ab41, + 0x2f08ab418ba0541LL, + NULL, + test_atomic_xor_result_int, + NULL, + NULL, + test_atomic_xor_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_xor_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_xor_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } - - - - diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp index b85e3d24..d625d8b4 100644 --- a/test_conformance/atomics/test_indexed_cases.cpp +++ b/test_conformance/atomics/test_indexed_cases.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -16,22 +16,25 @@ #include "testBase.h" #include "harness/conversions.h" -const char * atomic_index_source = -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"// Counter keeps track of which index in counts we are using.\n" -"// We get that value, increment it, and then set that index in counts to our thread ID.\n" -"// At the end of this we should have all thread IDs in some random location in counts\n" -"// exactly once. 
If atom_add failed then we will write over various thread IDs and we\n" -"// will be missing some.\n" -"\n" -"__kernel void add_index_test(__global int *counter, __global int *counts) {\n" -" int tid = get_global_id(0);\n" -" \n" -" int counter_to_use = atom_add(counter, 1);\n" -" counts[counter_to_use] = tid;\n" -"}"; - -int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +// clang-format off +const char *atomic_index_source = + "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" + "// Counter keeps track of which index in counts we are using.\n" + "// We get that value, increment it, and then set that index in counts to our thread ID.\n" + "// At the end of this we should have all thread IDs in some random location in counts\n" + "// exactly once. If atom_add failed then we will write over various thread IDs and we\n" + "// will be missing some.\n" + "\n" + "__kernel void add_index_test(__global int *counter, __global int *counts) {\n" + " int tid = get_global_id(0);\n" + " \n" + " int counter_to_use = atom_add(counter, 1);\n" + " counts[counter_to_use] = tid;\n" + "}"; +// clang-format on + +int test_atomic_add_index(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { clProgramWrapper program; clKernelWrapper kernel; @@ -39,25 +42,29 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_ size_t numGlobalThreads, numLocalThreads; int fail = 0, succeed = 0, err; - /* Check if atomics are supported. */ - if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) { - log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n"); - return 0; - } + /* Check if atomics are supported. */ + if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) + { + log_info("Base atomics not supported " + "(cl_khr_global_int32_base_atomics). 
Skipping test.\n"); + return 0; + } //===== add_index test // The index test replicates what particles does. - // It uses one memory location to keep track of the current index and then each thread - // does an atomic add to it to get its new location. The threads then write to their - // assigned location. At the end we check to make sure that each thread's ID shows up - // exactly once in the output. + // It uses one memory location to keep track of the current index and then + // each thread does an atomic add to it to get its new location. The threads + // then write to their assigned location. At the end we check to make sure + // that each thread's ID shows up exactly once in the output. numGlobalThreads = 2048; - if( create_single_kernel_helper( context, &program, &kernel, 1, &atomic_index_source, "add_index_test" ) ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + &atomic_index_source, "add_index_test")) return -1; - if( get_max_common_work_group_size( context, kernel, numGlobalThreads, &numLocalThreads ) ) + if (get_max_common_work_group_size(context, kernel, numGlobalThreads, + &numLocalThreads)) return -1; log_info("Execute global_threads:%d local_threads:%d\n", @@ -72,86 +79,133 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_ sizeof(cl_int) * numGlobalThreads, NULL, NULL); // Reset all those locations to -1 to indciate they have not been used. 
- cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads); - if (values == NULL) { - log_error("add_index_test FAILED to allocate memory for initial values.\n"); - fail = 1; succeed = -1; - } else { + cl_int *values = (cl_int *)malloc(sizeof(cl_int) * numGlobalThreads); + if (values == NULL) + { + log_error( + "add_index_test FAILED to allocate memory for initial values.\n"); + fail = 1; + succeed = -1; + } + else + { memset(values, -1, numLocalThreads); - unsigned int i=0; - for (i=0; i= max_counts_per_bin) { - bin = random_in_range(0, number_of_bins-1, d); + for (i = 0; i < number_of_items; i++) + { + int bin = random_in_range(0, number_of_bins - 1, d); + while (l_bin_counts[bin] >= max_counts_per_bin) + { + bin = random_in_range(0, number_of_bins - 1, d); } if (bin >= number_of_bins) - log_error("add_index_bin_test internal error generating bin assignments: bin %d >= number_of_bins %d.\n", bin, number_of_bins); - if (l_bin_counts[bin]+1 > max_counts_per_bin) - log_error("add_index_bin_test internal error generating bin assignments: bin %d has more entries (%d) than max_counts_per_bin (%d).\n", bin, l_bin_counts[bin], max_counts_per_bin); + log_error("add_index_bin_test internal error generating bin " + "assignments: bin %d >= number_of_bins %d.\n", + bin, number_of_bins); + if (l_bin_counts[bin] + 1 > max_counts_per_bin) + log_error( + "add_index_bin_test internal error generating bin assignments: " + "bin %d has more entries (%d) than max_counts_per_bin (%d).\n", + bin, l_bin_counts[bin], max_counts_per_bin); l_bin_counts[bin]++; l_bin_assignments[i] = bin; - // log_info("item %d assigned to bin %d (%d items)\n", i, bin, l_bin_counts[bin]); + // log_info("item %d assigned to bin %d (%d items)\n", i, bin, + // l_bin_counts[bin]); } - err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int)*number_of_items, l_bin_assignments, 0, NULL, NULL); - if (err) { - log_error("add_index_bin_test FAILED to set initial values for bin_assignments: 
%d\n", err); + err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, + sizeof(cl_int) * number_of_items, + l_bin_assignments, 0, NULL, NULL); + if (err) + { + log_error("add_index_bin_test FAILED to set initial values for " + "bin_assignments: %d\n", + err); return -1; } // Setup the kernel err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters); err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins); err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments); - err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), &max_counts_per_bin); - if (err) { - log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err); - fail=1; succeed=-1; + err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), + &max_counts_per_bin); + if (err) + { + log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", + err); + fail = 1; + succeed = -1; return -1; } - err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL ); - if (err) { + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, + local_threads, 0, NULL, NULL); + if (err) + { log_error("add_index_bin_test FAILED to execute kernel: %d\n", err); - fail=1; succeed=-1; + fail = 1; + succeed = -1; } - cl_int *final_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin); - if (!final_bin_assignments) { - log_error("add_index_bin_test FAILED to allocate initial values for final_bin_assignments.\n"); + cl_int *final_bin_assignments = + (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin); + if (!final_bin_assignments) + { + log_error("add_index_bin_test FAILED to allocate initial values for " + "final_bin_assignments.\n"); return -1; } - err = clEnqueueReadBuffer( queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, final_bin_assignments, 0, NULL, NULL ); - if (err) { + err = clEnqueueReadBuffer(queue, bins, true, 0, + sizeof(cl_int) * 
number_of_bins + * max_counts_per_bin, + final_bin_assignments, 0, NULL, NULL); + if (err) + { log_error("add_index_bin_test FAILED to read back bins: %d\n", err); - fail = 1; succeed=-1; + fail = 1; + succeed = -1; } - cl_int *final_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins); - if (!final_bin_counts) { - log_error("add_index_bin_test FAILED to allocate initial values for final_bin_counts.\n"); + cl_int *final_bin_counts = + (cl_int *)malloc(sizeof(cl_int) * number_of_bins); + if (!final_bin_counts) + { + log_error("add_index_bin_test FAILED to allocate initial values for " + "final_bin_counts.\n"); return -1; } - err = clEnqueueReadBuffer( queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, final_bin_counts, 0, NULL, NULL ); - if (err) { - log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err); - fail = 1; succeed=-1; + err = clEnqueueReadBuffer(queue, bin_counters, true, 0, + sizeof(cl_int) * number_of_bins, final_bin_counts, + 0, NULL, NULL); + if (err) + { + log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", + err); + fail = 1; + succeed = -1; } // Verification. - int errors=0; + int errors = 0; int current_bin; int search; // Print out all the contents of the bins. // for (current_bin=0; current_bin Date: Fri, 30 Sep 2022 11:41:19 +0100 Subject: [NFC] atomics: Remove set-but-unused "succeed" variables (#1517) The "succeed" variables are never read and they don't seem to serve any purpose that's not already provided by the "fail" variables. In `add_index_bin_test` the "fail" variable is also set but unused, but that may require an actual fix, so leaving that out of this commit. 
Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/atomics/test_indexed_cases.cpp | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp index d625d8b4..2bba3e24 100644 --- a/test_conformance/atomics/test_indexed_cases.cpp +++ b/test_conformance/atomics/test_indexed_cases.cpp @@ -40,7 +40,7 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, clKernelWrapper kernel; clMemWrapper counter, counters; size_t numGlobalThreads, numLocalThreads; - int fail = 0, succeed = 0, err; + int fail = 0, err; /* Check if atomics are supported. */ if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) @@ -85,7 +85,6 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, log_error( "add_index_test FAILED to allocate memory for initial values.\n"); fail = 1; - succeed = -1; } else { @@ -104,7 +103,6 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, "add_index_test FAILED to write initial values to arrays: %d\n", err); fail = 1; - succeed = -1; } else { @@ -115,7 +113,6 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, log_error("add_index_test FAILED to set kernel arguments: %d\n", err); fail = 1; - succeed = -1; } else { @@ -127,7 +124,6 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, log_error("add_index_test FAILED to execute kernel: %d\n", err); fail = 1; - succeed = -1; } else { @@ -140,7 +136,6 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, "add_index_test FAILED to read back results: %d\n", err); fail = 1; - succeed = -1; } else { @@ -161,7 +156,6 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, "instances (%d!=1) for counter %d.\n", instances_found, looking_for); fail = 1; - succeed = -1; } } } @@ -208,7 +202,6 @@ int add_index_bin_test(size_t 
*global_threads, cl_command_queue queue, int max_counts_per_bin = divisor * 2; int fail = 0; - int succeed = 0; int err; clProgramWrapper program; @@ -353,7 +346,6 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err); fail = 1; - succeed = -1; return -1; } @@ -363,7 +355,6 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, { log_error("add_index_bin_test FAILED to execute kernel: %d\n", err); fail = 1; - succeed = -1; } cl_int *final_bin_assignments = @@ -382,7 +373,6 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, { log_error("add_index_bin_test FAILED to read back bins: %d\n", err); fail = 1; - succeed = -1; } cl_int *final_bin_counts = @@ -401,7 +391,6 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err); fail = 1; - succeed = -1; } // Verification. -- cgit v1.2.3 From 73f51ccff747607b091c556e1b4b0134423e811e Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Sat, 1 Oct 2022 10:14:32 +0100 Subject: math_brute_force: Fix -Wformat warnings (#1518) * math_brute_force: Fix -Wformat warnings The main sources of warnings were: * Printing of 64-bit types, which is now done using the `PRI*64` macros from to ensure portability across 32 and 64-bit builds. * Printing of `size_t` types that lacked a `z` length modifier. * Printing of values with a `z` length modifier that weren't a `size_t` type. 
Signed-off-by: Sven van Haastregt * [NFC] math_brute_force: clang-format after -Wformat changes Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/CMakeLists.txt | 2 +- .../math_brute_force/binary_double.cpp | 2 +- test_conformance/math_brute_force/binary_float.cpp | 4 +-- .../math_brute_force/binary_i_float.cpp | 4 +-- .../math_brute_force/binary_operator_double.cpp | 2 +- .../math_brute_force/binary_operator_float.cpp | 4 +-- .../binary_two_results_i_double.cpp | 35 ++++++++++++---------- .../binary_two_results_i_float.cpp | 31 ++++++++++--------- .../math_brute_force/i_unary_double.cpp | 6 ++-- .../math_brute_force/i_unary_float.cpp | 6 ++-- .../math_brute_force/macro_binary_double.cpp | 11 ++++--- .../math_brute_force/macro_binary_float.cpp | 4 +-- .../math_brute_force/macro_unary_double.cpp | 7 +++-- test_conformance/math_brute_force/main.cpp | 2 +- .../math_brute_force/ternary_double.cpp | 6 ++-- .../math_brute_force/ternary_float.cpp | 5 ++-- test_conformance/math_brute_force/unary_double.cpp | 3 +- .../math_brute_force/unary_two_results_double.cpp | 6 ++-- .../math_brute_force/unary_two_results_float.cpp | 6 ++-- .../unary_two_results_i_double.cpp | 10 ++++--- .../math_brute_force/unary_two_results_i_float.cpp | 10 ++++--- .../math_brute_force/unary_u_double.cpp | 16 +++++----- .../math_brute_force/unary_u_float.cpp | 6 ++-- 23 files changed, 111 insertions(+), 77 deletions(-) diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index 1db1ecdf..1c96f521 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -42,6 +42,6 @@ set(${MODULE_NAME}_SOURCES # math_brute_force compiles cleanly with -Wall (except for a few remaining # warnings), but other tests not (yet); so enable -Wall locally. 
-set_gnulike_module_compile_flags("-Wall -Wno-format -Wno-strict-aliasing -Wno-unknown-pragmas") +set_gnulike_module_compile_flags("-Wall -Wno-strict-aliasing -Wno-unknown-pragmas") include(../CMakeCommon.txt) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index b6bb049b..f18d0b97 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -630,7 +630,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f " + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f " "ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index e85add4b..fe1491d7 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -755,7 +755,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { vlog_error( "\nERROR: %s%s: %f ulp error at {%a (0x%x), %a " - "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %d\n", + "(0x%x)}: *%a vs. 
%a (0x%8.8x) at index: %zu\n", name, sizeNames[k], err, s[j], ((cl_uint *)s)[j], s2[j], ((cl_uint *)s2)[j], r[j], test, ((cl_uint *)&test)[0], j); @@ -787,7 +787,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f " + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f " "ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index 2387ff06..d855f447 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -516,7 +516,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { vlog_error( "\nERROR: %s%s: %f ulp error at {%a (0x%8.8x), %d}: " - "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %d\n", + "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %zu\n", name, sizeNames[k], err, s[j], ((uint32_t *)s)[j], s2[j], r[j], ((uint32_t *)r)[j], test, ((cl_uint *)&test)[0], j); @@ -545,7 +545,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f " + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f " "ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 34ec6197..bbe5c438 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -601,7 +601,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f " + vlog("base:%14u step:%10u scale:%10u 
buf_elements:%10zu ulps:%5.3f " "ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index 5577cffe..1a28d8d8 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -698,7 +698,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (fail) { vlog_error("\nERROR: %s%s: %f ulp error at {%a, %a}: *%a " - "vs. %a (0x%8.8x) at index: %d\n", + "vs. %a (0x%8.8x) at index: %zu\n", name, sizeNames[k], err, s[j], s2[j], r[j], test, ((cl_uint *)&test)[0], j); error = -1; @@ -726,7 +726,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f " + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f " "ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp index 59a5bfe2..bbfd707b 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include #include @@ -527,17 +528,20 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) if (fail) { - vlog_error( - "\nERROR: %sD%s: {%f, %lld} ulp error at {%.13la, " - "%.13la} ({ 0x%16.16llx, 0x%16.16llx}): *{%.13la, " - "%d} ({ 0x%16.16llx, 0x%8.8x}) vs. 
{%.13la, %d} ({ " - "0x%16.16llx, 0x%8.8x})\n", - f->name, sizeNames[k], err, iErr, ((double *)gIn)[j], - ((double *)gIn2)[j], ((cl_ulong *)gIn)[j], - ((cl_ulong *)gIn2)[j], ((double *)gOut_Ref)[j], - ((int *)gOut_Ref2)[j], ((cl_ulong *)gOut_Ref)[j], - ((cl_uint *)gOut_Ref2)[j], test, q2[j], - ((cl_ulong *)q)[j], ((cl_uint *)q2)[j]); + vlog_error("\nERROR: %sD%s: {%f, %" PRId64 + "} ulp error at {%.13la, " + "%.13la} ({ 0x%16.16" PRIx64 ", 0x%16.16" PRIx64 + "}): *{%.13la, " + "%d} ({ 0x%16.16" PRIx64 + ", 0x%8.8x}) vs. {%.13la, %d} ({ " + "0x%16.16" PRIx64 ", 0x%8.8x})\n", + f->name, sizeNames[k], err, iErr, + ((double *)gIn)[j], ((double *)gIn2)[j], + ((cl_ulong *)gIn)[j], ((cl_ulong *)gIn2)[j], + ((double *)gOut_Ref)[j], ((int *)gOut_Ref2)[j], + ((cl_ulong *)gOut_Ref)[j], + ((cl_uint *)gOut_Ref2)[j], test, q2[j], + ((cl_ulong *)q)[j], ((cl_uint *)q2)[j]); error = -1; goto exit; } @@ -548,8 +552,9 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { @@ -566,8 +571,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) else vlog("passed"); - vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal, - maxErrorVal2); + vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2, + maxErrorVal, maxErrorVal2); } vlog("\n"); diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp index 6c1dd3bc..07473376 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include #include @@ -513,16 +514,17 @@ int TestFunc_FloatI_Float_Float(const Func *f, 
MTdata d, bool relaxedMode) if (fail) { - vlog_error( - "\nERROR: %s%s: {%f, %lld} ulp error at {%a, %a} " - "({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, " - "0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n", - f->name, sizeNames[k], err, iErr, ((float *)gIn)[j], - ((float *)gIn2)[j], ((cl_uint *)gIn)[j], - ((cl_uint *)gIn2)[j], ((float *)gOut_Ref)[j], - ((int *)gOut_Ref2)[j], ((cl_uint *)gOut_Ref)[j], - ((cl_uint *)gOut_Ref2)[j], test, q2[j], - ((cl_uint *)&test)[0], ((cl_uint *)q2)[j]); + vlog_error("\nERROR: %s%s: {%f, %" PRId64 + "} ulp error at {%a, %a} " + "({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, " + "0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n", + f->name, sizeNames[k], err, iErr, + ((float *)gIn)[j], ((float *)gIn2)[j], + ((cl_uint *)gIn)[j], ((cl_uint *)gIn2)[j], + ((float *)gOut_Ref)[j], ((int *)gOut_Ref2)[j], + ((cl_uint *)gOut_Ref)[j], + ((cl_uint *)gOut_Ref2)[j], test, q2[j], + ((cl_uint *)&test)[0], ((cl_uint *)q2)[j]); error = -1; goto exit; } @@ -533,8 +535,9 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { @@ -551,8 +554,8 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) else vlog("passed"); - vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal, - maxErrorVal2); + vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2, + maxErrorVal, maxErrorVal2); } vlog("\n"); diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index a05737da..0cbcf86e 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ b/test_conformance/math_brute_force/i_unary_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -271,8 +272,9 @@ int 
TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index 13442e61..90bb1e16 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -268,8 +269,9 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index b81766bd..412f210b 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -487,8 +488,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_ulong err = t[j] - q[j]; if (q[j] > t[j]) err = q[j] - t[j]; - vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld " - "vs. %lld (index: %d)\n", + vlog_error("\nERROR: %s: %" PRId64 + " ulp error at {%.13la, %.13la}: *%" PRId64 " " + "vs. %" PRId64 " (index: %zu)\n", name, err, ((double *)s)[j], ((double *)s2)[j], t[j], q[j], j); error = -1; @@ -535,8 +537,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_ulong err = -t[j] - q[j]; if (q[j] > -t[j]) err = q[j] + t[j]; - vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, " - "%.13la}: *%lld vs. 
%lld (index: %d)\n", + vlog_error("\nERROR: %sD%s: %" PRId64 " ulp error at {%.13la, " + "%.13la}: *%" PRId64 " vs. %" PRId64 + " (index: %zu)\n", name, sizeNames[k], err, ((double *)s)[j], ((double *)s2)[j], -t[j], q[j], j); error = -1; diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 4a3fb67d..cb915fc7 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -478,7 +478,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) uint32_t err = t[j] - q[j]; if (q[j] > t[j]) err = q[j] - t[j]; vlog_error("\nERROR: %s: %d ulp error at {%a, %a}: *0x%8.8x vs. " - "0x%8.8x (index: %d)\n", + "0x%8.8x (index: %zu)\n", name, err, ((float *)s)[j], ((float *)s2)[j], t[j], q[j], j); error = -1; @@ -524,7 +524,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_uint err = -t[j] - q[j]; if (q[j] > -t[j]) err = q[j] + t[j]; vlog_error("\nERROR: %s%s: %d ulp error at {%a, %a}: *0x%8.8x " - "vs. 0x%8.8x (index: %d)\n", + "vs. 0x%8.8x (index: %zu)\n", name, sizeNames[k], err, ((float *)s)[j], ((float *)s2)[j], -t[j], q[j], j); error = -1; diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 19cefee4..c2e7cdcc 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -297,7 +298,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_ulong err = t[j] - q[j]; if (q[j] > t[j]) err = q[j] - t[j]; - vlog_error("\nERROR: %sD: %zd ulp error at %.13la: *%zd vs. %zd\n", + vlog_error("\nERROR: %sD: %" PRId64 + " ulp error at %.13la: *%" PRId64 " vs. 
%" PRId64 "\n", name, err, ((double *)gIn)[j], t[j], q[j]); return -1; } @@ -323,7 +325,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_ulong err = -t[j] - q[j]; if (q[j] > -t[j]) err = q[j] + t[j]; vlog_error( - "\nERROR: %sD%s: %zd ulp error at %.13la: *%zd vs. %zd\n", + "\nERROR: %sD%s: %" PRId64 " ulp error at %.13la: *%" PRId64 + " vs. %" PRId64 "\n", name, sizeNames[k], err, ((double *)gIn)[j], -t[j], q[j]); return -1; } diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 59960a85..64491bd4 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -132,7 +132,7 @@ static int doTest(const char *name) if ((gStartTestNumber != ~0u && i < gStartTestNumber) || i > gEndTestNumber) { - vlog("Skipping function #%d\n", i); + vlog("Skipping function #%zu\n", i); return 0; } diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index 94fbe268..a7fa5625 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include #define CORRECTLY_ROUNDED 0 @@ -708,8 +709,9 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index 762c57de..3b8c2c3b 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include #define CORRECTLY_ROUNDED 0 @@ -843,8 +844,8 @@ int 
TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10u bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 3deac57c..177cfe5b 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -345,7 +346,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (fail) { vlog_error("\nERROR: %s%s: %f ulp error at %.13la " - "(0x%16.16llx): *%.13la vs. %.13la\n", + "(0x%16.16" PRIx64 "): *%.13la vs. %.13la\n", job->f->name, sizeNames[k], err, ((cl_double *)gIn)[j], ((cl_ulong *)gIn)[j], ((cl_double *)gOut_Ref)[j], test); diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp index 858b2c35..6d7c61d6 100644 --- a/test_conformance/math_brute_force/unary_two_results_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -414,8 +415,9 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index 85e5d014..42e858c4 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp 
@@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -546,8 +547,9 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp index 4cfbca9c..8b751944 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include #include @@ -386,8 +387,9 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { @@ -404,8 +406,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) else vlog("passed"); - vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal, - maxErrorVal2); + vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2, + maxErrorVal, maxErrorVal2); } vlog("\n"); diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp index e324ad09..54843a29 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include #include @@ -384,8 +385,9 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, 
step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { @@ -402,8 +404,8 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) else vlog("passed"); - vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal, - maxErrorVal2); + vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2, + maxErrorVal, maxErrorVal2); } vlog("\n"); diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp index a0c6b793..9b60904a 100644 --- a/test_conformance/math_brute_force/unary_u_double.cpp +++ b/test_conformance/math_brute_force/unary_u_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -267,11 +268,11 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) } if (fail) { - vlog_error("\n%s%sD: %f ulp error at 0x%16.16llx: " - "*%.13la vs. %.13la\n", - f->name, sizeNames[k], err, - ((uint64_t *)gIn)[j], - ((double *)gOut_Ref)[j], test); + vlog_error( + "\n%s%sD: %f ulp error at 0x%16.16" PRIx64 ": " + "*%.13la vs. 
%.13la\n", + f->name, sizeNames[k], err, ((uint64_t *)gIn)[j], + ((double *)gOut_Ref)[j], test); error = -1; goto exit; } @@ -283,8 +284,9 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp index ccfbc3be..b67a9bda 100644 --- a/test_conformance/math_brute_force/unary_u_float.cpp +++ b/test_conformance/math_brute_force/unary_u_float.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -285,8 +286,9 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { -- cgit v1.2.3 From a3294d4c9542df3329fcf528429b7b1285cf4ebf Mon Sep 17 00:00:00 2001 From: victzhan <111778801+victzhan@users.noreply.github.com> Date: Mon, 3 Oct 2022 09:26:43 -0400 Subject: Add Python 3 support to run_conformance.py (#1470) --- test_conformance/run_conformance.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/test_conformance/run_conformance.py b/test_conformance/run_conformance.py index bb8f86ff..974491e1 100755 --- a/test_conformance/run_conformance.py +++ b/test_conformance/run_conformance.py @@ -16,7 +16,6 @@ import sys import subprocess import time import tempfile -import string DEBUG = 0 @@ -27,7 +26,6 @@ process_pid = 0 # to the screen while the tests are running. 
seconds_between_status_updates = 60 * 60 * 24 * 7 # effectively never - # Help info def write_help_info(): print("run_conformance.py test_list [CL_DEVICE_TYPE(s) to test] [partial-test-names, ...] [log=path/to/log/file/]") @@ -66,16 +64,16 @@ def get_tests(filename, devices_to_test): device_specific_match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*,\s*(.+?)\s*$", line) if device_specific_match: if device_specific_match.group(1) in devices_to_test: - test_path = string.replace(device_specific_match.group(3), '/', os.sep) - test_name = string.replace(device_specific_match.group(2), '/', os.sep) + test_path = str.replace(device_specific_match.group(3), '/', os.sep) + test_name = str.replace(device_specific_match.group(2), '/', os.sep) tests.append((test_name, test_path)) else: print("Skipping " + device_specific_match.group(2) + " because " + device_specific_match.group(1) + " is not in the list of devices to test.") continue match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*$", line) if match: - test_path = string.replace(match.group(2), '/', os.sep) - test_name = string.replace(match.group(1), '/', os.sep) + test_path = str.replace(match.group(2), '/', os.sep) + test_name = str.replace(match.group(1), '/', os.sep) tests.append((test_name, test_path)) return tests @@ -243,7 +241,10 @@ def run_tests(tests): # Catch an interrupt from the user write_screen_log("\nFAILED: Execution interrupted. Killing test process, but not aborting full test run.") os.kill(process_pid, 9) - answer = raw_input("Abort all tests? (y/n)") + if sys.version_info[0] < 3: + answer = raw_input("Abort all tests? (y/n)") + else: + answer = input("Abort all tests? 
(y/n)") if answer.find("y") != -1: write_screen_log("\nUser chose to abort all tests.") log_file.close() -- cgit v1.2.3 From 28e76e532a60dbe334bb3bcd729ef715adc8b1dc Mon Sep 17 00:00:00 2001 From: Jack Frankland <30410009+FranklandJack@users.noreply.github.com> Date: Mon, 3 Oct 2022 14:32:11 +0100 Subject: Add missing type declaration (#1520) Add a missing type declaration to OpenCL C code strings in 2D async copy tests. --- test_conformance/basic/test_async_copy2D.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp index 54633a31..bf3f1552 100644 --- a/test_conformance/basic/test_async_copy2D.cpp +++ b/test_conformance/basic/test_async_copy2D.cpp @@ -53,7 +53,7 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, for (int i = 0; i < lineCopiesPerWorkItem; i++) { for (int j = 0; j < numElementsPerLine; j++) { - const local_index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j; + const int local_index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j; const int global_index = (get_global_id(0) * lineCopiesPerWorkItem + i) * dstStride + j; dst[global_index] = localBuffer[local_index]; } -- cgit v1.2.3 From 18825769e5e34dae0eefeae9ba19e3ac5af9a4d8 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Mon, 3 Oct 2022 22:09:05 +0100 Subject: pipes: Fix typos in skip messages (#1523) Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/pipes/test_pipe_read_write.cpp | 42 ++++++++++++++++--------- test_conformance/pipes/test_pipe_subgroups.cpp | 5 ++- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/test_conformance/pipes/test_pipe_read_write.cpp b/test_conformance/pipes/test_pipe_read_write.cpp index a502e03e..cb72e96b 100644 --- a/test_conformance/pipes/test_pipe_read_write.cpp +++ b/test_conformance/pipes/test_pipe_read_write.cpp @@ -1075,7 +1075,8 @@ int 
test_pipe_readwrite_half( cl_device_id deviceID, cl_context context, cl_comm if(!is_extension_available(deviceID, "cl_khr_fp16")) { - log_info("cl_khr_fp16 is not supported on this platoform. Skipping test.\n"); + log_info( + "cl_khr_fp16 is not supported on this platform. Skipping test.\n"); return CL_SUCCESS; } ptrSizes[0] = sizeof(cl_float) / 2; @@ -1256,7 +1257,8 @@ int test_pipe_readwrite_double( cl_device_id deviceID, cl_context context, cl_co //skip devices that don't support double if(!is_extension_available(deviceID, "cl_khr_fp64")) { - log_info("cl_khr_fp64 is not supported on this platoform. Skipping test.\n"); + log_info( + "cl_khr_fp64 is not supported on this platform. Skipping test.\n"); return CL_SUCCESS; } @@ -1403,7 +1405,8 @@ int test_pipe_subgroup_readwrite_int( cl_device_id deviceID, cl_context context, if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_int(deviceID, context, queue, num_elements); @@ -1417,7 +1420,8 @@ int test_pipe_subgroup_readwrite_uint( cl_device_id deviceID, cl_context context if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_uint(deviceID, context, queue, num_elements); @@ -1431,7 +1435,8 @@ int test_pipe_subgroup_readwrite_short( cl_device_id deviceID, cl_context contex if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. 
Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_short(deviceID, context, queue, num_elements); @@ -1445,7 +1450,8 @@ int test_pipe_subgroup_readwrite_ushort( cl_device_id deviceID, cl_context conte if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_ushort(deviceID, context, queue, num_elements); @@ -1459,7 +1465,8 @@ int test_pipe_subgroup_readwrite_char( cl_device_id deviceID, cl_context context if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_char(deviceID, context, queue, num_elements); @@ -1473,7 +1480,8 @@ int test_pipe_subgroup_readwrite_uchar( cl_device_id deviceID, cl_context contex if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_uchar(deviceID, context, queue, num_elements); @@ -1488,7 +1496,8 @@ int test_pipe_subgroup_readwrite_float( cl_device_id deviceID, cl_context contex if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. 
Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_float(deviceID, context, queue, num_elements); @@ -1502,7 +1511,8 @@ int test_pipe_subgroup_readwrite_half( cl_device_id deviceID, cl_context context if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_half(deviceID, context, queue, num_elements); @@ -1516,7 +1526,8 @@ int test_pipe_subgroup_readwrite_long( cl_device_id deviceID, cl_context context if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_long(deviceID, context, queue, num_elements); @@ -1530,7 +1541,8 @@ int test_pipe_subgroup_readwrite_ulong( cl_device_id deviceID, cl_context contex if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_ulong(deviceID, context, queue, num_elements); @@ -1544,7 +1556,8 @@ int test_pipe_subgroup_readwrite_double( cl_device_id deviceID, cl_context conte if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. 
Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_double(deviceID, context, queue, num_elements); @@ -1554,7 +1567,8 @@ int test_pipe_subgroup_readwrite_struct( cl_device_id deviceID, cl_context conte { if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } const char *kernelNames[] = {"test_pipe_subgroup_write_struct","test_pipe_subgroup_read_struct"}; diff --git a/test_conformance/pipes/test_pipe_subgroups.cpp b/test_conformance/pipes/test_pipe_subgroups.cpp index b3e17183..8e2f6e57 100644 --- a/test_conformance/pipes/test_pipe_subgroups.cpp +++ b/test_conformance/pipes/test_pipe_subgroups.cpp @@ -114,9 +114,8 @@ int test_pipe_subgroups_divergence(cl_device_id deviceID, cl_context context, cl if (!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info( - "cl_khr_subgroups is not supported on this platoform. Skipping " - "test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } -- cgit v1.2.3 From 30cc3db4ecdaccd0b5367ca3da2bda013fc6b81b Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Mon, 3 Oct 2022 22:09:28 +0100 Subject: atomics: Fix -Wformat warnings (#1519) The main sources of warnings were: * Printing of `i` which is a `size_t` requiring the `%zu` specifier. * Printing of `cl_long` which is now done using the `PRId64` macro to ensure portability across 32 and 64-bit builds. 
Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/atomics/test_atomics.cpp | 37 ++++++++++++++++++------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp index 31d08500..caa4b78f 100644 --- a/test_conformance/atomics/test_atomics.cpp +++ b/test_conformance/atomics/test_atomics.cpp @@ -19,6 +19,8 @@ #include #endif +#include + #define INT_TEST_VALUE 402258822 #define LONG_TEST_VALUE 515154531254381446LL @@ -359,7 +361,7 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, if (typeSize == 4) { cl_int *outValue = (cl_int *)(destItems + i * typeSize); - log_error("ERROR: Result %ld from kernel does not " + log_error("ERROR: Result %zu from kernel does not " "validate! (should be %d, was %d)\n", i, intVal, *outValue); cl_int *startRefs = (cl_int *)startRefValues; @@ -367,27 +369,28 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, for (i = 0; i < threadSize; i++) { if (startRefs != NULL) - log_info(" --- %ld - %d --- %d\n", i, startRefs[i], + log_info(" --- %zu - %d --- %d\n", i, startRefs[i], refs[i]); else - log_info(" --- %ld --- %d\n", i, refs[i]); + log_info(" --- %zu --- %d\n", i, refs[i]); } } else { cl_long *outValue = (cl_long *)(destItems + i * typeSize); - log_error("ERROR: Result %ld from kernel does not " - "validate! (should be %lld, was %lld)\n", + log_error("ERROR: Result %zu from kernel does not " + "validate! 
(should be %" PRId64 ", was %" PRId64 + ")\n", i, longVal, *outValue); cl_long *startRefs = (cl_long *)startRefValues; cl_long *refs = (cl_long *)refValues; for (i = 0; i < threadSize; i++) { if (startRefs != NULL) - log_info(" --- %ld - %lld --- %lld\n", i, - startRefs[i], refs[i]); + log_info(" --- %zu - %" PRId64 " --- %" PRId64 "\n", + i, startRefs[i], refs[i]); else - log_info(" --- %ld --- %lld\n", i, refs[i]); + log_info(" --- %zu --- %" PRId64 "\n", i, refs[i]); } } return -1; @@ -476,7 +479,8 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_long *r = (cl_long *)refValues; log_error("ERROR: atomic function operated correctly but did NOT " "return correct 'old' value " - " (should have been %lld, returned %lld)!\n", + " (should have been %" PRId64 ", returned %" PRId64 + ")!\n", *s, *r); } return -1; @@ -673,7 +677,7 @@ bool test_atomic_xchg_verify_int(size_t size, cl_int *refValues, if (refValues[i] < 0 || (size_t)refValues[i] >= size) { log_error( - "ERROR: Reference value %ld outside of valid range! (%d)\n", i, + "ERROR: Reference value %zu outside of valid range! (%d)\n", i, refValues[i]); return false; } @@ -702,7 +706,7 @@ bool test_atomic_xchg_verify_int(size_t size, cl_int *refValues, { if (valids[i] != 1) { - log_error("ERROR: Reference value %ld did not occur " + log_error("ERROR: Reference value %zu did not occur " "once-and-only-once (occurred %d)\n", i, valids[i]); for (size_t j = 0; j < size; j++) @@ -738,7 +742,8 @@ bool test_atomic_xchg_verify_long(size_t size, cl_long *refValues, if (refValues[i] < 0 || (size_t)refValues[i] >= size) { log_error( - "ERROR: Reference value %ld outside of valid range! (%lld)\n", + "ERROR: Reference value %zu outside of valid range! 
(%" PRId64 + ")\n", i, refValues[i]); return false; } @@ -749,7 +754,7 @@ bool test_atomic_xchg_verify_long(size_t size, cl_long *refValues, executed, because that value should be the final value outputted */ if (valids[finalValue] > 0) { - log_error("ERROR: Final value %lld was also in ref list!\n", + log_error("ERROR: Final value %" PRId64 " was also in ref list!\n", finalValue); return false; } @@ -768,7 +773,7 @@ bool test_atomic_xchg_verify_long(size_t size, cl_long *refValues, { if (valids[i] != 1) { - log_error("ERROR: Reference value %ld did not occur " + log_error("ERROR: Reference value %zu did not occur " "once-and-only-once (occurred %d)\n", i, valids[i]); for (size_t j = 0; j < size; j++) @@ -805,7 +810,7 @@ bool test_atomic_xchg_verify_float(size_t size, cl_float *refValues, if (refValues[i] < 0 || (size_t)refValues[i] >= size) { log_error( - "ERROR: Reference value %ld outside of valid range! (%a)\n", i, + "ERROR: Reference value %zu outside of valid range! (%a)\n", i, refValues[i]); return false; } @@ -834,7 +839,7 @@ bool test_atomic_xchg_verify_float(size_t size, cl_float *refValues, { if (valids[i] != 1) { - log_error("ERROR: Reference value %ld did not occur " + log_error("ERROR: Reference value %zu did not occur " "once-and-only-once (occurred %d)\n", i, valids[i]); for (size_t j = 0; j < size; j++) -- cgit v1.2.3 From dbd33bc9cfd2ace62445a812a6aabb901c2f7e74 Mon Sep 17 00:00:00 2001 From: Nikhil Joshi Date: Tue, 4 Oct 2022 21:30:03 +0530 Subject: External sharing new updates (#1482) * Fix enqueue_flags test to use correct barrier type. Currently, enqueue_flags test uses CLK_LOCAL_MEM_FENCE. Use CLK_GLOBAL_MEM_FENCE instead as all threads across work-groups need to wait here. * Add check for support for Read-Wrie images Read-Write images have required OpenCL 2.x. Read-Write image tests are already being skipped for 1.x devices. 
With OpenCL 3.0, read-write images being optional, the tests should be run or skipped depending on the implementation support. Add a check to decide if Read-Write images are supported or required to be supported depending on OpenCL version and decide if the tests should be run or skipped. Fixes issue #894 * Fix formatting in case of Read-Write image checks. Fix formatting in case of Read-write image checks. Also, combine two ifs into one in case of kerne_read_write tests * Fix some more formatting for RW-image checks Remove unnecessary spaces at various places. Also, fix lengthy lines. * Fix malloc-size calculation in test imagedim unsigned char size is silently assumed to be 1 in imagedim test of test_basic. Pass sizeof(type) in malloc size calculation. Also, change loop variable from signed to unsigned. Add checks for null pointer for malloced memory. * Initial CTS for external sharing extensions Initial set of tests for below extensions with Vulkan as producer 1. cl_khr_external_memory 2. cl_khr_external_memory_win32 3. cl_khr_external_memory_opaque_fd 4. cl_khr_external_semaphore 5. cl_khr_external_semaphore_win32 6. cl_khr_external_semaphore_opaque_fd * Updates to external sharing CTS Updates to external sharing CTS 1. Fix some build issues to remove unnecessary, non-existent files 2. Add new tests for platform and device queries. 3. Some added checks for VK Support.
Return type of getHandle is defined differently based on win or linux builds. Use appropriate guards when using API at other places. While at it remove duplicate definition of ARRAY_SIZE. * Use ARRAY_SIZE in harness. Use already defined ARRAY_SIZE macro from test_harness. * Fix build issues for test_vulkan Fix build issues for test_vulkan 1. Add cl_ext.h in common files 2. Replace cl_mem_properties_khr with cl_mem_properties 3. Replace cl_external_mem_handle_type_khr with cl_external_memory_handle_type_khr 4. Type-cast malloc as required. * Fix code formatting. Fix code formatting to get CTS CI builds clean. * Fix formatting fixes part-2 Another set of formatting fixes. * Fix code formatting part-3 Some more code formatting fixes. * Fix code formatting issues part-4 More code formatting fixes. * Formatting fixes part-5 Some more formatting fixes * Fix formatting part-6 More formatting fixes continued. * Code formatting fixes part-7 Code formatting fixes for image * Code formatting fixes part-8 Fixes for platform and device query tests. * Code formatting fixes part-9 More formatting fixes for vulkan_wrapper * Code formatting fixes part-10 More fixes to wrapper header * Code formatting fixes part-11 Formatting fixes for api_list * Code formatting fixes part-12 Formatting fixes for api_list_map. * Code formatting changes part-13 Code formatting changes for utility. * Code formatting fixes part-15 Formatting fixes for wrapper. * Misc Code formatting fixes Some more misc code formatting fixes. * Fix build breaks due to code formatting Fix build issues that arose with recent code formatting issues. * Fix presubmit script after merge Fix presubmit script after merge conflicts. * Fix Vulkan loader build in presubmit script. Use cmake ninja and appropriate toolchain for Vulkan loader dependency to fix linking issue on arm/aarch64. * Use static array sizes Use static array sizes to fix windows builds. * Some left-out formatting fixes. Fix remaining formatting issues. 
* Fix harness header path Fix harness header path While at it, remove Misc and test pragma. * Add/Fix license information Add Khronos License info for test_vulkan. Replace Apple license with Khronos as applicable. * Fix headers for Mac OSX builds. Use appropriate headers for Mac OSX builds * Fix Mac OSX builds. Use appropriate headers for Mac OSX builds. Also, fix some build issues due to type-casting. * Fix new code formatting issues Fix new code formatting issues with recent MacOS fixes. * Add back missing case statement Add back missing case statement that was accidentally removed. * Disable USE_GAS for Vulkan Loader build. Disable USE_GAS for Vulkan Loader build to fix aarch64 build. * Fixes to OpenCL external sharing tests Fix clReleaseSemaphore() API. Fix copyright year. Some other minor fixes. * Improvements to OpenCL external sharing CTS Use SPIR-V shaders instead of NV extension path from GLSL to Vulkan shaders. Fixes for lower end GPUs to use limited memory. Update copy-right year at some more places. * Fix new code formatting issues. Fix code formatting issues with recent changes for external sharing tests. * More formatting fixes. More formatting fixes for recent updates to external sharing tests. * Final code formatting fixes. Minor formatting fixes to get format checks clean. 
--- test_conformance/vulkan/main.cpp | 4 +- test_conformance/vulkan/shaders/buffer.comp | 28 ++++ test_conformance/vulkan/shaders/buffer.spv | Bin 0 -> 2168 bytes test_conformance/vulkan/shaders/image2D.comp | 31 +++++ test_conformance/vulkan/shaders/image2D_r16i.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_r16ui.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_r32f.spv | Bin 0 -> 3268 bytes test_conformance/vulkan/shaders/image2D_r32i.spv | Bin 0 -> 3256 bytes test_conformance/vulkan/shaders/image2D_r32ui.spv | Bin 0 -> 3256 bytes test_conformance/vulkan/shaders/image2D_r8i.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_r8ui.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_rg16i.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_rg16ui.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_rg32f.spv | Bin 0 -> 3276 bytes test_conformance/vulkan/shaders/image2D_rg32i.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_rg32ui.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_rg8i.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_rg8ui.spv | Bin 0 -> 3264 bytes .../vulkan/shaders/image2D_rgba16i.spv | Bin 0 -> 3256 bytes .../vulkan/shaders/image2D_rgba16ui.spv | Bin 0 -> 3256 bytes .../vulkan/shaders/image2D_rgba32f.spv | Bin 0 -> 3268 bytes .../vulkan/shaders/image2D_rgba32i.spv | Bin 0 -> 3256 bytes .../vulkan/shaders/image2D_rgba32ui.spv | Bin 0 -> 3256 bytes test_conformance/vulkan/shaders/image2D_rgba8i.spv | Bin 0 -> 3256 bytes .../vulkan/shaders/image2D_rgba8ui.spv | Bin 0 -> 3256 bytes .../vulkan/test_vulkan_api_consistency.cpp | 14 +- .../vulkan/test_vulkan_interop_buffer.cpp | 36 +---- .../vulkan/test_vulkan_interop_image.cpp | 146 +++++++-------------- .../opencl_vulkan_wrapper.cpp | 51 +++++-- .../opencl_vulkan_wrapper.hpp | 6 +- .../vulkan_interop_common/vulkan_list_map.hpp | 7 +- 
.../vulkan_interop_common/vulkan_utility.cpp | 105 ++++++++------- .../vulkan_interop_common/vulkan_utility.hpp | 1 + .../vulkan_interop_common/vulkan_wrapper.cpp | 14 +- .../vulkan_interop_common/vulkan_wrapper.hpp | 3 +- 35 files changed, 232 insertions(+), 214 deletions(-) create mode 100644 test_conformance/vulkan/shaders/buffer.comp create mode 100644 test_conformance/vulkan/shaders/buffer.spv create mode 100644 test_conformance/vulkan/shaders/image2D.comp create mode 100644 test_conformance/vulkan/shaders/image2D_r16i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_r16ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_r32f.spv create mode 100644 test_conformance/vulkan/shaders/image2D_r32i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_r32ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_r8i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_r8ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rg16i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rg16ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rg32f.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rg32i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rg32ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rg8i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rg8ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rgba16i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rgba16ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rgba32f.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rgba32i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rgba32ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rgba8i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rgba8ui.spv diff --git a/test_conformance/vulkan/main.cpp 
b/test_conformance/vulkan/main.cpp index 6cbde5cc..2eeb0c36 100644 --- a/test_conformance/vulkan/main.cpp +++ b/test_conformance/vulkan/main.cpp @@ -134,7 +134,6 @@ cl_device_id *devices; const size_t bufsize = BUFFERSIZE; char buf[BUFFERSIZE]; cl_uchar uuid[CL_UUID_SIZE_KHR]; -VulkanDevice vkDevice; unsigned int numCQ; bool multiImport; bool multiCtx; @@ -220,9 +219,12 @@ int main(int argc, const char *argv[]) if (!checkVkSupport()) { log_info("Vulkan supported GPU not found \n"); + log_info("TEST SKIPPED \n"); return 0; } + VulkanDevice vkDevice; + cl_device_type requestedDeviceType = CL_DEVICE_TYPE_GPU; char *force_cpu = getenv("CL_DEVICE_TYPE"); if (force_cpu != NULL) diff --git a/test_conformance/vulkan/shaders/buffer.comp b/test_conformance/vulkan/shaders/buffer.comp new file mode 100644 index 00000000..d8756f92 --- /dev/null +++ b/test_conformance/vulkan/shaders/buffer.comp @@ -0,0 +1,28 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable +#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable +#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable + +#define MAX_BUFFERS 5 + +layout(binding = 0) buffer Params +{ + uint32_t numBuffers; + uint32_t bufferSize; + uint32_t interBufferOffset; +}; +layout(binding = 1) buffer Buffer +{ + uint8_t ptr[]; +} bufferPtrList[MAX_BUFFERS]; +layout(local_size_x = 512) in; +void main() { + for (uint32_t bufIdx = 0; bufIdx < numBuffers; bufIdx++) { + uint32_t ptrIdx = gl_GlobalInvocationID.x; + uint32_t limit = bufferSize; + while (ptrIdx < limit) { + bufferPtrList[bufIdx].ptr[ptrIdx]++; + ptrIdx += (gl_NumWorkGroups.x * gl_WorkGroupSize.x); + } + } +} \ No newline at end of file diff --git a/test_conformance/vulkan/shaders/buffer.spv b/test_conformance/vulkan/shaders/buffer.spv new file mode 100644 index 00000000..685523ba Binary files /dev/null and b/test_conformance/vulkan/shaders/buffer.spv differ diff --git a/test_conformance/vulkan/shaders/image2D.comp 
b/test_conformance/vulkan/shaders/image2D.comp new file mode 100644 index 00000000..42fa2f73 --- /dev/null +++ b/test_conformance/vulkan/shaders/image2D.comp @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable +#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable + +#define MAX_2D_IMAGES 5 +#define MAX_2D_IMAGE_MIP_LEVELS 11 +#define MAX_2D_IMAGE_DESCRIPTORS MAX_2D_IMAGES * MAX_2D_IMAGE_MIP_LEVELS + +layout(binding = 0) buffer Params +{ + uint32_t numImage2DDescriptors; +}; +layout(binding = 1, rgba32f ) uniform image2D image2DList[ MAX_2D_IMAGE_DESCRIPTORS ]; +layout(local_size_x = 32, local_size_y = 32) in; +void main() { + uvec3 numThreads = gl_NumWorkGroups * gl_WorkGroupSize; + for (uint32_t image2DIdx = 0; image2DIdx < numImage2DDescriptors; image2DIdx++) { + ivec2 imageDim = imageSize(image2DList[image2DIdx]); + uint32_t heightBy2 = imageDim.y / 2; + for (uint32_t row = gl_GlobalInvocationID.y; row < heightBy2; row += numThreads.y) { + for (uint32_t col = gl_GlobalInvocationID.x; col < imageDim.x; col += numThreads.x) { + ivec2 coordsA = ivec2(col, row); + ivec2 coordsB = ivec2(col, imageDim.y - row - 1); + vec4 dataA = imageLoad(image2DList[image2DIdx], coordsA); + vec4 dataB = imageLoad(image2DList[image2DIdx], coordsB); + imageStore(image2DList[image2DIdx], coordsA, dataB); + imageStore(image2DList[image2DIdx], coordsB, dataA); + } + } + } +} \ No newline at end of file diff --git a/test_conformance/vulkan/shaders/image2D_r16i.spv b/test_conformance/vulkan/shaders/image2D_r16i.spv new file mode 100644 index 00000000..00c5c283 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_r16i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_r16ui.spv b/test_conformance/vulkan/shaders/image2D_r16ui.spv new file mode 100644 index 00000000..87514d9f Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_r16ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_r32f.spv 
b/test_conformance/vulkan/shaders/image2D_r32f.spv new file mode 100644 index 00000000..e82c9c19 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_r32f.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_r32i.spv b/test_conformance/vulkan/shaders/image2D_r32i.spv new file mode 100644 index 00000000..7ea8d26f Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_r32i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_r32ui.spv b/test_conformance/vulkan/shaders/image2D_r32ui.spv new file mode 100644 index 00000000..dbcdbc5f Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_r32ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_r8i.spv b/test_conformance/vulkan/shaders/image2D_r8i.spv new file mode 100644 index 00000000..1a641475 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_r8i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_r8ui.spv b/test_conformance/vulkan/shaders/image2D_r8ui.spv new file mode 100644 index 00000000..a90ccf98 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_r8ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rg16i.spv b/test_conformance/vulkan/shaders/image2D_rg16i.spv new file mode 100644 index 00000000..07996173 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rg16i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rg16ui.spv b/test_conformance/vulkan/shaders/image2D_rg16ui.spv new file mode 100644 index 00000000..f73e096b Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rg16ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rg32f.spv b/test_conformance/vulkan/shaders/image2D_rg32f.spv new file mode 100644 index 00000000..1489660e Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rg32f.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rg32i.spv 
b/test_conformance/vulkan/shaders/image2D_rg32i.spv new file mode 100644 index 00000000..b7d302f4 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rg32i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rg32ui.spv b/test_conformance/vulkan/shaders/image2D_rg32ui.spv new file mode 100644 index 00000000..6cf2f1b8 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rg32ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rg8i.spv b/test_conformance/vulkan/shaders/image2D_rg8i.spv new file mode 100644 index 00000000..a71b9bf0 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rg8i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rg8ui.spv b/test_conformance/vulkan/shaders/image2D_rg8ui.spv new file mode 100644 index 00000000..2aca9290 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rg8ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rgba16i.spv b/test_conformance/vulkan/shaders/image2D_rgba16i.spv new file mode 100644 index 00000000..0cb95dfd Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rgba16i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rgba16ui.spv b/test_conformance/vulkan/shaders/image2D_rgba16ui.spv new file mode 100644 index 00000000..84c3d3db Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rgba16ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rgba32f.spv b/test_conformance/vulkan/shaders/image2D_rgba32f.spv new file mode 100644 index 00000000..35136c58 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rgba32f.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rgba32i.spv b/test_conformance/vulkan/shaders/image2D_rgba32i.spv new file mode 100644 index 00000000..4d1ae581 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rgba32i.spv differ diff --git 
a/test_conformance/vulkan/shaders/image2D_rgba32ui.spv b/test_conformance/vulkan/shaders/image2D_rgba32ui.spv new file mode 100644 index 00000000..bed86f0c Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rgba32ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rgba8i.spv b/test_conformance/vulkan/shaders/image2D_rgba8i.spv new file mode 100644 index 00000000..edf8c58c Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rgba8i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rgba8ui.spv b/test_conformance/vulkan/shaders/image2D_rgba8ui.spv new file mode 100644 index 00000000..bb9a770c Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rgba8ui.spv differ diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp index 2987418f..f22ac319 100644 --- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp +++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp @@ -238,7 +238,7 @@ int test_consistency_external_image(cl_device_id deviceID, cl_context _context, const VulkanMemoryTypeList& memoryTypeList = vkImage2D->getMemoryTypeList(); uint64_t totalImageMemSize = vkImage2D->getSize(); - log_info("Memory type index: %d\n", (uint32_t)memoryTypeList[0]); + log_info("Memory type index: %lu\n", (uint32_t)memoryTypeList[0]); log_info("Memory type property: %d\n", memoryTypeList[0].getMemoryTypeProperty()); log_info("Image size : %d\n", totalImageMemSize); @@ -552,17 +552,17 @@ int test_consistency_external_semaphore(cl_device_id deviceID, // Pass invalid object to release call - errNum = clReleaseSemaphoreObjectKHRptr(NULL); + errNum = clReleaseSemaphoreKHRptr(NULL); test_failure_error(errNum, CL_INVALID_VALUE, - "clReleaseSemaphoreObjectKHRptr fails with " + "clReleaseSemaphoreKHRptr fails with " "CL_INVALID_VALUE when NULL semaphore object is passed"); // Release both semaphore objects - errNum = 
clReleaseSemaphoreObjectKHRptr(clVk2Clsemaphore); - test_error(errNum, "clReleaseSemaphoreObjectKHRptr failed"); + errNum = clReleaseSemaphoreKHRptr(clVk2Clsemaphore); + test_error(errNum, "clReleaseSemaphoreKHRptr failed"); - errNum = clReleaseSemaphoreObjectKHRptr(clCl2Vksemaphore); - test_error(errNum, "clReleaseSemaphoreObjectKHRptr failed"); + errNum = clReleaseSemaphoreKHRptr(clCl2Vksemaphore); + test_error(errNum, "clReleaseSemaphoreKHRptr failed"); return TEST_PASS; } diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp index 7daf96de..9b0bc9de 100644 --- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp @@ -39,35 +39,6 @@ struct Params }; } -static const char *vkBufferShader = - "#version 450\n" - "#extension GL_ARB_separate_shader_objects : enable\n" - "#extension GL_NV_gpu_shader5 : enable\n" - "layout(binding = 0) buffer Params\n" - "{\n" - " uint32_t numBuffers;\n" - " uint32_t bufferSize;\n" - " uint32_t interBufferOffset;\n" - "};\n" - "layout(binding = 1) buffer Buffer\n" - "{\n" - " uint8_t ptr[];\n" - "} bufferPtrList[" STRING( - MAX_BUFFERS) "];\n" - "layout(local_size_x = 512) in;\n" - "void main() {\n" - " for (uint32_t bufIdx = 0; bufIdx < numBuffers;" - " bufIdx++) {\n" - " uint32_t ptrIdx = gl_GlobalInvocationID.x;\n" - " uint32_t limit = bufferSize;\n" - " while (ptrIdx < limit) {\n" - " bufferPtrList[bufIdx].ptr[ptrIdx]++;\n" - " ptrIdx += (gl_NumWorkGroups.x * " - "gl_WorkGroupSize.x);\n" - " }\n" - " }\n" - "}\n"; - const char *kernel_text_numbuffer_1 = " \ __kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a) { \n\ int gid = get_global_id(0); \n\ @@ -149,6 +120,8 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, VulkanQueue &vkQueue = vkDevice.getQueue(); + std::vector vkBufferShader = readFile("buffer.spv"); + VulkanShaderModule 
vkBufferShaderModule(vkDevice, vkBufferShader); VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); @@ -446,6 +419,7 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, VulkanQueue &vkQueue = vkDevice.getQueue(); + std::vector vkBufferShader = readFile("buffer.spv"); VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); @@ -716,6 +690,8 @@ int run_test_with_multi_import_same_ctx( VulkanQueue &vkQueue = vkDevice.getQueue(); + std::vector vkBufferShader = readFile("buffer.spv"); + VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); @@ -1050,6 +1026,8 @@ int run_test_with_multi_import_diff_ctx( VulkanQueue &vkQueue = vkDevice.getQueue(); + std::vector vkBufferShader = readFile("buffer.spv"); + VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp index f1d0af1f..7577de09 100644 --- a/test_conformance/vulkan/test_vulkan_interop_image.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp @@ -25,8 +25,6 @@ #define MAX_2D_IMAGE_ELEMENT_SIZE 16 #define MAX_2D_IMAGE_MIP_LEVELS 11 #define MAX_2D_IMAGE_DESCRIPTORS MAX_2D_IMAGES *MAX_2D_IMAGE_MIP_LEVELS -#define GLSL_FORMAT_STRING "" -#define GLSL_TYPE_PREFIX_STRING "" #define NUM_THREADS_PER_GROUP_X 32 #define NUM_THREADS_PER_GROUP_Y 32 #define NUM_BLOCKS(size, blockSize) \ @@ -54,61 +52,8 @@ struct Params } static cl_uchar uuid[CL_UUID_SIZE_KHR]; static cl_device_id 
deviceId = NULL; - -static const char *vkImage2DShader = - "#version 450\n" - "#extension GL_ARB_separate_shader_objects : enable\n" - "#extension GL_NV_gpu_shader5 : enable\n" - "layout(binding = 0) buffer Params\n" - "{\n" - " uint32_t numImage2DDescriptors;\n" - "};\n" - "layout(binding = 1, " GLSL_FORMAT_STRING - ") uniform " GLSL_TYPE_PREFIX_STRING "image2D image2DList[" STRING( - MAX_2D_IMAGE_DESCRIPTORS) "];\n" - "layout(local_size_x = 32, local_size_y = " - "32) in;\n" - "void main() {\n" - " uvec3 numThreads = gl_NumWorkGroups * " - "gl_WorkGroupSize;\n" - " for (uint32_t image2DIdx = 0; " - "image2DIdx < numImage2DDescriptors; " - "image2DIdx++)" - " {\n" - " ivec2 imageDim = " - "imageSize(image2DList[image2DIdx]);\n" - " uint32_t heightBy2 = imageDim.y / " - "2;\n" - " for (uint32_t row = " - "gl_GlobalInvocationID.y; row < heightBy2; " - "row += numThreads.y)" - " {\n" - " for (uint32_t col = " - "gl_GlobalInvocationID.x; col < imageDim.x; " - "col += numThreads.x)" - " {\n" - " ivec2 coordsA = ivec2(col, " - "row);\n" - " ivec2 coordsB = ivec2(col, " - "imageDim.y - row - 1);\n" - " " GLSL_TYPE_PREFIX_STRING - "vec4 dataA = " - "imageLoad(image2DList[image2DIdx], " - "coordsA);\n" - " " GLSL_TYPE_PREFIX_STRING - "vec4 dataB = " - "imageLoad(image2DList[image2DIdx], " - "coordsB);\n" - " " - "imageStore(image2DList[image2DIdx], " - "coordsA, dataB);\n" - " " - "imageStore(image2DList[image2DIdx], " - "coordsB, dataA);\n" - " }\n" - " }\n" - " }\n" - "}\n"; +size_t max_width = MAX_2D_IMAGE_WIDTH; +size_t max_height = MAX_2D_IMAGE_HEIGHT; const char *kernel_text_numImage_1 = " \ __constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\ @@ -268,8 +213,8 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); - uint64_t maxImage2DSize = MAX_2D_IMAGE_WIDTH * MAX_2D_IMAGE_HEIGHT - * 
MAX_2D_IMAGE_ELEMENT_SIZE * 2; + uint64_t maxImage2DSize = + max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2; VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize); VulkanDeviceMemory vkSrcBufferDeviceMemory( vkDevice, vkSrcBuffer.getSize(), @@ -310,6 +255,12 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, clCl2VkExternalSemaphore = new clExternalSemaphore( vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + std::vector vkNonDedicatedImage2DListDeviceMemory1; + std::vector vkNonDedicatedImage2DListDeviceMemory2; + std::vector nonDedicatedExternalMemory1; + std::vector nonDedicatedExternalMemory2; + std::vector vkImage2DShader; + for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++) { VulkanFormat vkFormat = vkFormatList[fIdx]; @@ -317,15 +268,13 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, uint32_t elementSize = getVulkanFormatElementSize(vkFormat); ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE); log_info("elementSize= %d\n", elementSize); - std::map patternToSubstituteMap; - patternToSubstituteMap[GLSL_FORMAT_STRING] = - getVulkanFormatGLSLFormat(vkFormat); - patternToSubstituteMap[GLSL_TYPE_PREFIX_STRING] = - getVulkanFormatGLSLTypePrefix(vkFormat); - - VulkanShaderModule vkImage2DShaderModule( - vkDevice, - prepareVulkanShader(vkImage2DShader, patternToSubstituteMap)); + + std::string fileName = "image2D_" + + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv"; + log_info("Load %s file", fileName.c_str()); + vkImage2DShader = readFile(fileName); + VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader); + VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, vkImage2DShaderModule); @@ -333,13 +282,13 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, { uint32_t width = widthList[wIdx]; log_info("Width: %d\n", width); - ASSERT_LEQ(width, (uint32_t)MAX_2D_IMAGE_WIDTH); + if (width > 
max_width) continue; region[0] = width; for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++) { uint32_t height = heightList[hIdx]; log_info("Height: %d", height); - ASSERT_LEQ(height, (uint32_t)MAX_2D_IMAGE_HEIGHT); + if (height > max_height) continue; region[1] = height; uint32_t numMipLevels = 1; @@ -418,14 +367,6 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, const VulkanMemoryTypeList &memoryTypeList = vkDummyImage2D.getMemoryTypeList(); - std::vector - vkNonDedicatedImage2DListDeviceMemory1; - std::vector - vkNonDedicatedImage2DListDeviceMemory2; - std::vector - nonDedicatedExternalMemory1; - std::vector - nonDedicatedExternalMemory2; for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) { @@ -834,6 +775,8 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, } } } + + vkImage2DShader.clear(); } CLEANUP: if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; @@ -866,8 +809,8 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); - uint64_t maxImage2DSize = MAX_2D_IMAGE_WIDTH * MAX_2D_IMAGE_HEIGHT - * MAX_2D_IMAGE_ELEMENT_SIZE * 2; + uint64_t maxImage2DSize = + max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2; VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize); VulkanDeviceMemory vkSrcBufferDeviceMemory( vkDevice, vkSrcBuffer.getSize(), @@ -908,6 +851,12 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, clCl2VkExternalSemaphore = new clExternalSemaphore( vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + std::vector vkNonDedicatedImage2DListDeviceMemory1; + std::vector vkNonDedicatedImage2DListDeviceMemory2; + std::vector nonDedicatedExternalMemory1; + std::vector nonDedicatedExternalMemory2; + std::vector vkImage2DShader; + for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++) { 
VulkanFormat vkFormat = vkFormatList[fIdx]; @@ -915,15 +864,13 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, uint32_t elementSize = getVulkanFormatElementSize(vkFormat); ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE); log_info("elementSize= %d\n", elementSize); - std::map patternToSubstituteMap; - patternToSubstituteMap[GLSL_FORMAT_STRING] = - getVulkanFormatGLSLFormat(vkFormat); - patternToSubstituteMap[GLSL_TYPE_PREFIX_STRING] = - getVulkanFormatGLSLTypePrefix(vkFormat); - - VulkanShaderModule vkImage2DShaderModule( - vkDevice, - prepareVulkanShader(vkImage2DShader, patternToSubstituteMap)); + + std::string fileName = "image2D_" + + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv"; + log_info("Load %s file", fileName.c_str()); + vkImage2DShader = readFile(fileName); + VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader); + VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, vkImage2DShaderModule); @@ -931,13 +878,13 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, { uint32_t width = widthList[wIdx]; log_info("Width: %d\n", width); - ASSERT_LEQ(width, (uint32_t)MAX_2D_IMAGE_WIDTH); + if (width > max_width) continue; region[0] = width; for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++) { uint32_t height = heightList[hIdx]; log_info("Height: %d\n", height); - ASSERT_LEQ(height, (uint32_t)MAX_2D_IMAGE_HEIGHT); + if (height > max_height) continue; region[1] = height; uint32_t numMipLevels = 1; @@ -1016,14 +963,6 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, const VulkanMemoryTypeList &memoryTypeList = vkDummyImage2D.getMemoryTypeList(); - std::vector - vkNonDedicatedImage2DListDeviceMemory1; - std::vector - vkNonDedicatedImage2DListDeviceMemory2; - std::vector - nonDedicatedExternalMemory1; - std::vector - nonDedicatedExternalMemory2; for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) { @@ 
-1368,6 +1307,7 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, } } } + vkImage2DShader.clear(); } CLEANUP: if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; @@ -1494,6 +1434,14 @@ int test_image_common(cl_device_id device_, cl_context context_, goto CLEANUP; } deviceId = devices[device_no]; + err = setMaxImageDimensions(deviceId, max_width, max_height); + if (CL_SUCCESS != err) + { + print_error(err, "error setting max image dimensions"); + goto CLEANUP; + } + log_info("Set max_width to %lu and max_height to %lu\n", max_width, + max_height); context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &err); if (CL_SUCCESS != err) diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp index 136818f6..9d9a6601 100644 --- a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp +++ b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp @@ -23,6 +23,7 @@ #include #define ASSERT(x) assert((x)) +#define GB(x) ((unsigned long long)(x) << 30) pfnclCreateSemaphoreWithPropertiesKHR clCreateSemaphoreWithPropertiesKHRptr; pfnclEnqueueWaitSemaphoresKHR clEnqueueWaitSemaphoresKHRptr; @@ -31,7 +32,7 @@ pfnclEnqueueAcquireExternalMemObjectsKHR clEnqueueAcquireExternalMemObjectsKHRptr; pfnclEnqueueReleaseExternalMemObjectsKHR clEnqueueReleaseExternalMemObjectsKHRptr; -pfnclReleaseSemaphoreObjectKHR clReleaseSemaphoreObjectKHRptr; +pfnclReleaseSemaphoreKHR clReleaseSemaphoreKHRptr; void init_cl_vk_ext(cl_platform_id opencl_platform) { @@ -51,13 +52,13 @@ void init_cl_vk_ext(cl_platform_id opencl_platform) throw std::runtime_error("Failed to get the function pointer of " "clEnqueueSignalSemaphoresKHRptr!"); } - clReleaseSemaphoreObjectKHRptr = (pfnclReleaseSemaphoreObjectKHR) - clGetExtensionFunctionAddressForPlatform(opencl_platform, - "clReleaseSemaphoreObjectKHR"); - if 
(NULL == clReleaseSemaphoreObjectKHRptr) + clReleaseSemaphoreKHRptr = + (pfnclReleaseSemaphoreKHR)clGetExtensionFunctionAddressForPlatform( + opencl_platform, "clReleaseSemaphoreKHR"); + if (NULL == clReleaseSemaphoreKHRptr) { throw std::runtime_error("Failed to get the function pointer of " - "clReleaseSemaphoreObjectKHRptr!"); + "clReleaseSemaphoreKHRptr!"); } clCreateSemaphoreWithPropertiesKHRptr = (pfnclCreateSemaphoreWithPropertiesKHR) @@ -70,6 +71,40 @@ void init_cl_vk_ext(cl_platform_id opencl_platform) } } +cl_int setMaxImageDimensions(cl_device_id deviceID, size_t &max_width, + size_t &max_height) +{ + cl_int result = CL_SUCCESS; + cl_ulong val; + size_t paramSize; + + result = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, + sizeof(cl_ulong), &val, ¶mSize); + + if (result != CL_SUCCESS) + { + return result; + } + + if (val < GB(4)) + { + max_width = 256; + max_height = 256; + } + else if (val < GB(8)) + { + max_width = 512; + max_height = 256; + } + else + { + max_width = 1024; + max_height = 512; + } + + return result; +} + cl_int getCLFormatFromVkFormat(VkFormat vkFormat, cl_image_format *clImageFormat) { @@ -798,10 +833,10 @@ clExternalSemaphore::clExternalSemaphore( clExternalSemaphore::~clExternalSemaphore() { - cl_int err = clReleaseSemaphoreObjectKHRptr(m_externalSemaphore); + cl_int err = clReleaseSemaphoreKHRptr(m_externalSemaphore); if (err != CL_SUCCESS) { - throw std::runtime_error("clReleaseSemaphoreObjectKHR failed!"); + throw std::runtime_error("clReleaseSemaphoreKHR failed!"); } } diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp index c1d2a766..d9f8dccb 100644 --- a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp +++ b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp @@ -49,7 +49,7 @@ typedef cl_int (*pfnclEnqueueReleaseExternalMemObjectsKHR)( cl_command_queue command_queue, 
cl_uint num_mem_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); -typedef cl_int (*pfnclReleaseSemaphoreObjectKHR)(cl_semaphore_khr sema_object); +typedef cl_int (*pfnclReleaseSemaphoreKHR)(cl_semaphore_khr sema_object); extern pfnclCreateSemaphoreWithPropertiesKHR clCreateSemaphoreWithPropertiesKHRptr; @@ -59,7 +59,7 @@ extern pfnclEnqueueAcquireExternalMemObjectsKHR clEnqueueAcquireExternalMemObjectsKHRptr; extern pfnclEnqueueReleaseExternalMemObjectsKHR clEnqueueReleaseExternalMemObjectsKHRptr; -extern pfnclReleaseSemaphoreObjectKHR clReleaseSemaphoreObjectKHRptr; +extern pfnclReleaseSemaphoreKHR clReleaseSemaphoreKHRptr; cl_int getCLImageInfoFromVkImageInfo(const VkImageCreateInfo *, size_t, cl_image_format *, cl_image_desc *); @@ -69,6 +69,8 @@ cl_int check_external_memory_handle_type( cl_int check_external_semaphore_handle_type( cl_device_id deviceID, cl_external_semaphore_handle_type_khr requiredHandleType); +cl_int setMaxImageDimensions(cl_device_id deviceID, size_t &width, + size_t &height); class clExternalMemory { protected: diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp index 831403e1..10a7b221 100644 --- a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp @@ -335,11 +335,8 @@ const VulkanWrapper & template VulkanWrapper &VulkanList::operator[](size_t idx) { - if (idx < m_wrapperList.size()) - { - // CHECK_LT(idx, m_wrapperList.size()); - return m_wrapperList[idx].get(); - } + // CHECK_LT(idx, m_wrapperList.size()); + return m_wrapperList[idx].get(); } template diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp index 81e12621..4e6118b1 100644 --- 
a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp @@ -18,6 +18,7 @@ #include "vulkan_wrapper.hpp" #include #include +#include #include #include #include @@ -541,59 +542,6 @@ const char *getVulkanFormatGLSLFormat(VulkanFormat format) return (const char *)size_t(0); } -const char *getVulkanFormatGLSLTypePrefix(VulkanFormat format) -{ - switch (format) - { - case VULKAN_FORMAT_R8_UINT: - case VULKAN_FORMAT_R8G8_UINT: - case VULKAN_FORMAT_R8G8B8A8_UINT: - case VULKAN_FORMAT_R16_UINT: - case VULKAN_FORMAT_R16G16_UINT: - case VULKAN_FORMAT_R16G16B16A16_UINT: - case VULKAN_FORMAT_R32_UINT: - case VULKAN_FORMAT_R32G32_UINT: - case VULKAN_FORMAT_R32G32B32A32_UINT: return "u"; - - case VULKAN_FORMAT_R8_SINT: - case VULKAN_FORMAT_R8G8_SINT: - case VULKAN_FORMAT_R8G8B8A8_SINT: - case VULKAN_FORMAT_R16_SINT: - case VULKAN_FORMAT_R16G16_SINT: - case VULKAN_FORMAT_R16G16B16A16_SINT: - case VULKAN_FORMAT_R32_SINT: - case VULKAN_FORMAT_R32G32_SINT: - case VULKAN_FORMAT_R32G32B32A32_SINT: return "i"; - - case VULKAN_FORMAT_R32_SFLOAT: - case VULKAN_FORMAT_R32G32_SFLOAT: - case VULKAN_FORMAT_R32G32B32A32_SFLOAT: return ""; - - default: ASSERT(0); std::cout << "Unknown format"; - } - - return ""; -} - -std::string prepareVulkanShader( - std::string shaderCode, - const std::map &patternToSubstituteMap) -{ - for (std::map::const_iterator psIt = - patternToSubstituteMap.begin(); - psIt != patternToSubstituteMap.end(); ++psIt) - { - std::string::size_type pos = 0u; - while ((pos = shaderCode.find(psIt->first, pos)) != std::string::npos) - { - shaderCode.replace(pos, psIt->first.length(), psIt->second); - pos += psIt->second.length(); - } - } - - return shaderCode; -} - std::ostream &operator<<(std::ostream &os, VulkanMemoryTypeProperty memoryTypeProperty) { @@ -691,3 +639,54 @@ std::ostream &operator<<(std::ostream &os, VulkanFormat format) return os; } + +static char *findFilePath(const std::string 
filename) +{ + const char *searchPath[] = { + "./", // Same dir + "./shaders/", // In shaders folder in same dir + "../test_conformance/vulkan/shaders/" // In src folder + }; + for (unsigned int i = 0; i < sizeof(searchPath) / sizeof(char *); ++i) + { + std::string path(searchPath[i]); + + path.append(filename); + FILE *fp; + fp = fopen(path.c_str(), "rb"); + + if (fp != NULL) + { + fclose(fp); + // File found + char *file_path = (char *)(malloc(path.length() + 1)); + strncpy(file_path, path.c_str(), path.length() + 1); + return file_path; + } + if (fp) + { + fclose(fp); + } + } + // File not found + return 0; +} + +std::vector readFile(const std::string &filename) +{ + char *file_path = findFilePath(filename); + + std::ifstream file(file_path, std::ios::ate | std::ios::binary); + + if (!file.is_open()) + { + throw std::runtime_error("failed to open shader spv file!\n"); + } + size_t fileSize = (size_t)file.tellg(); + std::vector buffer(fileSize); + file.seekg(0); + file.read(buffer.data(), fileSize); + file.close(); + printf("filesize is %d", fileSize); + return buffer; +} diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp index 7022fd5a..04f5a594 100644 --- a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp @@ -66,4 +66,5 @@ operator<<(std::ostream& os, VulkanExternalSemaphoreHandleType externalSemaphoreHandleType); std::ostream& operator<<(std::ostream& os, VulkanFormat format); +std::vector readFile(const std::string& filename); #endif // _vulkan_utility_hpp_ diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp index c044e009..e5d3a271 100644 --- a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp @@ -201,7 
+201,8 @@ VulkanInstance::VulkanInstance(): m_vkInstance(VK_NULL_HANDLE) if (physicalDeviceCount == uint32_t(0)) { - throw std::runtime_error("failed to find GPUs with Vulkan support!"); + std::cout << "failed to find GPUs with Vulkan support!\n"; + return; } std::vector vkPhysicalDeviceList(physicalDeviceCount, @@ -846,23 +847,18 @@ VulkanShaderModule::VulkanShaderModule(const VulkanShaderModule &shaderModule) {} VulkanShaderModule::VulkanShaderModule(const VulkanDevice &device, - const std::string &code) + const std::vector &code) : m_device(device) { - std::string paddedCode = code; - while (paddedCode.size() % 4) - { - paddedCode += " "; - } VkShaderModuleCreateInfo vkShaderModuleCreateInfo = {}; vkShaderModuleCreateInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; vkShaderModuleCreateInfo.pNext = NULL; vkShaderModuleCreateInfo.flags = 0; - vkShaderModuleCreateInfo.codeSize = paddedCode.size(); + vkShaderModuleCreateInfo.codeSize = code.size(); vkShaderModuleCreateInfo.pCode = - (const uint32_t *)(void *)paddedCode.c_str(); + reinterpret_cast(code.data()); vkCreateShaderModule(m_device, &vkShaderModuleCreateInfo, NULL, &m_vkShaderModule); diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp index 1f68a92b..37925ee4 100644 --- a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp @@ -240,7 +240,8 @@ protected: VulkanShaderModule(const VulkanShaderModule &shaderModule); public: - VulkanShaderModule(const VulkanDevice &device, const std::string &code); + VulkanShaderModule(const VulkanDevice &device, + const std::vector &code); virtual ~VulkanShaderModule(); operator VkShaderModule() const; }; -- cgit v1.2.3 From 6659a1b6b8a4a989fe5d28ebd012c15f4e6872cf Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 4 Oct 2022 09:02:25 -0700 Subject: remove implicit conversion to pointer to 
fix 32-bit compile (#1488) * remove implicit conversion to pointer to fix 32-bit compile * fix formatting --- .../vulkan/vulkan_interop_common/vulkan_list_map.hpp | 4 ++-- .../vulkan/vulkan_interop_common/vulkan_utility.cpp | 2 +- .../vulkan/vulkan_interop_common/vulkan_wrapper.cpp | 13 +++++++------ 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp index 10a7b221..52206779 100644 --- a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp @@ -37,7 +37,7 @@ public: virtual size_t size() const; virtual const VulkanWrapper &operator[](size_t idx) const; virtual VulkanWrapper &operator[](size_t idx); - virtual operator const VulkanNative *() const; + virtual const VulkanNative *operator()() const; }; template class VulkanMap { @@ -340,7 +340,7 @@ VulkanWrapper &VulkanList::operator[](size_t idx) } template -VulkanList::operator const VulkanNative *() const +const VulkanNative *VulkanList::operator()() const { return m_nativeList.data(); } diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp index 4e6118b1..1a313cce 100644 --- a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp @@ -183,7 +183,7 @@ bool checkVkSupport() const VulkanInstance &instance = getVulkanInstance(); const VulkanPhysicalDeviceList &physicalDeviceList = instance.getPhysicalDeviceList(); - if (physicalDeviceList == NULL) + if (physicalDeviceList() == NULL) { std::cout << "physicalDeviceList is null, No GPUs found with " "Vulkan support !!!\n"; diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp index 
e5d3a271..6209a747 100644 --- a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp @@ -626,12 +626,12 @@ void VulkanQueue::submit(const VulkanSemaphoreList &waitSemaphoreList, vkSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; vkSubmitInfo.pNext = NULL; vkSubmitInfo.waitSemaphoreCount = (uint32_t)waitSemaphoreList.size(); - vkSubmitInfo.pWaitSemaphores = waitSemaphoreList; + vkSubmitInfo.pWaitSemaphores = waitSemaphoreList(); vkSubmitInfo.pWaitDstStageMask = vkPipelineStageFlagsList.data(); vkSubmitInfo.commandBufferCount = (uint32_t)commandBufferList.size(); - vkSubmitInfo.pCommandBuffers = commandBufferList; + vkSubmitInfo.pCommandBuffers = commandBufferList(); vkSubmitInfo.signalSemaphoreCount = (uint32_t)signalSemaphoreList.size(); - vkSubmitInfo.pSignalSemaphores = signalSemaphoreList; + vkSubmitInfo.pSignalSemaphores = signalSemaphoreList(); vkQueueSubmit(m_vkQueue, 1, &vkSubmitInfo, NULL); } @@ -729,7 +729,8 @@ void VulkanDescriptorSetLayout::VulkanDescriptorSetLayoutCommon( vkDescriptorSetLayoutCreateInfo.flags = 0; vkDescriptorSetLayoutCreateInfo.bindingCount = (uint32_t)descriptorSetLayoutBindingList.size(); - vkDescriptorSetLayoutCreateInfo.pBindings = descriptorSetLayoutBindingList; + vkDescriptorSetLayoutCreateInfo.pBindings = + descriptorSetLayoutBindingList(); vkCreateDescriptorSetLayout(m_device, &vkDescriptorSetLayoutCreateInfo, NULL, &m_vkDescriptorSetLayout); @@ -800,7 +801,7 @@ void VulkanPipelineLayout::VulkanPipelineLayoutCommon( vkPipelineLayoutCreateInfo.flags = 0; vkPipelineLayoutCreateInfo.setLayoutCount = (uint32_t)descriptorSetLayoutList.size(); - vkPipelineLayoutCreateInfo.pSetLayouts = descriptorSetLayoutList; + vkPipelineLayoutCreateInfo.pSetLayouts = descriptorSetLayoutList(); vkPipelineLayoutCreateInfo.pushConstantRangeCount = 0; vkPipelineLayoutCreateInfo.pPushConstantRanges = NULL; @@ -1573,7 +1574,7 @@ VulkanImage::VulkanImage( 
vkImageCreateInfo.queueFamilyIndexCount = (uint32_t)m_device.getPhysicalDevice().getQueueFamilyList().size(); vkImageCreateInfo.pQueueFamilyIndices = - m_device.getPhysicalDevice().getQueueFamilyList(); + m_device.getPhysicalDevice().getQueueFamilyList()(); vkImageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; VkExternalMemoryImageCreateInfo vkExternalMemoryImageCreateInfo = {}; -- cgit v1.2.3 From 07b055cd68072bf53151f2f059ba89c8e876c0d7 Mon Sep 17 00:00:00 2001 From: Nikhil Joshi Date: Tue, 4 Oct 2022 21:43:18 +0530 Subject: Cap CL_DEVICE_MAX_MEM_ALLOC_SIZE to SIZE_MAX (#1501) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix enqueue_flags test to use correct barrier type. Currently, enqueue_flags test uses CLK_LOCAL_MEM_FENCE. Use CLK_GLOBAL_MEM_FENCE instead as all threads across work-groups need to wait here. * Add check for support for Read-Wrie images Read-Write images have required OpenCL 2.x. Read-Write image tests are already being skipped for 1.x devices. With OpenCL 3.0, read-write images being optional, the tests should be run or skipped depending on the implementation support. Add a check to decide if Read-Write images are supported or required to be supported depending on OpenCL version and decide if the tests should be run on skipped. Fixes issue #894 * Fix formatting in case of Read-Write image checks. Fix formatting in case of Read-write image checks. Also, combine two ifs into one in case of kerne_read_write tests * Fix some more formatting for RW-image checks Remove unnecessary spaces at various places. Also, fix lengthy lines. * Fix malloc-size calculation in test imagedim unsigned char size is silently assumed to be 1 in imagedim test of test_basic. Pass sizeof(type) in malloc size calculation. Also, change loop variable from signed to unsigned. Add checks for null pointer for malloced memory. 
* Cap CL_DEVICE_MAX_MEM_ALLOC_SIZE to SIZE_MAX Cap CL_DEVICE_MAX_MEM_ALLOC_SIZE to SIZE_MAX when CL_DEVICE_GLOBAL_MEM_SIZE is capped with SIZE_MAX. test_allocation caps the value of GLOBAL_MEM_SIZE to SIZE_MAX if it exceeds the value of SIZE_MAX(value depends on platform bitness), but doesn’t modify MAX_ALLOC_SIZE the same way. Due to this MAX_ALLOC_SIZE becomes greater than GLOBAL_MEM_SIZE and the test fails. Modify MAX_MEM_ALLOC_SIZE as GLOBAL_MEM_SIZE when it exceeds SIZE_MAX OpenCL-CTS #1022 --- test_conformance/images/clCopyImage/test_copy_1D.cpp | 1 + test_conformance/images/clCopyImage/test_copy_1D_array.cpp | 1 + test_conformance/images/clCopyImage/test_copy_2D.cpp | 1 + test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp | 1 + test_conformance/images/clCopyImage/test_copy_2D_3D.cpp | 1 + test_conformance/images/clCopyImage/test_copy_2D_array.cpp | 1 + test_conformance/images/clCopyImage/test_copy_3D.cpp | 1 + test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp | 1 + test_conformance/images/clFillImage/test_fill_1D.cpp | 1 + test_conformance/images/clFillImage/test_fill_1D_array.cpp | 1 + test_conformance/images/clFillImage/test_fill_2D.cpp | 1 + test_conformance/images/clFillImage/test_fill_2D_array.cpp | 1 + test_conformance/images/clFillImage/test_fill_3D.cpp | 1 + test_conformance/images/clGetInfo/test_1D.cpp | 1 + test_conformance/images/clGetInfo/test_1D_2D_array.cpp | 2 ++ test_conformance/images/clGetInfo/test_2D.cpp | 1 + test_conformance/images/clGetInfo/test_3D.cpp | 1 + test_conformance/images/clReadWriteImage/test_read_1D.cpp | 1 + test_conformance/images/clReadWriteImage/test_read_1D_array.cpp | 1 + test_conformance/images/clReadWriteImage/test_read_2D.cpp | 1 + test_conformance/images/clReadWriteImage/test_read_2D_array.cpp | 1 + test_conformance/images/clReadWriteImage/test_read_3D.cpp | 1 + test_conformance/images/kernel_image_methods/test_1D.cpp | 1 + test_conformance/images/kernel_image_methods/test_1D_array.cpp | 1 + 
test_conformance/images/kernel_image_methods/test_2D.cpp | 1 + test_conformance/images/samplerlessReads/test_iterations.cpp | 1 + test_conformance/images/samplerlessReads/test_read_1D.cpp | 1 + test_conformance/images/samplerlessReads/test_read_1D_array.cpp | 1 + test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp | 1 + test_conformance/images/samplerlessReads/test_read_2D_array.cpp | 1 + test_conformance/images/samplerlessReads/test_read_3D.cpp | 1 + 31 files changed, 32 insertions(+) diff --git a/test_conformance/images/clCopyImage/test_copy_1D.cpp b/test_conformance/images/clCopyImage/test_copy_1D.cpp index 2c996c72..0f6f3ce4 100644 --- a/test_conformance/images/clCopyImage/test_copy_1D.cpp +++ b/test_conformance/images/clCopyImage/test_copy_1D.cpp @@ -113,6 +113,7 @@ int test_copy_image_set_1D( cl_device_id device, cl_context context, cl_command_ if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp index 0b616934..f0b610bb 100644 --- a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp +++ b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp @@ -118,6 +118,7 @@ int test_copy_image_set_1D_array( cl_device_id device, cl_context context, cl_co if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clCopyImage/test_copy_2D.cpp b/test_conformance/images/clCopyImage/test_copy_2D.cpp index 1a69a1fe..448b47f0 100644 --- a/test_conformance/images/clCopyImage/test_copy_2D.cpp +++ b/test_conformance/images/clCopyImage/test_copy_2D.cpp @@ -125,6 +125,7 @@ int test_copy_image_set_2D( cl_device_id device, cl_context context, cl_command_ if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = 
(cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp index eb6dd552..1819d87c 100644 --- a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp +++ b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp @@ -224,6 +224,7 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_context context, cl if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp index 8a56c95f..4ab6b42a 100644 --- a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp +++ b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp @@ -230,6 +230,7 @@ int test_copy_image_set_2D_3D( cl_device_id device, cl_context context, cl_comma if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp index 6327ba58..3376bf9a 100644 --- a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp +++ b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp @@ -71,6 +71,7 @@ int test_copy_image_set_2D_array( cl_device_id device, cl_context context, cl_co if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clCopyImage/test_copy_3D.cpp b/test_conformance/images/clCopyImage/test_copy_3D.cpp index da6731d7..cdfdccec 100644 --- a/test_conformance/images/clCopyImage/test_copy_3D.cpp +++ b/test_conformance/images/clCopyImage/test_copy_3D.cpp @@ -57,6 +57,7 @@ int test_copy_image_set_3D( cl_device_id device, cl_context context, cl_command_ if (memSize > 
(cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp index c098f645..1da1e477 100644 --- a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp +++ b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp @@ -251,6 +251,7 @@ int test_copy_image_set_3D_2D_array(cl_device_id device, cl_context context, cl_ if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clFillImage/test_fill_1D.cpp b/test_conformance/images/clFillImage/test_fill_1D.cpp index c3f23185..b1550bf3 100644 --- a/test_conformance/images/clFillImage/test_fill_1D.cpp +++ b/test_conformance/images/clFillImage/test_fill_1D.cpp @@ -80,6 +80,7 @@ int test_fill_image_set_1D( cl_device_id device, cl_context context, cl_command_ if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if ( gTestSmallImages ) diff --git a/test_conformance/images/clFillImage/test_fill_1D_array.cpp b/test_conformance/images/clFillImage/test_fill_1D_array.cpp index b4347a47..be32ec6a 100644 --- a/test_conformance/images/clFillImage/test_fill_1D_array.cpp +++ b/test_conformance/images/clFillImage/test_fill_1D_array.cpp @@ -83,6 +83,7 @@ int test_fill_image_set_1D_array( cl_device_id device, cl_context context, cl_co if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if ( gTestSmallImages ) diff --git a/test_conformance/images/clFillImage/test_fill_2D.cpp b/test_conformance/images/clFillImage/test_fill_2D.cpp index bb66fc27..e941abcf 100644 --- a/test_conformance/images/clFillImage/test_fill_2D.cpp +++ b/test_conformance/images/clFillImage/test_fill_2D.cpp @@ -83,6 +83,7 @@ int test_fill_image_set_2D( 
cl_device_id device, cl_context context, cl_command_ if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if ( gTestSmallImages ) diff --git a/test_conformance/images/clFillImage/test_fill_2D_array.cpp b/test_conformance/images/clFillImage/test_fill_2D_array.cpp index 3265aab0..38196cfc 100644 --- a/test_conformance/images/clFillImage/test_fill_2D_array.cpp +++ b/test_conformance/images/clFillImage/test_fill_2D_array.cpp @@ -87,6 +87,7 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_context context, cl_co if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if ( gTestSmallImages ) diff --git a/test_conformance/images/clFillImage/test_fill_3D.cpp b/test_conformance/images/clFillImage/test_fill_3D.cpp index 9db0ac7c..0b8e4e58 100644 --- a/test_conformance/images/clFillImage/test_fill_3D.cpp +++ b/test_conformance/images/clFillImage/test_fill_3D.cpp @@ -87,6 +87,7 @@ int test_fill_image_set_3D( cl_device_id device, cl_context context, cl_command_ if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if ( gTestSmallImages ) diff --git a/test_conformance/images/clGetInfo/test_1D.cpp b/test_conformance/images/clGetInfo/test_1D.cpp index 0d704b82..7e044856 100644 --- a/test_conformance/images/clGetInfo/test_1D.cpp +++ b/test_conformance/images/clGetInfo/test_1D.cpp @@ -46,6 +46,7 @@ int test_get_image_info_1D( cl_device_id device, cl_context context, cl_image_fo if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp index 447fc7c2..c35bf22b 100644 --- a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp +++ b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp @@ -44,6 +44,7 @@ int 
test_get_image_info_1D_array( cl_device_id device, cl_context context, cl_im if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) @@ -168,6 +169,7 @@ int test_get_image_info_2D_array( cl_device_id device, cl_context context, cl_im if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clGetInfo/test_2D.cpp b/test_conformance/images/clGetInfo/test_2D.cpp index 74a60123..764b186d 100644 --- a/test_conformance/images/clGetInfo/test_2D.cpp +++ b/test_conformance/images/clGetInfo/test_2D.cpp @@ -285,6 +285,7 @@ int test_get_image_info_2D( cl_device_id device, cl_context context, cl_image_fo if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clGetInfo/test_3D.cpp b/test_conformance/images/clGetInfo/test_3D.cpp index af5062e3..e1261863 100644 --- a/test_conformance/images/clGetInfo/test_3D.cpp +++ b/test_conformance/images/clGetInfo/test_3D.cpp @@ -47,6 +47,7 @@ int test_get_image_info_3D( cl_device_id device, cl_context context, cl_image_fo if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clReadWriteImage/test_read_1D.cpp b/test_conformance/images/clReadWriteImage/test_read_1D.cpp index 42933c0f..2d94dc82 100644 --- a/test_conformance/images/clReadWriteImage/test_read_1D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_1D.cpp @@ -187,6 +187,7 @@ int test_read_image_set_1D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp 
b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp index efd2a795..cc902042 100644 --- a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp @@ -191,6 +191,7 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clReadWriteImage/test_read_2D.cpp b/test_conformance/images/clReadWriteImage/test_read_2D.cpp index b7f8553b..b6102874 100644 --- a/test_conformance/images/clReadWriteImage/test_read_2D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_2D.cpp @@ -194,6 +194,7 @@ int test_read_image_set_2D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp index 5889ad6a..401b0e4d 100644 --- a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp @@ -169,6 +169,7 @@ int test_read_image_set_2D_array(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clReadWriteImage/test_read_3D.cpp b/test_conformance/images/clReadWriteImage/test_read_3D.cpp index 6f73f423..ced04abf 100644 --- a/test_conformance/images/clReadWriteImage/test_read_3D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_3D.cpp @@ -174,6 +174,7 @@ int test_read_image_set_3D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( 
gTestSmallImages ) diff --git a/test_conformance/images/kernel_image_methods/test_1D.cpp b/test_conformance/images/kernel_image_methods/test_1D.cpp index 0059d4c2..934e78ba 100644 --- a/test_conformance/images/kernel_image_methods/test_1D.cpp +++ b/test_conformance/images/kernel_image_methods/test_1D.cpp @@ -171,6 +171,7 @@ int test_get_image_info_1D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/kernel_image_methods/test_1D_array.cpp b/test_conformance/images/kernel_image_methods/test_1D_array.cpp index 797161c4..a824f088 100644 --- a/test_conformance/images/kernel_image_methods/test_1D_array.cpp +++ b/test_conformance/images/kernel_image_methods/test_1D_array.cpp @@ -181,6 +181,7 @@ int test_get_image_info_1D_array(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/kernel_image_methods/test_2D.cpp b/test_conformance/images/kernel_image_methods/test_2D.cpp index b0d4a708..07f8d929 100644 --- a/test_conformance/images/kernel_image_methods/test_2D.cpp +++ b/test_conformance/images/kernel_image_methods/test_2D.cpp @@ -232,6 +232,7 @@ int test_get_image_info_2D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/samplerlessReads/test_iterations.cpp b/test_conformance/images/samplerlessReads/test_iterations.cpp index 55eaaf48..e2f89aad 100644 --- a/test_conformance/images/samplerlessReads/test_iterations.cpp +++ b/test_conformance/images/samplerlessReads/test_iterations.cpp @@ -215,6 +215,7 @@ int test_read_image_set_2D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = 
(cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } // Determine types diff --git a/test_conformance/images/samplerlessReads/test_read_1D.cpp b/test_conformance/images/samplerlessReads/test_read_1D.cpp index aa261b7e..6ed9910a 100644 --- a/test_conformance/images/samplerlessReads/test_read_1D.cpp +++ b/test_conformance/images/samplerlessReads/test_read_1D.cpp @@ -215,6 +215,7 @@ int test_read_image_set_1D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } // Determine types diff --git a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp index fb0c2632..677eb9f1 100644 --- a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp +++ b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp @@ -214,6 +214,7 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } // Determine types diff --git a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp index 7a3084d3..c3a991a7 100644 --- a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp +++ b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp @@ -219,6 +219,7 @@ int test_read_image_set_1D_buffer(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } // note: image_buffer test uses image1D for results validation. 
diff --git a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp index 99f24266..8273f538 100644 --- a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp +++ b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp @@ -202,6 +202,7 @@ int test_read_image_set_2D_array(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } // Determine types diff --git a/test_conformance/images/samplerlessReads/test_read_3D.cpp b/test_conformance/images/samplerlessReads/test_read_3D.cpp index cf411407..0df46c86 100644 --- a/test_conformance/images/samplerlessReads/test_read_3D.cpp +++ b/test_conformance/images/samplerlessReads/test_read_3D.cpp @@ -206,6 +206,7 @@ int test_read_image_set_3D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } // Determine types -- cgit v1.2.3 From d9a938b698985ec2377786299dd96db189d7ca41 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 4 Oct 2022 17:28:29 +0100 Subject: Factor out GetTernaryKernel (#1511) Use a common function to create the kernel source code for testing 3-argument math builtins. This reduces code duplication. 1-argument and 2-argument math kernel construction will be factored out in future work. Change the kernels to use preprocessor defines for argument types and undef values, to make the CTS code easier to read. 
Co-authored-by: Marco Antognini Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt Co-authored-by: Marco Antognini --- test_conformance/math_brute_force/CMakeLists.txt | 1 + test_conformance/math_brute_force/common.cpp | 170 +++++++++++++++++++++ test_conformance/math_brute_force/common.h | 17 +++ test_conformance/math_brute_force/mad_double.cpp | 95 +----------- test_conformance/math_brute_force/mad_float.cpp | 93 +---------- .../math_brute_force/ternary_double.cpp | 95 +----------- .../math_brute_force/ternary_float.cpp | 93 +---------- 7 files changed, 216 insertions(+), 348 deletions(-) create mode 100644 test_conformance/math_brute_force/common.cpp diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index 1c96f521..32814026 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -9,6 +9,7 @@ set(${MODULE_NAME}_SOURCES binary_operator_float.cpp binary_two_results_i_double.cpp binary_two_results_i_float.cpp + common.cpp common.h function_list.cpp function_list.h diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp new file mode 100644 index 00000000..f5e9f993 --- /dev/null +++ b/test_conformance/math_brute_force/common.cpp @@ -0,0 +1,170 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "common.h" + +#include "utility.h" // for sizeNames and sizeValues. + +#include +#include + +namespace { + +const char *GetTypeName(ParameterType type) +{ + switch (type) + { + case ParameterType::Float: return "float"; + case ParameterType::Double: return "double"; + } + return nullptr; +} + +const char *GetUndefValue(ParameterType type) +{ + switch (type) + { + case ParameterType::Float: + case ParameterType::Double: return "NAN"; + } + return nullptr; +} + +void EmitDefineType(std::ostringstream &kernel, const char *name, + ParameterType type, int vector_size_index) +{ + kernel << "#define " << name << " " << GetTypeName(type) + << sizeNames[vector_size_index] << '\n'; + kernel << "#define " << name << "_SCALAR " << GetTypeName(type) << '\n'; +} + +void EmitDefineUndef(std::ostringstream &kernel, const char *name, + ParameterType type) +{ + kernel << "#define " << name << " " << GetUndefValue(type) << '\n'; +} + +void EmitEnableExtension(std::ostringstream &kernel, ParameterType type) +{ + switch (type) + { + case ParameterType::Double: + kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + break; + + case ParameterType::Float: + // No extension required. + break; + } +} + +} // anonymous namespace + +std::string GetKernelName(int vector_size_index) +{ + return std::string("math_kernel") + sizeNames[vector_size_index]; +} + +std::string GetTernaryKernel(const std::string &kernel_name, + const char *builtin, ParameterType retType, + ParameterType type1, ParameterType type2, + ParameterType type3, int vector_size_index) +{ + // To keep the kernel code readable, use macros for types and undef values. 
+ std::ostringstream kernel; + EmitDefineType(kernel, "RETTYPE", retType, vector_size_index); + EmitDefineType(kernel, "TYPE1", type1, vector_size_index); + EmitDefineType(kernel, "TYPE2", type2, vector_size_index); + EmitDefineType(kernel, "TYPE3", type3, vector_size_index); + EmitDefineUndef(kernel, "UNDEF1", type1); + EmitDefineUndef(kernel, "UNDEF2", type2); + EmitDefineUndef(kernel, "UNDEF3", type3); + EmitEnableExtension(kernel, type1); + + // clang-format off + const char *kernel_nonvec3[] = { R"( +__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out, + __global TYPE1* in1, + __global TYPE2* in2, + __global TYPE3* in3) +{ + size_t i = get_global_id(0); + out[i] = )", builtin, R"((in1[i], in2[i], in3[i]); +} +)" }; + + const char *kernel_vec3[] = { R"( +__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out, + __global TYPE1_SCALAR* in1, + __global TYPE2_SCALAR* in2, + __global TYPE3_SCALAR* in3) +{ + size_t i = get_global_id(0); + + if (i + 1 < get_global_size(0)) + { + TYPE1 a = vload3(0, in1 + 3 * i); + TYPE2 b = vload3(0, in2 + 3 * i); + TYPE3 c = vload3(0, in3 + 3 * i); + RETTYPE res = )", builtin, R"((a, b, c); + vstore3(res, 0, out + 3 * i); + } + else + { + // Figure out how many elements are left over after + // BUFFER_SIZE % (3 * sizeof(type)). + // Assume power of two buffer size. 
+ size_t parity = i & 1; + TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1); + TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2); + TYPE3 c = (TYPE3)(UNDEF3, UNDEF3, UNDEF3); + switch (parity) + { + case 0: + a.y = in1[3 * i + 1]; + b.y = in2[3 * i + 1]; + c.y = in3[3 * i + 1]; + // fall through + case 1: + a.x = in1[3 * i]; + b.x = in2[3 * i]; + c.x = in3[3 * i]; + break; + } + + RETTYPE res = )", builtin, R"((a, b, c); + + switch (parity) + { + case 0: + out[3 * i + 1] = res.y; + // fall through + case 1: + out[3 * i] = res.x; + break; + } + } +} +)" }; + // clang-format on + + if (sizeValues[vector_size_index] != 3) + for (const auto &chunk : kernel_nonvec3) kernel << chunk; + else + for (const auto &chunk : kernel_vec3) kernel << chunk; + + return kernel.str(); +} diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h index 6f17898f..143814ca 100644 --- a/test_conformance/math_brute_force/common.h +++ b/test_conformance/math_brute_force/common.h @@ -20,6 +20,7 @@ #include "utility.h" #include +#include #include // Array of thread-specific kernels for each vector size. @@ -31,6 +32,22 @@ using Programs = std::array; // Array of buffers for each vector size. using Buffers = std::array; +// Types supported for kernel code generation. +enum class ParameterType +{ + Float, + Double, +}; + +// Return kernel name suffixed with vector size. +std::string GetKernelName(int vector_size_index); + +// Generate kernel code for the given builtin function/operator. +std::string GetTernaryKernel(const std::string &kernel_name, + const char *builtin, ParameterType retType, + ParameterType type1, ParameterType type2, + ParameterType type3, int vector_size_index); + // Information to generate OpenCL kernels. 
struct BuildKernelInfo { diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp index 3def6a80..8d8fec52 100644 --- a/test_conformance/math_brute_force/mad_double.cpp +++ b/test_conformance/math_brute_force/mad_double.cpp @@ -26,94 +26,13 @@ namespace { int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, bool relaxedMode) { - const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global double", - sizeNames[vectorSize], - "* out, __global double", - sizeNames[vectorSize], - "* in1, __global double", - sizeNames[vectorSize], - "* in2, __global double", - sizeNames[vectorSize], - "* in3 )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " out[i] = ", - name, - "( in1[i], in2[i], in3[i] );\n" - "}\n" }; - - const char *c3[] = { - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global double* out, __global double* in, __global double* in2, " - "__global double* in3)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 d0 = vload3( 0, in + 3 * i );\n" - " double3 d1 = vload3( 0, in2 + 3 * i );\n" - " double3 d2 = vload3( 0, in3 + 3 * i );\n" - " d0 = ", - name, - "( d0, d1, d2 );\n" - " vstore3( d0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are " - "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " - "buffer size \n" - " double3 d0;\n" - " double3 d1;\n" - " double3 d2;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " d0 = (double3)( in[3*i], NAN, NAN ); \n" - " d1 = (double3)( in2[3*i], NAN, NAN ); \n" - " d2 = (double3)( in3[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" - " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " d0 = ", - name, - "( d0, d1, d2 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = d0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = d0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c) / sizeof(c[0]); - - if (sizeValues[vectorSize] == 3) - { - kern = c3; - kernSize = sizeof(c3) / sizeof(c3[0]); - } - - char testName[32]; - snprintf(testName, sizeof(testName) - 1, "math_kernel%s", - sizeNames[vectorSize]); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); + auto kernel_name = GetKernelName(vectorSize); + auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double, + ParameterType::Double, ParameterType::Double, + ParameterType::Double, vectorSize); + std::array sources{ source.c_str() }; + return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p, + relaxedMode); } struct BuildKernelInfo2 diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp index 498f25eb..04ac5aa6 100644 --- a/test_conformance/math_brute_force/mad_float.cpp +++ b/test_conformance/math_brute_force/mad_float.cpp @@ -26,92 +26,13 @@ namespace { int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, bool relaxedMode) { - const char *c[] = { "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global float", - sizeNames[vectorSize], - "* out, __global float", - 
sizeNames[vectorSize], - "* in1, __global float", - sizeNames[vectorSize], - "* in2, __global float", - sizeNames[vectorSize], - "* in3 )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " out[i] = ", - name, - "( in1[i], in2[i], in3[i] );\n" - "}\n" }; - - const char *c3[] = { - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global float* out, __global float* in, __global float* in2, " - "__global float* in3)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " float3 f1 = vload3( 0, in2 + 3 * i );\n" - " float3 f2 = vload3( 0, in3 + 3 * i );\n" - " f0 = ", - name, - "( f0, f1, f2 );\n" - " vstore3( f0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are " - "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " - "buffer size \n" - " float3 f0;\n" - " float3 f1;\n" - " float3 f2;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " f1 = (float3)( in2[3*i], NAN, NAN ); \n" - " f2 = (float3)( in3[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" - " f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " f0 = ", - name, - "( f0, f1, f2 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c) / sizeof(c[0]); - - if (sizeValues[vectorSize] == 3) - { - kern = c3; - kernSize = sizeof(c3) / sizeof(c3[0]); - } - - char testName[32]; - snprintf(testName, sizeof(testName) - 1, "math_kernel%s", - sizeNames[vectorSize]); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); + auto kernel_name = GetKernelName(vectorSize); + 
auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float, + ParameterType::Float, ParameterType::Float, + ParameterType::Float, vectorSize); + std::array sources{ source.c_str() }; + return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p, + relaxedMode); } struct BuildKernelInfo2 diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index a7fa5625..b5f1ab09 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -30,94 +30,13 @@ namespace { int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, bool relaxedMode) { - const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global double", - sizeNames[vectorSize], - "* out, __global double", - sizeNames[vectorSize], - "* in1, __global double", - sizeNames[vectorSize], - "* in2, __global double", - sizeNames[vectorSize], - "* in3 )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " out[i] = ", - name, - "( in1[i], in2[i], in3[i] );\n" - "}\n" }; - - const char *c3[] = { - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global double* out, __global double* in, __global double* in2, " - "__global double* in3)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 d0 = vload3( 0, in + 3 * i );\n" - " double3 d1 = vload3( 0, in2 + 3 * i );\n" - " double3 d2 = vload3( 0, in3 + 3 * i );\n" - " d0 = ", - name, - "( d0, d1, d2 );\n" - " vstore3( d0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are " - "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " - "buffer size \n" - " double3 d0;\n" - " double3 d1;\n" - " double3 d2;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " d0 = (double3)( in[3*i], NAN, NAN ); \n" - " d1 = (double3)( in2[3*i], NAN, NAN ); \n" - " d2 = (double3)( in3[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" - " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " d0 = ", - name, - "( d0, d1, d2 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = d0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = d0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c) / sizeof(c[0]); - - if (sizeValues[vectorSize] == 3) - { - kern = c3; - kernSize = sizeof(c3) / sizeof(c3[0]); - } - - char testName[32]; - snprintf(testName, sizeof(testName) - 1, "math_kernel%s", - sizeNames[vectorSize]); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); + auto kernel_name = GetKernelName(vectorSize); + auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double, + ParameterType::Double, ParameterType::Double, + ParameterType::Double, vectorSize); + std::array sources{ source.c_str() }; + return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p, + relaxedMode); } struct BuildKernelInfo2 diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index 3b8c2c3b..cf361841 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -30,92 +30,13 @@ namespace { int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, bool relaxedMode) { - const char *c[] = { "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global float", - sizeNames[vectorSize], - "* out, __global float", - 
sizeNames[vectorSize], - "* in1, __global float", - sizeNames[vectorSize], - "* in2, __global float", - sizeNames[vectorSize], - "* in3 )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " out[i] = ", - name, - "( in1[i], in2[i], in3[i] );\n" - "}\n" }; - - const char *c3[] = { - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global float* out, __global float* in, __global float* in2, " - "__global float* in3)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " float3 f1 = vload3( 0, in2 + 3 * i );\n" - " float3 f2 = vload3( 0, in3 + 3 * i );\n" - " f0 = ", - name, - "( f0, f1, f2 );\n" - " vstore3( f0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are " - "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " - "buffer size \n" - " float3 f0;\n" - " float3 f1;\n" - " float3 f2;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " f1 = (float3)( in2[3*i], NAN, NAN ); \n" - " f2 = (float3)( in3[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" - " f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " f0 = ", - name, - "( f0, f1, f2 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c) / sizeof(c[0]); - - if (sizeValues[vectorSize] == 3) - { - kern = c3; - kernSize = sizeof(c3) / sizeof(c3[0]); - } - - char testName[32]; - snprintf(testName, sizeof(testName) - 1, "math_kernel%s", - sizeNames[vectorSize]); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); + auto kernel_name = GetKernelName(vectorSize); + 
auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float, + ParameterType::Float, ParameterType::Float, + ParameterType::Float, vectorSize); + std::array sources{ source.c_str() }; + return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p, + relaxedMode); } struct BuildKernelInfo2 -- cgit v1.2.3 From d285ebe5beb484702601d540f85a7b32f3b68643 Mon Sep 17 00:00:00 2001 From: niranjanjoshi121 <43807392+niranjanjoshi121@users.noreply.github.com> Date: Tue, 11 Oct 2022 22:02:47 +0530 Subject: Fix memory oob problem in test conversions (#1513) * Fix memory oob problem in test conversions Allocate memory for argc arguments instead of argc - 1. * Fix formatting issue. --- test_conformance/conversions/test_conversions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 765d09ff..2b18b925 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -343,7 +343,7 @@ int main (int argc, const char **argv ) static int ParseArgs( int argc, const char **argv ) { int i; - argList = (const char **)calloc( argc - 1, sizeof( char*) ); + argList = (const char **)calloc(argc, sizeof(char *)); argCount = 0; if( NULL == argList && argc > 1 ) -- cgit v1.2.3 From f6e37b17d2dc5152bb96f35a108e5e2a458c4237 Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Tue, 11 Oct 2022 18:34:33 +0200 Subject: Fix image test image2d_from_buffer_positive (#1515) --- .../images/kernel_read_write/test_cl_ext_image_from_buffer.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp index 1b3b04b7..2ce33a17 100644 --- a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp +++ 
b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp @@ -73,6 +73,12 @@ int image2d_from_buffer_positive(cl_device_id device, cl_context context, return TEST_SKIPPED_ITSELF; } + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + std::vector imageTypes{ CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, -- cgit v1.2.3 From 9e0ce2ba80b0af7e64b013918c8b46dad51107dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 11 Oct 2022 17:35:36 +0100 Subject: Produce JSON results even when a suite's init function reports SKIP or FAIL (#1521) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also tidy-up some surrounding code. Signed-off-by: Kévin Petit Signed-off-by: Kévin Petit --- test_common/harness/testHarness.cpp | 164 ++++++++++++++++++++---------------- 1 file changed, 92 insertions(+), 72 deletions(-) diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index b3863918..d07d982c 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -60,6 +60,54 @@ bool gCoreILProgram = true; #define DEFAULT_NUM_ELEMENTS 0x4000 +static int saveResultsToJson(const char *suiteName, test_definition testList[], + unsigned char selectedTestList[], + test_status resultTestList[], int testNum) +{ + char *fileName = getenv("CL_CONFORMANCE_RESULTS_FILENAME"); + if (fileName == nullptr) + { + return EXIT_SUCCESS; + } + + FILE *file = fopen(fileName, "w"); + if (NULL == file) + { + log_error("ERROR: Failed to open '%s' for writing results.\n", + fileName); + return EXIT_FAILURE; + } + + const char *save_map[] = { "success", "failure" }; + const char *result_map[] = { "pass", "fail", "skip" }; + const char *linebreak[] = { "", ",\n" }; + int add_linebreak = 0; + + 
fprintf(file, "{\n"); + fprintf(file, "\t\"cmd\": \"%s\",\n", suiteName); + fprintf(file, "\t\"results\": {\n"); + + for (int i = 0; i < testNum; ++i) + { + if (selectedTestList[i]) + { + fprintf(file, "%s\t\t\"%s\": \"%s\"", linebreak[add_linebreak], + testList[i].name, result_map[(int)resultTestList[i]]); + add_linebreak = 1; + } + } + fprintf(file, "\n"); + + fprintf(file, "\t}\n"); + fprintf(file, "}\n"); + + int ret = fclose(file) ? EXIT_FAILURE : EXIT_SUCCESS; + + log_info("Saving results to %s: %s!\n", fileName, save_map[ret]); + + return ret; +} + int runTestHarness(int argc, const char *argv[], int testNum, test_definition testList[], int forceNoContextCreation, cl_command_queue_properties queueProps) @@ -68,19 +116,28 @@ int runTestHarness(int argc, const char *argv[], int testNum, forceNoContextCreation, queueProps, NULL); } -int skip_init_info(int count) +int suite_did_not_pass_init(const char *suiteName, test_status status, + int testNum, test_definition testList[]) { - log_info("Test skipped while initialization\n"); - log_info("SKIPPED %d of %d tests.\n", count, count); - return EXIT_SUCCESS; -} + std::vector selectedTestList(testNum, 1); + std::vector resultTestList(testNum, status); -int fail_init_info(int count) -{ - log_info("Test failed while initialization\n"); - log_info("FAILED %d of %d tests.\n", count, count); - return EXIT_FAILURE; + int ret = saveResultsToJson(suiteName, testList, selectedTestList.data(), + resultTestList.data(), testNum); + + log_info("Test %s while initialization\n", + status == TEST_SKIP ? "skipped" : "failed"); + log_info("%s %d of %d tests.\n", status == TEST_SKIP ? "SKIPPED" : "FAILED", + testNum, testNum); + + if (ret != EXIT_SUCCESS) + { + return ret; + } + + return status == TEST_SKIP ? 
EXIT_SUCCESS : EXIT_FAILURE; } + void version_expected_info(const char *test_name, const char *api_name, const char *expected_version, const char *device_version) @@ -470,6 +527,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, log_error("Invalid device address bit size returned by device.\n"); return EXIT_FAILURE; } + const char *suiteName = argv[0]; if (gCompilationMode == kSpir_v) { test_status spirv_readiness = check_spirv_compilation_readiness(device); @@ -478,9 +536,15 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, switch (spirv_readiness) { case TEST_PASS: break; - case TEST_FAIL: return fail_init_info(testNum); - case TEST_SKIP: return skip_init_info(testNum); - case TEST_SKIPPED_ITSELF: return skip_init_info(testNum); + case TEST_FAIL: + return suite_did_not_pass_init(suiteName, TEST_FAIL, + testNum, testList); + case TEST_SKIP: + return suite_did_not_pass_init(suiteName, TEST_SKIP, + testNum, testList); + case TEST_SKIPPED_ITSELF: + return suite_did_not_pass_init(suiteName, TEST_SKIP, + testNum, testList); } } } @@ -492,9 +556,15 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, switch (status) { case TEST_PASS: break; - case TEST_FAIL: return fail_init_info(testNum); - case TEST_SKIP: return skip_init_info(testNum); - case TEST_SKIPPED_ITSELF: return skip_init_info(testNum); + case TEST_FAIL: + return suite_did_not_pass_init(suiteName, TEST_FAIL, testNum, + testList); + case TEST_SKIP: + return suite_did_not_pass_init(suiteName, TEST_SKIP, testNum, + testList); + case TEST_SKIPPED_ITSELF: + return suite_did_not_pass_init(suiteName, TEST_SKIP, testNum, + testList); } } @@ -574,49 +644,6 @@ static int find_matching_tests(test_definition testList[], return EXIT_SUCCESS; } -static int saveResultsToJson(const char *fileName, const char *suiteName, - test_definition testList[], - unsigned char selectedTestList[], - test_status resultTestList[], int testNum) -{ - FILE *file = 
fopen(fileName, "w"); - if (NULL == file) - { - log_error("ERROR: Failed to open '%s' for writing results.\n", - fileName); - return EXIT_FAILURE; - } - - const char *save_map[] = { "success", "failure" }; - const char *result_map[] = { "pass", "fail", "skip" }; - const char *linebreak[] = { "", ",\n" }; - int add_linebreak = 0; - - fprintf(file, "{\n"); - fprintf(file, "\t\"cmd\": \"%s\",\n", suiteName); - fprintf(file, "\t\"results\": {\n"); - - for (int i = 0; i < testNum; ++i) - { - if (selectedTestList[i]) - { - fprintf(file, "%s\t\t\"%s\": \"%s\"", linebreak[add_linebreak], - testList[i].name, result_map[(int)resultTestList[i]]); - add_linebreak = 1; - } - } - fprintf(file, "\n"); - - fprintf(file, "\t}\n"); - fprintf(file, "}\n"); - - int ret = fclose(file) ? 1 : 0; - - log_info("Saving results to %s: %s!\n", fileName, save_map[ret]); - - return ret; -} - static void print_results(int failed, int count, const char *name) { if (count < failed) @@ -658,7 +685,6 @@ int parseAndCallCommandLineTests(int argc, const char *argv[], int ret = EXIT_SUCCESS; unsigned char *selectedTestList = (unsigned char *)calloc(testNum, 1); - test_status *resultTestList = NULL; if (argc == 1) { @@ -697,24 +723,19 @@ int parseAndCallCommandLineTests(int argc, const char *argv[], if (ret == EXIT_SUCCESS) { - resultTestList = - (test_status *)calloc(testNum, sizeof(*resultTestList)); + std::vector resultTestList(testNum, TEST_PASS); - callTestFunctions(testList, selectedTestList, resultTestList, testNum, - device, forceNoContextCreation, num_elements, + callTestFunctions(testList, selectedTestList, resultTestList.data(), + testNum, device, forceNoContextCreation, num_elements, queueProps); print_results(gFailCount, gTestCount, "sub-test"); print_results(gTestsFailed, gTestsFailed + gTestsPassed, "test"); - char *filename = getenv("CL_CONFORMANCE_RESULTS_FILENAME"); - if (filename != NULL) - { - ret = saveResultsToJson(filename, argv[0], testList, - selectedTestList, resultTestList, 
testNum); - } + ret = saveResultsToJson(argv[0], testList, selectedTestList, + resultTestList.data(), testNum); - if (std::any_of(resultTestList, resultTestList + testNum, + if (std::any_of(resultTestList.begin(), resultTestList.end(), [](test_status result) { switch (result) { @@ -730,7 +751,6 @@ int parseAndCallCommandLineTests(int argc, const char *argv[], } free(selectedTestList); - free(resultTestList); return ret; } -- cgit v1.2.3 From 35cab9c85bb12c78d46a7def998857c3d1e27633 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 11 Oct 2022 17:36:33 +0100 Subject: pipes: Fix readwrite verification function for fp64 (#1522) Use the appropriate function for verifying double precision values in the `pipe_readwrite_double` test. Change `verify_readwrite_double` to use `cl_long`, as `long long int` could be wider than 64 bits which would cause out-of-bound reads. This leaves no functional differences between `verify_readwrite_double` and `verify_readwrite_long`. Found by compiling with `-Wunused-function`, which flagged `verify_readwrite_double` as unused. 
Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/pipes/test_pipe_read_write.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test_conformance/pipes/test_pipe_read_write.cpp b/test_conformance/pipes/test_pipe_read_write.cpp index cb72e96b..425c7aee 100644 --- a/test_conformance/pipes/test_pipe_read_write.cpp +++ b/test_conformance/pipes/test_pipe_read_write.cpp @@ -414,9 +414,9 @@ static int verify_readwrite_ulong(void *ptr1, void *ptr2, int n) static int verify_readwrite_double(void *ptr1, void *ptr2, int n) { int i; - long long int sum_input = 0, sum_output = 0; - long long int *inptr = (long long int *)ptr1; - long long int *outptr = (long long int *)ptr2; + cl_long sum_input = 0, sum_output = 0; + cl_long *inptr = (cl_long *)ptr1; + cl_long *outptr = (cl_long *)ptr2; for(i = 0; i < n; i++) { @@ -1246,7 +1246,7 @@ int test_pipe_readwrite_double( cl_device_id deviceID, cl_context context, cl_co size_t min_alignment = get_min_alignment(context); - foo = verify_readwrite_long; + foo = verify_readwrite_double; ptrSizes[0] = sizeof(cl_double); ptrSizes[1] = ptrSizes[0] << 1; -- cgit v1.2.3 From 4b39b59469444d9085db302ab0d2dd5b07a9f257 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 13 Oct 2022 10:01:53 +0100 Subject: [NFC] clang-format basic/test_progvar.cpp (#1528) Manually reformat the `prog_src` variable which contains kernel code and disable clang-format on it. 
Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/basic/test_progvar.cpp | 1737 +++++++++++++++++++------------ 1 file changed, 1098 insertions(+), 639 deletions(-) diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp index 9c872be5..e202d276 100644 --- a/test_conformance/basic/test_progvar.cpp +++ b/test_conformance/basic/test_progvar.cpp @@ -15,12 +15,13 @@ // #include "harness/compat.h" -// Bug: Missing in spec: atomic_intptr_t is always supported if device is 32-bits. +// Bug: Missing in spec: atomic_intptr_t is always supported if device is +// 32-bits. // Bug: Missing in spec: CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE #define FLUSH fflush(stdout) -#define MAX_STR 16*1024 +#define MAX_STR 16 * 1024 #define ALIGNMENT 128 @@ -66,7 +67,11 @@ static int l_host_is_big_endian = 1; static size_t l_max_global_id0 = 0; static cl_bool l_linker_available = false; -#define check_error(errCode,msg,...) ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", ## __VA_ARGS__, __FILE__, __LINE__), 1) : 0) +#define check_error(errCode, msg, ...) \ + ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", \ + ##__VA_ARGS__, __FILE__, __LINE__), \ + 1) \ + : 0) //////////////////// // Info about types we can use for program scope variables. 
@@ -75,110 +80,135 @@ static cl_bool l_linker_available = false; class TypeInfo { public: - TypeInfo() : - name(""), - m_buf_elem_type(""), - m_is_vecbase(false), - m_is_atomic(false), - m_is_like_size_t(false), - m_is_bool(false), - m_elem_type(0), m_num_elem(0), - m_size(0), - m_value_size(0) - {} - TypeInfo(const char* name_arg) : - name(name_arg), - m_buf_elem_type(name_arg), - m_is_vecbase(false), - m_is_atomic(false), - m_is_like_size_t(false), - m_is_bool(false), - m_elem_type(0), m_num_elem(0), - m_size(0), - m_value_size(0) - { } + TypeInfo() + : name(""), m_buf_elem_type(""), m_is_vecbase(false), + m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false), + m_elem_type(0), m_num_elem(0), m_size(0), m_value_size(0) + {} + TypeInfo(const char* name_arg) + : name(name_arg), m_buf_elem_type(name_arg), m_is_vecbase(false), + m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false), + m_elem_type(0), m_num_elem(0), m_size(0), m_value_size(0) + {} // Vectors - TypeInfo( TypeInfo* elem_type, int num_elem ) : - m_is_vecbase(false), - m_is_atomic(false), - m_is_like_size_t(false), - m_is_bool(false), - m_elem_type(elem_type), - m_num_elem(num_elem) - { - char the_name[10]; // long enough for longest vector type name "double16" - snprintf(the_name,sizeof(the_name),"%s%d",elem_type->get_name_c_str(),m_num_elem); + TypeInfo(TypeInfo* elem_type, int num_elem) + : m_is_vecbase(false), m_is_atomic(false), m_is_like_size_t(false), + m_is_bool(false), m_elem_type(elem_type), m_num_elem(num_elem) + { + char + the_name[10]; // long enough for longest vector type name "double16" + snprintf(the_name, sizeof(the_name), "%s%d", + elem_type->get_name_c_str(), m_num_elem); this->name = std::string(the_name); this->m_buf_elem_type = std::string(the_name); this->m_value_size = num_elem * elem_type->get_size(); - if ( m_num_elem == 3 ) { + if (m_num_elem == 3) + { this->m_size = 4 * elem_type->get_size(); - } else { + } + else + { this->m_size = num_elem * 
elem_type->get_size(); } } const std::string& get_name(void) const { return name; } const char* get_name_c_str(void) const { return name.c_str(); } - TypeInfo& set_vecbase(void) { this->m_is_vecbase = true; return *this; } - TypeInfo& set_atomic(void) { this->m_is_atomic = true; return *this; } - TypeInfo& set_like_size_t(void) { + TypeInfo& set_vecbase(void) + { + this->m_is_vecbase = true; + return *this; + } + TypeInfo& set_atomic(void) + { + this->m_is_atomic = true; + return *this; + } + TypeInfo& set_like_size_t(void) + { this->m_is_like_size_t = true; - this->set_size( l_64bit_device ? 8 : 4 ); + this->set_size(l_64bit_device ? 8 : 4); this->m_buf_elem_type = l_64bit_device ? "ulong" : "uint"; return *this; } - TypeInfo& set_bool(void) { this->m_is_bool = true; return *this; } - TypeInfo& set_size(size_t n) { this->m_value_size = this->m_size = n; return *this; } - TypeInfo& set_buf_elem_type( const char* name ) { this->m_buf_elem_type = std::string(name); return *this; } + TypeInfo& set_bool(void) + { + this->m_is_bool = true; + return *this; + } + TypeInfo& set_size(size_t n) + { + this->m_value_size = this->m_size = n; + return *this; + } + TypeInfo& set_buf_elem_type(const char* name) + { + this->m_buf_elem_type = std::string(name); + return *this; + } const TypeInfo* elem_type(void) const { return m_elem_type; } int num_elem(void) const { return m_num_elem; } - bool is_vecbase(void) const {return m_is_vecbase;} - bool is_atomic(void) const {return m_is_atomic;} - bool is_atomic_64bit(void) const {return m_is_atomic && m_size == 8;} - bool is_like_size_t(void) const {return m_is_like_size_t;} - bool is_bool(void) const {return m_is_bool;} - size_t get_size(void) const {return m_size;} - size_t get_value_size(void) const {return m_value_size;} + bool is_vecbase(void) const { return m_is_vecbase; } + bool is_atomic(void) const { return m_is_atomic; } + bool is_atomic_64bit(void) const { return m_is_atomic && m_size == 8; } + bool is_like_size_t(void) const 
{ return m_is_like_size_t; } + bool is_bool(void) const { return m_is_bool; } + size_t get_size(void) const { return m_size; } + size_t get_value_size(void) const { return m_value_size; } // When passing values of this type to a kernel, what buffer type // should be used? - const char* get_buf_elem_type(void) const { return m_buf_elem_type.c_str(); } + const char* get_buf_elem_type(void) const + { + return m_buf_elem_type.c_str(); + } - std::string as_string(const cl_uchar* value_ptr) const { + std::string as_string(const cl_uchar* value_ptr) const + { // This method would be shorter if I had a real handle to element // vector type. - if ( this->is_bool() ) { - std::string result( name ); + if (this->is_bool()) + { + std::string result(name); result += "<"; result += (*value_ptr ? "true" : "false"); result += ", "; char buf[10]; - sprintf(buf,"%02x",*value_ptr); + sprintf(buf, "%02x", *value_ptr); result += buf; result += ">"; return result; - } else if ( this->num_elem() ) { - std::string result( name ); + } + else if (this->num_elem()) + { + std::string result(name); result += "<"; - for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) { + for (unsigned ielem = 0; ielem < this->num_elem(); ielem++) + { char buf[MAX_STR]; - if ( ielem ) result += ", "; - for ( unsigned ibyte = 0; ibyte < this->m_elem_type->get_size() ; ibyte++ ) { - sprintf(buf + 2*ibyte,"%02x", value_ptr[ ielem * this->m_elem_type->get_size() + ibyte ] ); + if (ielem) result += ", "; + for (unsigned ibyte = 0; ibyte < this->m_elem_type->get_size(); + ibyte++) + { + sprintf(buf + 2 * ibyte, "%02x", + value_ptr[ielem * this->m_elem_type->get_size() + + ibyte]); } result += buf; } result += ">"; return result; - } else { - std::string result( name ); + } + else + { + std::string result(name); result += "<"; char buf[MAX_STR]; - for ( unsigned ibyte = 0; ibyte < this->get_size() ; ibyte++ ) { - sprintf(buf + 2*ibyte,"%02x", value_ptr[ ibyte ] ); + for (unsigned ibyte = 0; ibyte < 
this->get_size(); ibyte++) + { + sprintf(buf + 2 * ibyte, "%02x", value_ptr[ibyte]); } result += buf; result += ">"; @@ -189,51 +219,71 @@ public: // Initialize the given buffer to a constant value initialized as if it // were from the INIT_VAR macro below. // Only needs to support values 0 and 1. - void init( cl_uchar* buf, cl_uchar val) const { - if ( this->num_elem() ) { - for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) { + void init(cl_uchar* buf, cl_uchar val) const + { + if (this->num_elem()) + { + for (unsigned ielem = 0; ielem < this->num_elem(); ielem++) + { // Delegate! - this->init_elem( buf + ielem * this->get_value_size()/this->num_elem(), val ); + this->init_elem( + buf + ielem * this->get_value_size() / this->num_elem(), + val); } - } else { - init_elem( buf, val ); + } + else + { + init_elem(buf, val); } } private: - void init_elem( cl_uchar* buf, cl_uchar val ) const { - size_t elem_size = this->num_elem() ? this->get_value_size()/this->num_elem() : this->get_size(); - memset(buf,0,elem_size); - if ( val ) { - if ( strstr( name.c_str(), "float" ) ) { + void init_elem(cl_uchar* buf, cl_uchar val) const + { + size_t elem_size = this->num_elem() + ? this->get_value_size() / this->num_elem() + : this->get_size(); + memset(buf, 0, elem_size); + if (val) + { + if (strstr(name.c_str(), "float")) + { *(float*)buf = (float)val; return; } - if ( strstr( name.c_str(), "double" ) ) { + if (strstr(name.c_str(), "double")) + { *(double*)buf = (double)val; return; } - if ( this->is_bool() ) { *buf = (bool)val; return; } + if (this->is_bool()) + { + *buf = (bool)val; + return; + } // Write a single character value to the correct spot, // depending on host endianness. 
- if ( l_host_is_big_endian ) *(buf + elem_size-1) = (cl_uchar)val; - else *buf = (cl_uchar)val; + if (l_host_is_big_endian) + *(buf + elem_size - 1) = (cl_uchar)val; + else + *buf = (cl_uchar)val; } } -public: - void dump(FILE* fp) const { - fprintf(fp,"Type %s : <%d,%d,%s> ", name.c_str(), - (int)m_size, - (int)m_value_size, - m_buf_elem_type.c_str() ); - if ( this->m_elem_type ) fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(), this->num_elem() ); - if ( this->m_is_vecbase ) fprintf(fp, " vecbase"); - if ( this->m_is_bool ) fprintf(fp, " bool"); - if ( this->m_is_like_size_t ) fprintf(fp, " like-size_t"); - if ( this->m_is_atomic ) fprintf(fp, " atomic"); - fprintf(fp,"\n"); +public: + void dump(FILE* fp) const + { + fprintf(fp, "Type %s : <%d,%d,%s> ", name.c_str(), (int)m_size, + (int)m_value_size, m_buf_elem_type.c_str()); + if (this->m_elem_type) + fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(), + this->num_elem()); + if (this->m_is_vecbase) fprintf(fp, " vecbase"); + if (this->m_is_bool) fprintf(fp, " bool"); + if (this->m_is_like_size_t) fprintf(fp, " like-size_t"); + if (this->m_is_atomic) fprintf(fp, " atomic"); + fprintf(fp, "\n"); fflush(fp); } @@ -246,7 +296,8 @@ private: bool m_is_like_size_t; bool m_is_bool; size_t m_size; // Number of bytes of storage occupied by this type. - size_t m_value_size; // Number of bytes of value significant for this type. Differs for vec3. + size_t m_value_size; // Number of bytes of value significant for this type. + // Differs for vec3. // When passing values of this type to a kernel, what buffer type // should be used? 
@@ -256,46 +307,65 @@ private: }; -#define NUM_SCALAR_TYPES (8+2) // signed and unsigned integral types, float and double -#define NUM_VECTOR_SIZES (5) // 2,3,4,8,16 -#define NUM_PLAIN_TYPES \ - 5 /*boolean and size_t family */ \ - + NUM_SCALAR_TYPES \ - + NUM_SCALAR_TYPES*NUM_VECTOR_SIZES \ - + 10 /* atomic types */ +#define NUM_SCALAR_TYPES \ + (8 + 2) // signed and unsigned integral types, float and double +#define NUM_VECTOR_SIZES (5) // 2,3,4,8,16 +#define NUM_PLAIN_TYPES \ + 5 /*boolean and size_t family */ \ + + NUM_SCALAR_TYPES + NUM_SCALAR_TYPES* NUM_VECTOR_SIZES \ + + 10 /* atomic types */ // Need room for plain, array, pointer, struct -#define MAX_TYPES (4*NUM_PLAIN_TYPES) +#define MAX_TYPES (4 * NUM_PLAIN_TYPES) static TypeInfo type_info[MAX_TYPES]; static int num_type_info = 0; // Number of valid entries in type_info[] - - // A helper class to form kernel source arguments for clCreateProgramWithSource. class StringTable { public: - StringTable() : m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings() {} + StringTable(): m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings() + {} ~StringTable() { release_frozen(); } - void add(std::string s) { release_frozen(); m_strings.push_back(s); } + void add(std::string s) + { + release_frozen(); + m_strings.push_back(s); + } - const size_t num_str() { freeze(); return m_strings.size(); } - const char** strs() { freeze(); return m_c_strs; } - const size_t* lengths() { freeze(); return m_lengths; } + const size_t num_str() + { + freeze(); + return m_strings.size(); + } + const char** strs() + { + freeze(); + return m_c_strs; + } + const size_t* lengths() + { + freeze(); + return m_lengths; + } private: - void freeze(void) { - if ( !m_frozen ) { + void freeze(void) + { + if (!m_frozen) + { release_frozen(); - m_c_strs = (const char**) malloc(sizeof(const char*) * m_strings.size()); - m_lengths = (size_t*) malloc(sizeof(size_t) * m_strings.size()); - assert( m_c_strs ); - assert( m_lengths ); + 
m_c_strs = + (const char**)malloc(sizeof(const char*) * m_strings.size()); + m_lengths = (size_t*)malloc(sizeof(size_t) * m_strings.size()); + assert(m_c_strs); + assert(m_lengths); - for ( size_t i = 0; i < m_strings.size() ; i++ ) { + for (size_t i = 0; i < m_strings.size(); i++) + { m_c_strs[i] = m_strings[i].c_str(); m_lengths[i] = strlen(m_c_strs[i]); } @@ -303,9 +373,18 @@ private: m_frozen = true; } } - void release_frozen(void) { - if ( m_c_strs ) { free(m_c_strs); m_c_strs = 0; } - if ( m_lengths ) { free(m_lengths); m_lengths = 0; } + void release_frozen(void) + { + if (m_c_strs) + { + free(m_c_strs); + m_c_strs = 0; + } + if (m_lengths) + { + free(m_lengths); + m_lengths = 0; + } m_frozen = false; } @@ -325,11 +404,15 @@ static const char* l_get_fp64_pragma(void); static const char* l_get_cles_int64_pragma(void); static int l_build_type_table(cl_device_id device); -static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret); +static int l_get_device_info(cl_device_id device, size_t* max_size_ret, + size_t* pref_size_ret); -static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state ); -static int l_compare( const cl_uchar* expected, const cl_uchar* received, unsigned num_values, const TypeInfo&ti ); -static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti ); +static void l_set_randomly(cl_uchar* buf, size_t buf_size, + RandomSeed& rand_state); +static int l_compare(const cl_uchar* expected, const cl_uchar* received, + unsigned num_values, const TypeInfo& ti); +static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, + unsigned src_idx, const TypeInfo& ti); static std::string conversion_functions(const TypeInfo& ti); static std::string global_decls(const TypeInfo& ti, bool with_init); @@ -337,90 +420,123 @@ static std::string global_check_function(const TypeInfo& ti); static std::string writer_function(const TypeInfo& ti); 
static std::string reader_function(const TypeInfo& ti); -static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue ); -static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ); - -static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue ); -static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ); +static int l_write_read(cl_device_id device, cl_context context, + cl_command_queue queue); +static int l_write_read_for_type(cl_device_id device, cl_context context, + cl_command_queue queue, const TypeInfo& ti, + RandomSeed& rand_state); -static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size ); -static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size, bool separate_compilation ); +static int l_init_write_read(cl_device_id device, cl_context context, + cl_command_queue queue); +static int l_init_write_read_for_type(cl_device_id device, cl_context context, + cl_command_queue queue, + const TypeInfo& ti, + RandomSeed& rand_state); +static int l_capacity(cl_device_id device, cl_context context, + cl_command_queue queue, size_t max_size); +static int l_user_type(cl_device_id device, cl_context context, + cl_command_queue queue, size_t max_size, + bool separate_compilation); //////////////////// // File scope function definitions -static cl_int print_build_log(cl_program program, cl_uint num_devices, cl_device_id *device_list, cl_uint count, const char **strings, const size_t *lengths, const char* options) +static cl_int print_build_log(cl_program program, cl_uint num_devices, + cl_device_id* device_list, cl_uint count, + const char** strings, const size_t* lengths, + const char* options) { cl_uint i; cl_int error; BufferOwningPtr 
<cl_device_id> devices; - if(num_devices == 0 || device_list == NULL) + if (num_devices == 0 || device_list == NULL) { - error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, NULL); + error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, + sizeof(num_devices), &num_devices, NULL); test_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed"); - device_list = (cl_device_id*)malloc(sizeof(cl_device_id)*num_devices); + device_list = (cl_device_id*)malloc(sizeof(cl_device_id) * num_devices); devices.reset(device_list); memset(device_list, 0, sizeof(cl_device_id) * num_devices); - error = clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * num_devices, device_list, NULL); + error = clGetProgramInfo(program, CL_PROGRAM_DEVICES, + sizeof(cl_device_id) * num_devices, + device_list, NULL); test_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed"); } cl_uint z; bool sourcePrinted = false; - for(z = 0; z < num_devices; z++) + for (z = 0; z < num_devices; z++) { char deviceName[4096] = ""; - error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL); - check_error(error, "Device \"%d\" failed to return a name. clGetDeviceInfo CL_DEVICE_NAME failed", z); + error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME, + sizeof(deviceName), deviceName, NULL); + check_error(error, + "Device \"%d\" failed to return a name. 
clGetDeviceInfo " + "CL_DEVICE_NAME failed", + z); cl_build_status buildStatus; - error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); - check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed"); + error = clGetProgramBuildInfo(program, device_list[z], + CL_PROGRAM_BUILD_STATUS, + sizeof(buildStatus), &buildStatus, NULL); + check_error(error, + "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed"); - if(buildStatus != CL_BUILD_SUCCESS) + if (buildStatus != CL_BUILD_SUCCESS) { - if(!sourcePrinted) + if (!sourcePrinted) { log_error("Build options: %s\n", options); - if(count && strings) + if (count && strings) { log_error("Original source is: ------------\n"); - for(i = 0; i < count; i++) log_error("%s", strings[i]); + for (i = 0; i < count; i++) log_error("%s", strings[i]); } sourcePrinted = true; } char statusString[64] = ""; if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS) - sprintf(statusString, "CL_BUILD_SUCCESS"); + sprintf(statusString, "CL_BUILD_SUCCESS"); else if (buildStatus == (cl_build_status)CL_BUILD_NONE) - sprintf(statusString, "CL_BUILD_NONE"); + sprintf(statusString, "CL_BUILD_NONE"); else if (buildStatus == (cl_build_status)CL_BUILD_ERROR) - sprintf(statusString, "CL_BUILD_ERROR"); + sprintf(statusString, "CL_BUILD_ERROR"); else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS) - sprintf(statusString, "CL_BUILD_IN_PROGRESS"); + sprintf(statusString, "CL_BUILD_IN_PROGRESS"); else - sprintf(statusString, "UNKNOWN (%d)", buildStatus); + sprintf(statusString, "UNKNOWN (%d)", buildStatus); - log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString); + log_error("Build not successful for device \"%s\", status: %s\n", + deviceName, statusString); size_t paramSize = 0; - error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, 0, NULL, &paramSize); - if(check_error(error, "clGetProgramBuildInfo 
CL_PROGRAM_BUILD_LOG failed")) break; + error = clGetProgramBuildInfo(program, device_list[z], + CL_PROGRAM_BUILD_LOG, 0, NULL, + &paramSize); + if (check_error( + error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed")) + break; std::string log; - log.resize(paramSize/sizeof(char)); - - error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL); - if(check_error(error, "Device %d (%s) failed to return a build log", z, deviceName)) break; - if(log[0] == 0) log_error("clGetProgramBuildInfo returned an empty log.\n"); + log.resize(paramSize / sizeof(char)); + + error = clGetProgramBuildInfo(program, device_list[z], + CL_PROGRAM_BUILD_LOG, paramSize, + &log[0], NULL); + if (check_error(error, + "Device %d (%s) failed to return a build log", z, + deviceName)) + break; + if (log[0] == 0) + log_error("clGetProgramBuildInfo returned an empty log.\n"); else { log_error("Build log:\n", deviceName); @@ -433,25 +549,29 @@ static cl_int print_build_log(cl_program program, cl_uint num_devices, cl_device static void l_load_abilities(cl_device_id device) { - l_has_half = is_extension_available(device,"cl_khr_fp16"); - l_has_double = is_extension_available(device,"cl_khr_fp64"); - l_has_cles_int64 = is_extension_available(device,"cles_khr_int64"); + l_has_half = is_extension_available(device, "cl_khr_fp16"); + l_has_double = is_extension_available(device, "cl_khr_fp64"); + l_has_cles_int64 = is_extension_available(device, "cles_khr_int64"); - l_has_int64_atomics - = is_extension_available(device,"cl_khr_int64_base_atomics") - && is_extension_available(device,"cl_khr_int64_extended_atomics"); + l_has_int64_atomics = + is_extension_available(device, "cl_khr_int64_base_atomics") + && is_extension_available(device, "cl_khr_int64_extended_atomics"); { int status = CL_SUCCESS; cl_uint addr_bits = 32; - status = clGetDeviceInfo(device,CL_DEVICE_ADDRESS_BITS,sizeof(addr_bits),&addr_bits,0); - l_64bit_device = ( status == CL_SUCCESS && addr_bits 
== 64 ); + status = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, + sizeof(addr_bits), &addr_bits, 0); + l_64bit_device = (status == CL_SUCCESS && addr_bits == 64); } // 32-bit devices always have intptr atomics. l_has_intptr_atomics = !l_64bit_device || l_has_int64_atomics; - union { char c[4]; int i; } probe; + union { + char c[4]; + int i; + } probe; probe.i = 1; l_host_is_big_endian = !probe.c[0]; @@ -459,33 +579,40 @@ static void l_load_abilities(cl_device_id device) { int status = CL_SUCCESS; cl_uint max_dim = 0; - status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(max_dim),&max_dim,0); - assert( status == CL_SUCCESS ); - assert( max_dim > 0 ); + status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(max_dim), &max_dim, 0); + assert(status == CL_SUCCESS); + assert(max_dim > 0); size_t max_id[3]; max_id[0] = 0; - status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_SIZES,max_dim*sizeof(size_t),&max_id[0],0); - assert( status == CL_SUCCESS ); + status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, + max_dim * sizeof(size_t), &max_id[0], 0); + assert(status == CL_SUCCESS); l_max_global_id0 = max_id[0]; } { // Is separate compilation supported? int status = CL_SUCCESS; l_linker_available = false; - status = clGetDeviceInfo(device,CL_DEVICE_LINKER_AVAILABLE,sizeof(l_linker_available),&l_linker_available,0); - assert( status == CL_SUCCESS ); + status = + clGetDeviceInfo(device, CL_DEVICE_LINKER_AVAILABLE, + sizeof(l_linker_available), &l_linker_available, 0); + assert(status == CL_SUCCESS); } } static const char* l_get_fp64_pragma(void) { - return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" : ""; + return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + : ""; } static const char* l_get_cles_int64_pragma(void) { - return l_has_cles_int64 ? "#pragma OPENCL EXTENSION cles_khr_int64 : enable\n" : ""; + return l_has_cles_int64 + ? 
"#pragma OPENCL EXTENSION cles_khr_int64 : enable\n" + : ""; } static const char* l_get_int64_atomic_pragma(void) @@ -500,89 +627,83 @@ static int l_build_type_table(cl_device_id device) size_t iscalar = 0; size_t ivecsize = 0; int vecsizes[] = { 2, 3, 4, 8, 16 }; - const char* vecbase[] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "ulong", "long", - "float", - "double" - }; - int vecbase_size[] = { - 1, 1, - 2, 2, - 4, 4, - 8, 8, - 4, - 8 - }; - const char* like_size_t[] = { - "intptr_t", - "uintptr_t", - "size_t", - "ptrdiff_t" - }; + const char* vecbase[] = { "uchar", "char", "ushort", "short", "uint", + "int", "ulong", "long", "float", "double" }; + int vecbase_size[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 }; + const char* like_size_t[] = { "intptr_t", "uintptr_t", "size_t", + "ptrdiff_t" }; const char* atomics[] = { - "atomic_int", "atomic_uint", - "atomic_long", "atomic_ulong", - "atomic_float", - "atomic_double", - }; - int atomics_size[] = { - 4, 4, - 8, 8, - 4, - 8 - }; - const char* intptr_atomics[] = { - "atomic_intptr_t", - "atomic_uintptr_t", - "atomic_size_t", - "atomic_ptrdiff_t" + "atomic_int", "atomic_uint", "atomic_long", + "atomic_ulong", "atomic_float", "atomic_double", }; + int atomics_size[] = { 4, 4, 8, 8, 4, 8 }; + const char* intptr_atomics[] = { "atomic_intptr_t", "atomic_uintptr_t", + "atomic_size_t", "atomic_ptrdiff_t" }; l_load_abilities(device); num_type_info = 0; // Boolean. - type_info[ num_type_info++ ] = TypeInfo( "bool" ).set_bool().set_size(1).set_buf_elem_type("uchar"); + type_info[num_type_info++] = + TypeInfo("bool").set_bool().set_size(1).set_buf_elem_type("uchar"); // Vector types, and the related scalar element types. 
- for ( iscalar=0; iscalar < sizeof(vecbase)/sizeof(vecbase[0]) ; ++iscalar ) { - if ( !gHasLong && strstr(vecbase[iscalar],"long") ) continue; - if ( !l_has_double && strstr(vecbase[iscalar],"double") ) continue; + for (iscalar = 0; iscalar < sizeof(vecbase) / sizeof(vecbase[0]); ++iscalar) + { + if (!gHasLong && strstr(vecbase[iscalar], "long")) continue; + if (!l_has_double && strstr(vecbase[iscalar], "double")) continue; // Scalar TypeInfo* elem_type = type_info + num_type_info++; - *elem_type = TypeInfo( vecbase[iscalar] ).set_vecbase().set_size( vecbase_size[iscalar] ); + *elem_type = TypeInfo(vecbase[iscalar]) + .set_vecbase() + .set_size(vecbase_size[iscalar]); // Vector - for ( ivecsize=0; ivecsize < sizeof(vecsizes)/sizeof(vecsizes[0]) ; ivecsize++ ) { - type_info[ num_type_info++ ] = TypeInfo( elem_type, vecsizes[ivecsize] ); + for (ivecsize = 0; ivecsize < sizeof(vecsizes) / sizeof(vecsizes[0]); + ivecsize++) + { + type_info[num_type_info++] = + TypeInfo(elem_type, vecsizes[ivecsize]); } } // Size_t-like types - for ( iscalar=0; iscalar < sizeof(like_size_t)/sizeof(like_size_t[0]) ; ++iscalar ) { - type_info[ num_type_info++ ] = TypeInfo( like_size_t[iscalar] ).set_like_size_t(); + for (iscalar = 0; iscalar < sizeof(like_size_t) / sizeof(like_size_t[0]); + ++iscalar) + { + type_info[num_type_info++] = + TypeInfo(like_size_t[iscalar]).set_like_size_t(); } // Atomic types. - for ( iscalar=0; iscalar < sizeof(atomics)/sizeof(atomics[0]) ; ++iscalar ) { - if ( !l_has_int64_atomics && strstr(atomics[iscalar],"long") ) continue; - if ( !(l_has_int64_atomics && l_has_double) && strstr(atomics[iscalar],"double") ) continue; + for (iscalar = 0; iscalar < sizeof(atomics) / sizeof(atomics[0]); ++iscalar) + { + if (!l_has_int64_atomics && strstr(atomics[iscalar], "long")) continue; + if (!(l_has_int64_atomics && l_has_double) + && strstr(atomics[iscalar], "double")) + continue; // The +7 is used to skip over the "atomic_" prefix. 
const char* buf_type = atomics[iscalar] + 7; - type_info[ num_type_info++ ] = TypeInfo( atomics[iscalar] ).set_atomic().set_size( atomics_size[iscalar] ).set_buf_elem_type( buf_type ); + type_info[num_type_info++] = TypeInfo(atomics[iscalar]) + .set_atomic() + .set_size(atomics_size[iscalar]) + .set_buf_elem_type(buf_type); } - if ( l_has_intptr_atomics ) { - for ( iscalar=0; iscalar < sizeof(intptr_atomics)/sizeof(intptr_atomics[0]) ; ++iscalar ) { - type_info[ num_type_info++ ] = TypeInfo( intptr_atomics[iscalar] ).set_atomic().set_like_size_t(); + if (l_has_intptr_atomics) + { + for (iscalar = 0; + iscalar < sizeof(intptr_atomics) / sizeof(intptr_atomics[0]); + ++iscalar) + { + type_info[num_type_info++] = TypeInfo(intptr_atomics[iscalar]) + .set_atomic() + .set_like_size_t(); } } - assert( num_type_info <= MAX_TYPES ); // or increase MAX_TYPES + assert(num_type_info <= MAX_TYPES); // or increase MAX_TYPES #if 0 for ( size_t i = 0 ; i < num_type_info ; i++ ) { @@ -594,7 +715,7 @@ static int l_build_type_table(cl_device_id device) return status; } -static const TypeInfo& l_find_type( const char* name ) +static const TypeInfo& l_find_type(const char* name) { auto itr = std::find_if(type_info, type_info + num_type_info, @@ -604,36 +725,54 @@ static const TypeInfo& l_find_type( const char* name ) } +// Populate return parameters for max program variable size, preferred program +// variable size. -// Populate return parameters for max program variable size, preferred program variable size. 
- -static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret) +static int l_get_device_info(cl_device_id device, size_t* max_size_ret, + size_t* pref_size_ret) { int err = CL_SUCCESS; size_t return_size = 0; - err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, sizeof(*max_size_ret), max_size_ret, &return_size); - if ( err != CL_SUCCESS ) { - log_error("Error: Failed to get device info for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n"); + err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, + sizeof(*max_size_ret), max_size_ret, &return_size); + if (err != CL_SUCCESS) + { + log_error("Error: Failed to get device info for " + "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n"); return err; } - if ( return_size != sizeof(size_t) ) { - log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size ); + if (return_size != sizeof(size_t)) + { + log_error("Error: Invalid size %d returned for " + "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", + (int)return_size); return 1; } - if ( return_size != sizeof(size_t) ) { - log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size ); + if (return_size != sizeof(size_t)) + { + log_error("Error: Invalid size %d returned for " + "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", + (int)return_size); return 1; } return_size = 0; - err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, sizeof(*pref_size_ret), pref_size_ret, &return_size); - if ( err != CL_SUCCESS ) { - log_error("Error: Failed to get device info for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",err); + err = + clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, + sizeof(*pref_size_ret), pref_size_ret, &return_size); + if (err != CL_SUCCESS) + { + log_error("Error: Failed to get device info for " + "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n", + err); return err; } - if ( return_size != 
sizeof(size_t) ) { - log_error("Error: Invalid size %d returned for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n", (int)return_size ); + if (return_size != sizeof(size_t)) + { + log_error("Error: Invalid size %d returned for " + "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n", + (int)return_size); return 1; } @@ -641,11 +780,13 @@ static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* } -static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state ) +static void l_set_randomly(cl_uchar* buf, size_t buf_size, + RandomSeed& rand_state) { - assert( 0 == (buf_size % sizeof(cl_uint) ) ); - for ( size_t i = 0; i < buf_size ; i += sizeof(cl_uint) ) { - *( (cl_uint*)(buf + i) ) = genrand_int32( rand_state ); + assert(0 == (buf_size % sizeof(cl_uint))); + for (size_t i = 0; i < buf_size; i += sizeof(cl_uint)) + { + *((cl_uint*)(buf + i)) = genrand_int32(rand_state); } #if 0 for ( size_t i = 0; i < buf_size ; i++ ) { @@ -657,20 +798,23 @@ static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_sta // Return num_value values of the given type. // Returns CL_SUCCESS if they compared as equal. -static int l_compare( const char* test_name, const cl_uchar* expected, const cl_uchar* received, size_t num_values, const TypeInfo&ti ) +static int l_compare(const char* test_name, const cl_uchar* expected, + const cl_uchar* received, size_t num_values, + const TypeInfo& ti) { // Compare only the valid returned bytes. 
- for ( unsigned value_idx = 0; value_idx < num_values; value_idx++ ) { + for (unsigned value_idx = 0; value_idx < num_values; value_idx++) + { const cl_uchar* expv = expected + value_idx * ti.get_size(); const cl_uchar* gotv = received + value_idx * ti.get_size(); - if ( memcmp( expv, gotv, ti.get_value_size() ) ) { - std::string exp_str = ti.as_string( expv ); - std::string got_str = ti.as_string( gotv ); - log_error("Error: %s test for type %s, at index %d: Expected %s got %s\n", - test_name, - ti.get_name_c_str(), value_idx, - exp_str.c_str(), - got_str.c_str() ); + if (memcmp(expv, gotv, ti.get_value_size())) + { + std::string exp_str = ti.as_string(expv); + std::string got_str = ti.as_string(gotv); + log_error( + "Error: %s test for type %s, at index %d: Expected %s got %s\n", + test_name, ti.get_name_c_str(), value_idx, exp_str.c_str(), + got_str.c_str()); return 1; } } @@ -678,11 +822,12 @@ static int l_compare( const char* test_name, const cl_uchar* expected, const cl_ } // Copy a target value from src[idx] to dest[idx] -static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti ) +static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, + unsigned src_idx, const TypeInfo& ti) { - cl_uchar* raw_dest = dest + dest_idx * ti.get_size(); - const cl_uchar* raw_src = src + src_idx * ti.get_size(); - memcpy( raw_dest, raw_src, ti.get_value_size() ); + cl_uchar* raw_dest = dest + dest_idx * ti.get_size(); + const cl_uchar* raw_src = src + src_idx * ti.get_size(); + memcpy(raw_dest, raw_src, ti.get_value_size()); return 0; } @@ -694,59 +839,70 @@ static std::string conversion_functions(const TypeInfo& ti) static char buf[MAX_STR]; int num_printed = 0; // The atomic types just use the base type. 
- if ( ti.is_atomic() || 0 == strcmp( ti.get_buf_elem_type(), ti.get_name_c_str() ) ) { + if (ti.is_atomic() + || 0 == strcmp(ti.get_buf_elem_type(), ti.get_name_c_str())) + { // The type is represented in a buffer by itself. - num_printed = snprintf(buf,MAX_STR, - "%s from_buf(%s a) { return a; }\n" - "%s to_buf(%s a) { return a; }\n", - ti.get_buf_elem_type(), ti.get_buf_elem_type(), - ti.get_buf_elem_type(), ti.get_buf_elem_type() ); - } else { + num_printed = snprintf(buf, MAX_STR, + "%s from_buf(%s a) { return a; }\n" + "%s to_buf(%s a) { return a; }\n", + ti.get_buf_elem_type(), ti.get_buf_elem_type(), + ti.get_buf_elem_type(), ti.get_buf_elem_type()); + } + else + { // Just use C-style cast. - num_printed = snprintf(buf,MAX_STR, - "%s from_buf(%s a) { return (%s)a; }\n" - "%s to_buf(%s a) { return (%s)a; }\n", - ti.get_name_c_str(), ti.get_buf_elem_type(), ti.get_name_c_str(), - ti.get_buf_elem_type(), ti.get_name_c_str(), ti.get_buf_elem_type() ); + num_printed = snprintf(buf, MAX_STR, + "%s from_buf(%s a) { return (%s)a; }\n" + "%s to_buf(%s a) { return (%s)a; }\n", + ti.get_name_c_str(), ti.get_buf_elem_type(), + ti.get_name_c_str(), ti.get_buf_elem_type(), + ti.get_name_c_str(), ti.get_buf_elem_type()); } // Add initializations. - if ( ti.is_atomic() ) { - num_printed += snprintf( buf + num_printed, MAX_STR-num_printed, - "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n" ); - } else { + if (ti.is_atomic()) + { + num_printed += snprintf(buf + num_printed, MAX_STR - num_printed, + "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n"); + } + else + { // This cast works even if the target type is a vector type. 
- num_printed += snprintf( buf + num_printed, MAX_STR-num_printed, - "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str()); + num_printed += + snprintf(buf + num_printed, MAX_STR - num_printed, + "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str()); } - assert( num_printed < MAX_STR ); // or increase MAX_STR + assert(num_printed < MAX_STR); // or increase MAX_STR result = buf; return result; } -static std::string global_decls(const TypeInfo& ti, bool with_init ) +static std::string global_decls(const TypeInfo& ti, bool with_init) { const char* tn = ti.get_name_c_str(); const char* vol = (ti.is_atomic() ? " volatile " : " "); static char decls[MAX_STR]; int num_printed = 0; - if ( with_init ) { - const char *decls_template_with_init = + if (with_init) + { + const char* decls_template_with_init = "%s %s var = INIT_VAR(0);\n" "global %s %s g_var = INIT_VAR(1);\n" "%s %s a_var[2] = { INIT_VAR(1), INIT_VAR(1) };\n" "volatile global %s %s* p_var = &a_var[1];\n\n"; - num_printed = snprintf(decls,sizeof(decls),decls_template_with_init, - vol,tn,vol,tn,vol,tn,vol,tn); - } else { - const char *decls_template_no_init = - "%s %s var;\n" - "global %s %s g_var;\n" - "%s %s a_var[2];\n" - "global %s %s* p_var;\n\n"; - num_printed = snprintf(decls,sizeof(decls),decls_template_no_init, - vol,tn,vol,tn,vol,tn,vol,tn); - } - assert( num_printed < sizeof(decls) ); + num_printed = snprintf(decls, sizeof(decls), decls_template_with_init, + vol, tn, vol, tn, vol, tn, vol, tn); + } + else + { + const char* decls_template_no_init = "%s %s var;\n" + "global %s %s g_var;\n" + "%s %s a_var[2];\n" + "global %s %s* p_var;\n\n"; + num_printed = snprintf(decls, sizeof(decls), decls_template_no_init, + vol, tn, vol, tn, vol, tn, vol, tn); + } + assert(num_printed < sizeof(decls)); return std::string(decls); } @@ -761,18 +917,26 @@ static std::string global_check_function(const TypeInfo& ti) // all() should only be used on vector inputs. 
For scalar comparison, the // result of the equality operator can be used as a bool value. - const bool is_scalar = ti.num_elem() == 0; // 0 is used to represent scalar types, not 1. + const bool is_scalar = + ti.num_elem() == 0; // 0 is used to represent scalar types, not 1. const std::string is_equality_true = is_scalar ? "" : "all"; std::string code = "kernel void global_check(global int* out) {\n"; code += " const " + type_name + " zero = ((" + type_name + ")0);\n"; code += " bool status = true;\n"; - if (ti.is_atomic()) { - code += " status &= " + is_equality_true + "(atomic_load(&var) == zero);\n"; - code += " status &= " + is_equality_true + "(atomic_load(&g_var) == zero);\n"; - code += " status &= " + is_equality_true + "(atomic_load(&a_var[0]) == zero);\n"; - code += " status &= " + is_equality_true + "(atomic_load(&a_var[1]) == zero);\n"; - } else { + if (ti.is_atomic()) + { + code += " status &= " + is_equality_true + + "(atomic_load(&var) == zero);\n"; + code += " status &= " + is_equality_true + + "(atomic_load(&g_var) == zero);\n"; + code += " status &= " + is_equality_true + + "(atomic_load(&a_var[0]) == zero);\n"; + code += " status &= " + is_equality_true + + "(atomic_load(&a_var[1]) == zero);\n"; + } + else + { code += " status &= " + is_equality_true + "(var == zero);\n"; code += " status &= " + is_equality_true + "(g_var == zero);\n"; code += " status &= " + is_equality_true + "(a_var[0] == zero);\n"; @@ -792,7 +956,8 @@ static std::string writer_function(const TypeInfo& ti) { static char writer_src[MAX_STR]; int num_printed = 0; - if ( !ti.is_atomic() ) { + if (!ti.is_atomic()) + { const char* writer_template_normal = "kernel void writer( global %s* src, uint idx ) {\n" " var = from_buf(src[0]);\n" @@ -801,8 +966,11 @@ static std::string writer_function(const TypeInfo& ti) " a_var[1] = from_buf(src[3]);\n" " p_var = a_var + idx;\n" "}\n\n"; - num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_normal,ti.get_buf_elem_type()); - 
} else { + num_printed = snprintf(writer_src, sizeof(writer_src), + writer_template_normal, ti.get_buf_elem_type()); + } + else + { const char* writer_template_atomic = "kernel void writer( global %s* src, uint idx ) {\n" " atomic_store( &var, from_buf(src[0]) );\n" @@ -811,9 +979,10 @@ static std::string writer_function(const TypeInfo& ti) " atomic_store( &a_var[1], from_buf(src[3]) );\n" " p_var = a_var + idx;\n" "}\n\n"; - num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_atomic,ti.get_buf_elem_type()); + num_printed = snprintf(writer_src, sizeof(writer_src), + writer_template_atomic, ti.get_buf_elem_type()); } - assert( num_printed < sizeof(writer_src) ); + assert(num_printed < sizeof(writer_src)); std::string result = writer_src; return result; } @@ -826,7 +995,8 @@ static std::string reader_function(const TypeInfo& ti) { static char reader_src[MAX_STR]; int num_printed = 0; - if ( !ti.is_atomic() ) { + if (!ti.is_atomic()) + { const char* reader_template_normal = "kernel void reader( global %s* dest, %s ptr_write_val ) {\n" " *p_var = from_buf(ptr_write_val);\n" @@ -835,8 +1005,12 @@ static std::string reader_function(const TypeInfo& ti) " dest[2] = to_buf(a_var[0]);\n" " dest[3] = to_buf(a_var[1]);\n" "}\n\n"; - num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_normal,ti.get_buf_elem_type(),ti.get_buf_elem_type()); - } else { + num_printed = + snprintf(reader_src, sizeof(reader_src), reader_template_normal, + ti.get_buf_elem_type(), ti.get_buf_elem_type()); + } + else + { const char* reader_template_atomic = "kernel void reader( global %s* dest, %s ptr_write_val ) {\n" " atomic_store( p_var, from_buf(ptr_write_val) );\n" @@ -845,40 +1019,53 @@ static std::string reader_function(const TypeInfo& ti) " dest[2] = to_buf( atomic_load( &a_var[0] ) );\n" " dest[3] = to_buf( atomic_load( &a_var[1] ) );\n" "}\n\n"; - num_printed = 
snprintf(reader_src,sizeof(reader_src),reader_template_atomic,ti.get_buf_elem_type(),ti.get_buf_elem_type()); + num_printed = + snprintf(reader_src, sizeof(reader_src), reader_template_atomic, + ti.get_buf_elem_type(), ti.get_buf_elem_type()); } - assert( num_printed < sizeof(reader_src) ); + assert(num_printed < sizeof(reader_src)); std::string result = reader_src; return result; } // Check that all globals where appropriately default-initialized. -static int check_global_initialization(cl_context context, cl_program program, cl_command_queue queue) +static int check_global_initialization(cl_context context, cl_program program, + cl_command_queue queue) { int status = CL_SUCCESS; // Create a buffer on device to store a unique integer. cl_int is_init_valid = 0; - clMemWrapper buffer(clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(is_init_valid), &is_init_valid, &status)); + clMemWrapper buffer( + clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + sizeof(is_init_valid), &is_init_valid, &status)); test_error_ret(status, "Failed to allocate buffer", status); // Create, setup and invoke kernel. 
- clKernelWrapper global_check(clCreateKernel(program, "global_check", &status)); + clKernelWrapper global_check( + clCreateKernel(program, "global_check", &status)); test_error_ret(status, "Failed to create global_check kernel", status); status = clSetKernelArg(global_check, 0, sizeof(cl_mem), &buffer); - test_error_ret(status, "Failed to set up argument for the global_check kernel", status); + test_error_ret(status, + "Failed to set up argument for the global_check kernel", + status); const cl_uint work_dim = 1; const size_t global_work_offset[] = { 0 }; const size_t global_work_size[] = { 1 }; - status = clEnqueueNDRangeKernel(queue, global_check, work_dim, global_work_offset, global_work_size, nullptr, 0, nullptr, nullptr); + status = clEnqueueNDRangeKernel(queue, global_check, work_dim, + global_work_offset, global_work_size, + nullptr, 0, nullptr, nullptr); test_error_ret(status, "Failed to run global_check kernel", status); status = clFinish(queue); test_error_ret(status, "clFinish() failed", status); // Read back the memory buffer from the device. - status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid), &is_init_valid, 0, nullptr, nullptr); + status = + clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid), + &is_init_valid, 0, nullptr, nullptr); test_error_ret(status, "Failed to read buffer from device", status); - if (is_init_valid == 0) { + if (is_init_valid == 0) + { log_error("Unexpected default values were detected"); return 1; } @@ -887,58 +1074,75 @@ static int check_global_initialization(cl_context context, cl_program program, c } // Check write-then-read. 
-static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue ) +static int l_write_read(cl_device_id device, cl_context context, + cl_command_queue queue) { int status = CL_SUCCESS; int itype; - RandomSeed rand_state( gRandomSeed ); + RandomSeed rand_state(gRandomSeed); - for ( itype = 0; itype < num_type_info ; itype++ ) { - status = status | l_write_read_for_type(device,context,queue,type_info[itype], rand_state ); + for (itype = 0; itype < num_type_info; itype++) + { + status = status + | l_write_read_for_type(device, context, queue, type_info[itype], + rand_state); FLUSH; } return status; } -static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ) +static int l_write_read_for_type(cl_device_id device, cl_context context, + cl_command_queue queue, const TypeInfo& ti, + RandomSeed& rand_state) { int err = CL_SUCCESS; - std::string type_name( ti.get_name() ); + std::string type_name(ti.get_name()); const char* tn = type_name.c_str(); - log_info(" %s ",tn); + log_info(" %s ", tn); StringTable ksrc; - ksrc.add( l_get_fp64_pragma() ); - ksrc.add( l_get_cles_int64_pragma() ); - if (ti.is_atomic_64bit()) - ksrc.add( l_get_int64_atomic_pragma() ); - ksrc.add( conversion_functions(ti) ); - ksrc.add( global_decls(ti,false) ); - ksrc.add( global_check_function(ti) ); - ksrc.add( writer_function(ti) ); - ksrc.add( reader_function(ti) ); + ksrc.add(l_get_fp64_pragma()); + ksrc.add(l_get_cles_int64_pragma()); + if (ti.is_atomic_64bit()) ksrc.add(l_get_int64_atomic_pragma()); + ksrc.add(conversion_functions(ti)); + ksrc.add(global_decls(ti, false)); + ksrc.add(global_check_function(ti)); + ksrc.add(writer_function(ti)); + ksrc.add(reader_function(ti)); int status = CL_SUCCESS; clProgramWrapper program; clKernelWrapper writer; - status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS); - 
test_error_ret(status,"Failed to create program for read-after-write test",status); + status = create_single_kernel_helper_with_build_options( + context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", + OPTIONS); + test_error_ret(status, "Failed to create program for read-after-write test", + status); - clKernelWrapper reader( clCreateKernel( program, "reader", &status ) ); - test_error_ret(status,"Failed to create reader kernel for read-after-write test",status); + clKernelWrapper reader(clCreateKernel(program, "reader", &status)); + test_error_ret(status, + "Failed to create reader kernel for read-after-write test", + status); // Check size query. size_t used_bytes = 0; - status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 ); - test_error_ret(status,"Failed to query global variable total size",status); - size_t expected_used_bytes = - (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements. - + ( l_64bit_device ? 8 : 4 ); // The pointer - if ( used_bytes < expected_used_bytes ) { - log_error("Error program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes ); + status = clGetProgramBuildInfo(program, device, + CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, + sizeof(used_bytes), &used_bytes, 0); + test_error_ret(status, "Failed to query global variable total size", + status); + size_t expected_used_bytes = (NUM_TESTED_VALUES - 1) + * ti.get_size() // Two regular variables and an array of 2 elements. + + (l_64bit_device ? 
8 : 4); // The pointer + if (used_bytes < expected_used_bytes) + { + log_error("Error program query for global variable total size query " + "failed: Expected at least %llu but got %llu\n", + (unsigned long long)expected_used_bytes, + (unsigned long long)used_bytes); err |= 1; } @@ -951,90 +1155,131 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT); cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT); - clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) ); - test_error_ret(status,"Failed to allocate write buffer",status); - clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) ); - test_error_ret(status,"Failed to allocate read buffer",status); + clMemWrapper write_mem(clCreateBuffer( + context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status)); + test_error_ret(status, "Failed to allocate write buffer", status); + clMemWrapper read_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR, + read_data_size, read_data, &status)); + test_error_ret(status, "Failed to allocate read buffer", status); - status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status); - status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status); + status = clSetKernelArg(writer, 0, sizeof(cl_mem), &write_mem); + test_error_ret(status, "set arg", status); + status = clSetKernelArg(reader, 0, sizeof(cl_mem), &read_mem); + test_error_ret(status, "set arg", status); // Boolean random data needs to be massaged a bit more. - const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS; + const int num_rounds = ti.is_bool() ? 
(1 << NUM_TESTED_VALUES) : NUM_ROUNDS; unsigned bool_iter = 0; - for ( int iround = 0; iround < num_rounds ; iround++ ) { - for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer + for (int iround = 0; iround < num_rounds; iround++) + { + for (cl_uint iptr_idx = 0; iptr_idx < 2; iptr_idx++) + { // Index into array, to write via pointer // Generate new random data to push through. - // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that. + // Generate 5 * 128 bytes all the time, even though the test for + // many types use less than all that. - cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0); + cl_uchar* write_ptr = (cl_uchar*)clEnqueueMapBuffer( + queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, + 0, 0, 0); - if ( ti.is_bool() ) { + if (ti.is_bool()) + { // For boolean, random data cast to bool isn't very random. // So use the bottom bit of bool_value_iter to get true // diversity. - for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) { - write_data[value_idx] = (1< Date: Thu, 13 Oct 2022 10:02:40 +0100 Subject: harness: Fix -Wformat warnings (#1527) The main sources of warnings were: * Printing of a `size_t` which requires the `%zu` specifier. * Printing of `cl_long`/`cl_ulong` which is now done using the `PRI*64` macros to ensure portability across 32 and 64-bit builds. 
Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_common/harness/conversions.cpp | 5 +++-- test_common/harness/imageHelpers.cpp | 7 ++++--- test_common/harness/propertyHelpers.cpp | 11 ++++++----- test_common/harness/testHarness.cpp | 2 +- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp index c7731269..d52a2ac6 100644 --- a/test_common/harness/conversions.cpp +++ b/test_common/harness/conversions.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "conversions.h" +#include #include #include #include @@ -50,10 +51,10 @@ void print_type_to_string(ExplicitType type, void *data, char *string) case kInt: sprintf(string, "%d", *((cl_int *)data)); return; case kUInt: case kUnsignedInt: sprintf(string, "%u", *((cl_uint *)data)); return; - case kLong: sprintf(string, "%lld", *((cl_long *)data)); return; + case kLong: sprintf(string, "%" PRId64 "", *((cl_long *)data)); return; case kULong: case kUnsignedLong: - sprintf(string, "%llu", *((cl_ulong *)data)); + sprintf(string, "%" PRIu64 "", *((cl_ulong *)data)); return; case kFloat: sprintf(string, "%f", *((cl_float *)data)); return; case kHalf: sprintf(string, "half"); return; diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index 3dbdffa0..f1694e88 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -23,6 +23,7 @@ #include #endif #include +#include #include #if !defined(_WIN32) #include @@ -421,7 +422,7 @@ void print_first_pixel_difference_error(size_t where, const char *sourcePixel, (int)thirdDim, (int)imageInfo->rowPitch, (int)imageInfo->rowPitch - (int)imageInfo->width * (int)pixel_size); - log_error("Failed at column: %ld ", where); + log_error("Failed at column: %zu ", where); switch (pixel_size) { @@ -454,7 +455,7 @@ void print_first_pixel_difference_error(size_t where, const char *sourcePixel, ((cl_ushort 
*)destPixel)[1], ((cl_ushort *)destPixel)[2]); break; case 8: - log_error("*0x%16.16llx vs. 0x%16.16llx\n", + log_error("*0x%16.16" PRIx64 " vs. 0x%16.16" PRIx64 "\n", ((cl_ulong *)sourcePixel)[0], ((cl_ulong *)destPixel)[0]); break; case 12: @@ -473,7 +474,7 @@ void print_first_pixel_difference_error(size_t where, const char *sourcePixel, ((cl_uint *)destPixel)[2], ((cl_uint *)destPixel)[3]); break; default: - log_error("Don't know how to print pixel size of %ld\n", + log_error("Don't know how to print pixel size of %zu\n", pixel_size); break; } diff --git a/test_common/harness/propertyHelpers.cpp b/test_common/harness/propertyHelpers.cpp index e368f9b6..6a10c076 100644 --- a/test_common/harness/propertyHelpers.cpp +++ b/test_common/harness/propertyHelpers.cpp @@ -19,6 +19,7 @@ #include #include +#include #include static bool findProperty(const std::vector& props, @@ -97,16 +98,16 @@ int compareProperties(const std::vector& queried, if (!found) { - log_error("ERROR: expected property 0x%llx not found!\n", + log_error("ERROR: expected property 0x%" PRIx64 " not found!\n", check_prop); return TEST_FAIL; } else if (check_value != queried_value) { - log_error( - "ERROR: mis-matched value for property 0x%llx: wanted " - "0x%llx, got 0x%llx\n", - check_prop, check_value, queried_value); + log_error("ERROR: mis-matched value for property 0x%" PRIx64 + ": wanted " + "0x%" PRIx64 ", got 0x%" PRIx64 "\n", + check_prop, check_value, queried_value); return TEST_FAIL; } } diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index d07d982c..a309f53d 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -1198,7 +1198,7 @@ cl_platform_id getPlatformFromDevice(cl_device_id deviceID) void PrintArch(void) { - vlog("sizeof( void*) = %ld\n", sizeof(void *)); + vlog("sizeof( void*) = %zu\n", sizeof(void *)); #if defined(__ppc__) vlog("ARCH:\tppc\n"); #elif defined(__ppc64__) -- cgit v1.2.3 From 
5e116e7b0d7fe29f637cdd4cff87ff996d91cb22 Mon Sep 17 00:00:00 2001 From: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Date: Fri, 14 Oct 2022 09:53:33 +0100 Subject: Use CTS type wrappers for test_sizeof. (#1547) Signed-off-by: John Kesapides Signed-off-by: John Kesapides --- test_conformance/basic/test_sizeof.cpp | 39 +++++++--------------------------- 1 file changed, 8 insertions(+), 31 deletions(-) diff --git a/test_conformance/basic/test_sizeof.cpp b/test_conformance/basic/test_sizeof.cpp index 6b1ddb56..e980ed68 100644 --- a/test_conformance/basic/test_sizeof.cpp +++ b/test_conformance/basic/test_sizeof.cpp @@ -35,9 +35,9 @@ cl_int get_type_size( cl_context context, cl_command_queue queue, const char *ty "}\n" }; - cl_program p; - cl_kernel k; - cl_mem m; + clProgramWrapper p; + clKernelWrapper k; + clMemWrapper m; cl_uint temp; @@ -51,42 +51,19 @@ cl_int get_type_size( cl_context context, cl_command_queue queue, const char *ty } cl_int err = create_single_kernel_helper_with_build_options( context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", nullptr); - if( err ) - return err; + test_error(err, "Failed to build kernel/program."); m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err ); - if( NULL == m ) - { - clReleaseProgram( p ); - clReleaseKernel( k ); - log_error("\nclCreateBuffer FAILED\n"); - return err; - } + test_error(err, "clCreateBuffer failed."); err = clSetKernelArg( k, 0, sizeof( cl_mem ), &m ); - if( err ) - { - clReleaseProgram( p ); - clReleaseKernel( k ); - clReleaseMemObject( m ); - log_error("\nclSetKernelArg FAILED\n"); - return err; - } + test_error(err, "clSetKernelArg failed."); err = clEnqueueTask( queue, k, 0, NULL, NULL ); - clReleaseProgram( p ); - clReleaseKernel( k ); - if( err ) - { - clReleaseMemObject( m ); - log_error( "\nclEnqueueTask FAILED\n" ); - return err; - } + test_error(err, "clEnqueueTask failed."); err = clEnqueueReadBuffer( queue, m, CL_TRUE, 0, 
sizeof( cl_uint ), &temp, 0, NULL, NULL ); - clReleaseMemObject( m ); - if( err ) - log_error( "\nclEnqueueReadBuffer FAILED\n" ); + test_error(err, "clEnqueueReadBuffer failed."); *size = (cl_ulong) temp; -- cgit v1.2.3 From 90a5183ec499d5b4701f58f6134dd424d82c4dca Mon Sep 17 00:00:00 2001 From: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Date: Fri, 14 Oct 2022 09:55:10 +0100 Subject: Use CTS type wrappers for test_enqueued_local_size (#1544) Signed-off-by: John Kesapides Signed-off-by: John Kesapides --- .../basic/test_enqueued_local_size.cpp | 122 ++++++++++----------- 1 file changed, 59 insertions(+), 63 deletions(-) diff --git a/test_conformance/basic/test_enqueued_local_size.cpp b/test_conformance/basic/test_enqueued_local_size.cpp index 91fe1434..ea95df68 100644 --- a/test_conformance/basic/test_enqueued_local_size.cpp +++ b/test_conformance/basic/test_enqueued_local_size.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -26,32 +26,33 @@ #include "procs.h" -static const char *enqueued_local_size_2d_code = -"__kernel void test_enqueued_local_size_2d(global int *dst)\n" -"{\n" -" if ((get_global_id(0) == 0) && (get_global_id(1) == 0))\n" -" {\n" -" dst[0] = (int)get_enqueued_local_size(0)\n;" -" dst[1] = (int)get_enqueued_local_size(1)\n;" -" }\n" -"}\n"; - -static const char *enqueued_local_size_1d_code = -"__kernel void test_enqueued_local_size_1d(global int *dst)\n" -"{\n" -" int tid_x = get_global_id(0);\n" -" if (get_global_id(0) == 0)\n" -" {\n" -" dst[tid_x] = (int)get_enqueued_local_size(0)\n;" -" }\n" -"}\n"; - - -static int -verify_enqueued_local_size(int *result, size_t *expected, int n) +static const char *enqueued_local_size_2d_code = R"( +__kernel void test_enqueued_local_size_2d(global int *dst) +{ + if ((get_global_id(0) == 0) && (get_global_id(1) == 0)) + { + dst[0] = (int)get_enqueued_local_size(0); + dst[1] = (int)get_enqueued_local_size(1); + } +} +)"; + +static const char *enqueued_local_size_1d_code = R"( +__kernel void test_enqueued_local_size_1d(global int *dst) +{ + int tid_x = get_global_id(0); + if (get_global_id(0) == 0) + { + dst[tid_x] = (int)get_enqueued_local_size(0); + } +} +)"; + + +static int verify_enqueued_local_size(int *result, size_t *expected, int n) { int i; - for (i=0; i(num_elements); + globalsize[1] = static_cast(num_elements); size_t max_wgs; - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_wgs), &max_wgs, NULL); - test_error( err, "clGetDeviceInfo failed."); + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_wgs), &max_wgs, nullptr); + test_error(err, "clGetDeviceInfo failed."); localsize[0] = std::min(16, max_wgs); localsize[1] = std::min(11, max_wgs / localsize[0]); @@ -143,35 +143,31 @@ test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_que } } - err = clEnqueueNDRangeKernel(queue, kernel[1], 2, NULL, globalsize, 
localsize, 0, NULL, NULL); - test_error( err, "clEnqueueNDRangeKernel failed."); + err = clEnqueueNDRangeKernel(queue, kernel[1], 2, nullptr, globalsize, + localsize, 0, nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed."); - err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL); - test_error( err, "clEnqueueReadBuffer failed."); + err = clEnqueueReadBuffer(queue, stream, CL_BLOCKING, 0, 2 * sizeof(int), + output_ptr, 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed."); err = verify_enqueued_local_size(output_ptr, localsize, 2); - globalsize[0] = (size_t)num_elements; + globalsize[0] = static_cast(num_elements); localsize[0] = 9; if (use_uniform_work_groups && (globalsize[0] % localsize[0])) { globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0])); } - err = clEnqueueNDRangeKernel(queue, kernel[1], 1, NULL, globalsize, localsize, 0, NULL, NULL); - test_error( err, "clEnqueueNDRangeKernel failed."); + err = clEnqueueNDRangeKernel(queue, kernel[1], 1, nullptr, globalsize, + localsize, 0, nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed."); - err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL); - test_error( err, "clEnqueueReadBuffer failed."); + err = clEnqueueReadBuffer(queue, stream, CL_BLOCKING, 0, 2 * sizeof(int), + output_ptr, 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed."); err = verify_enqueued_local_size(output_ptr, localsize, 1); - // cleanup - clReleaseMemObject(streams); - clReleaseKernel(kernel[0]); - clReleaseKernel(kernel[1]); - clReleaseProgram(program[0]); - clReleaseProgram(program[1]); - free(output_ptr); - return err; } -- cgit v1.2.3